cirneco 0.9.17 → 0.9.18

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -3
  3. data/Gemfile.lock +8 -8
  4. data/cirneco.gemspec +1 -1
  5. data/doi.xml +47 -0
  6. data/lib/cirneco/version.rb +1 -1
  7. data/resources/kernel-4.0/include/{datacite-dateType-v4.xsd → datacite-dateType-v4.1.xsd} +3 -1
  8. data/resources/kernel-4.0/include/datacite-nameType-v4.1.xsd +10 -0
  9. data/resources/kernel-4.0/include/{datacite-relationType-v4.xsd → datacite-relationType-v4.1.xsd} +8 -1
  10. data/resources/kernel-4.0/include/{datacite-resourceType-v4.xsd → datacite-resourceType-v4.1.xsd} +3 -1
  11. data/resources/kernel-4.0/metadata.xsd +27 -15
  12. data/resources/kernel-4.0/samples/datacite-example-Box_dateCollected_DataCollector-v4.1.xml +52 -0
  13. data/resources/kernel-4.0/samples/datacite-example-GeoLocation-v4.1.xml +66 -0
  14. data/resources/kernel-4.0/samples/datacite-example-HasMetadata-v4.1.xml +70 -0
  15. data/resources/kernel-4.0/samples/datacite-example-ResearchGroup_Methods-v4.1.xml +46 -0
  16. data/resources/kernel-4.0/samples/datacite-example-ResourceTypeGeneral_Collection-v4.1.xml +52 -0
  17. data/resources/kernel-4.0/samples/datacite-example-complicated-v4.1.xml +58 -0
  18. data/resources/kernel-4.0/samples/datacite-example-datapaper-v4.1.xml +32 -0
  19. data/resources/kernel-4.0/samples/datacite-example-dataset-v4.1.xml +45 -0
  20. data/resources/kernel-4.0/samples/datacite-example-full-v4.1.xml +103 -0
  21. data/resources/kernel-4.0/samples/datacite-example-fundingReference-v.4.1.xml +61 -0
  22. data/resources/kernel-4.0/samples/datacite-example-polygon-advanced-v4.1.xml +141 -0
  23. data/resources/kernel-4.0/samples/datacite-example-polygon-v4.1.xml +161 -0
  24. data/resources/kernel-4.0/samples/datacite-example-relationTypeIsIdenticalTo-v4.1.xml +65 -0
  25. data/resources/kernel-4.0/samples/datacite-example-software-v4.1.xml +67 -0
  26. data/resources/kernel-4.0/samples/datacite-example-video-v4.1.xml +31 -0
  27. data/resources/kernel-4.0/samples/datacite-example-workflow-v4.1.xml +55 -0
  28. data/spec/api_spec.rb +4 -4
  29. data/spec/doi_spec.rb +4 -4
  30. data/spec/fixtures/vcr_cassettes/Cirneco_Work/media/includes_media.yml +11 -13
  31. data/spec/fixtures/vcr_cassettes/Cirneco_Work/schema/BlogPosting.yml +12 -14
  32. data/spec/utils_spec.rb +8 -8
  33. data/spec/work_spec.rb +5 -5
  34. metadata +25 -19
@@ -0,0 +1,55 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
3
+ <identifier identifierType="DOI">10.5072/100044</identifier>
4
+ <creators>
5
+ <creator>
6
+ <creatorName nameType="Personal">Luo, R</creatorName>
7
+ </creator>
8
+ <creator>
9
+ <creatorName nameType="Personal">Liu, B</creatorName>
10
+ </creator>
11
+ <creator>
12
+ <creatorName nameType="Personal">Xie, Y</creatorName>
13
+ </creator>
14
+ <creator>
15
+ <creatorName nameType="Personal">Li, Z</creatorName>
16
+ </creator>
17
+ </creators>
18
+ <titles>
19
+ <title xml:lang="en">
20
+ Software and supporting material for "SOAPdenovo2: An empirically improved memory-efficient short read de novo assembly"
21
+ </title>
22
+ </titles>
23
+ <publisher>GigaScience Database</publisher>
24
+ <publicationYear>2012</publicationYear>
25
+ <subjects>
26
+ <subject xml:lang="en">DNA (Genetics)</subject>
27
+ <subject xml:lang="en">Computer Program</subject>
28
+ </subjects>
29
+ <dates>
30
+ <date dateType="Available">2012-12-13</date>
31
+ </dates>
32
+ <language>en</language>
33
+ <resourceType resourceTypeGeneral="Workflow">Software</resourceType>
34
+ <relatedIdentifiers>
35
+ <relatedIdentifier relatedIdentifierType="DOI" relationType="IsReferencedBy">10.5072/2047-217X-1-1</relatedIdentifier>
36
+ <relatedIdentifier relatedIdentifierType="DOI" relationType="Compiles">10.5072/100038</relatedIdentifier>
37
+ </relatedIdentifiers>
38
+ <sizes>
39
+ <size>31 MB</size>
40
+ </sizes>
41
+ <rightsList>
42
+ <rights rightsURI="http://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 Universal</rights>
43
+ </rightsList>
44
+ <descriptions>
45
+ <description xml:lang="en" descriptionType="Abstract">
46
+ SOAPdenovo2 is the latest de novo genome assembly package from BGI's SOAP (short oligonucleotide analysis package) suite of tools (homepage here: http://soap.genomics.org.cn/). Compared to SOAPdenovo1, this new version has the advantage of a new
47
+ algorithm design that reduces memory consumption in graph construction, resolves more repeat regions in contig assembly, increases coverage and length in scaffold construction, improves gap closure, and is optimized for large genomes. Using new
48
+ sequencing data from the YH (Homo sapiens) diploid genome - the first sequenced Han Chinese individual, an updated assembly was produced (see dataset here: doi:10.5524/100038), with the N50 scores for the contig and scaffold being 3-fold and 50-fold
49
+ longer, respectively, than the first published version. The genome coverage increased from 81.16% to 93.91%, and memory consumption was ~2/3 times lower during the point of largest memory consumption. Benchmarking with Assemblathon1 and GAGE datasets
50
+ shows that SOAPdenovo2 greatly surpasses its predecessor SOAPdenovo1 and is competitive to other assemblers on both assembly length and accuracy. In order to facilitate readers to repeat and recreate these findings, configured packages with the
51
+ compressed pipelines containing all of the necessary shell scripts and tools are available from the BGI FTP server (ftp://public.genomics.org.cn/BGI/SOAPdenovo2). The latest version of SOAPdenovo2 is available from Sourceforge:
52
+ http://soapdenovo2.sourceforge.net/ These pipelines will also soon be made available from our data platform as Galaxy workflows: http://galaxy.cbiit.cuhk.edu.hk/
53
+ </description>
54
+ </descriptions>
55
+ </resource>
@@ -2,7 +2,7 @@ require 'spec_helper'
2
2
 
3
3
  describe Cirneco::Work, vcr: true, :order => :defined do
4
4
  let(:samples_path) { "resources/kernel-4.0/samples/" }
5
- let(:input) { samples_path + "datacite-example-complicated-v4.0.xml" }
5
+ let(:input) { samples_path + "datacite-example-complicated-v4.1.xml" }
6
6
  let(:media) { [{ mime_type: "application/pdf", url:"http://www.datacite.org/cirneco-test.pdf" }]}
7
7
  let(:username) { ENV['MDS_USERNAME'] }
8
8
  let(:password) { ENV['MDS_PASSWORD'] }
@@ -19,9 +19,9 @@ describe Cirneco::Work, vcr: true, :order => :defined do
19
19
  context "post" do
20
20
  it 'should post metadata' do
21
21
  response = subject.post_metadata(subject.datacite, options)
22
- expect(response.body["data"]).to eq("OK (10.5072/testpub)")
22
+ expect(response.body["data"]).to eq("OK (10.5072/079j-xh42)")
23
23
  expect(response.status).to eq(201)
24
- expect(response.headers["Location"]).to eq("http://mds.test.datacite.org/metadata/10.5072/testpub")
24
+ expect(response.headers["Location"]).to eq("http://mds.test.datacite.org/metadata/10.5072/079j-xh42")
25
25
  end
26
26
  end
27
27
 
@@ -55,7 +55,7 @@ describe Cirneco::Work, vcr: true, :order => :defined do
55
55
  it 'should get all dois' do
56
56
  response = subject.get_dois(options)
57
57
  dois = response.body["data"]
58
- expect(dois.length).to eq(6)
58
+ expect(dois.length).to eq(7)
59
59
  expect(dois.first).to eq("10.5072/0007-NW90")
60
60
  end
61
61
 
@@ -8,7 +8,7 @@ describe Cirneco::Doi do
8
8
 
9
9
  let(:number) { 123 }
10
10
  let(:prefix) { ENV['PREFIX'] }
11
- let(:doi) { "10.5072/55e5-t5c0" }
11
+ let(:doi) { "10.5072/079j-xh42" }
12
12
  let(:url) { "http://www.datacite.org" }
13
13
  let(:username) { ENV['MDS_USERNAME'] }
14
14
  let(:password) { ENV['MDS_PASSWORD'] }
@@ -25,7 +25,7 @@ describe Cirneco::Doi do
25
25
  context "get" do
26
26
  it 'should get all dois' do
27
27
  subject.options = api_options.merge(limit: 3)
28
- expect { subject.get "all" }.to output("10.5072/0007-NW90\n10.5072/55E5-T5C0\n10.5072/BC11-CQW6\n").to_stdout
28
+ expect { subject.get "all" }.to output("10.5072/0007-NW90\n10.5072/079J-XH42\n10.5072/55E5-T5C0\n").to_stdout
29
29
  end
30
30
 
31
31
  it 'should get doi' do
@@ -50,7 +50,7 @@ describe Cirneco::Doi do
50
50
  context "base32" do
51
51
  it 'generates a doi' do
52
52
  subject.options = { number: number, prefix: prefix }
53
- expect { subject.generate }.to output("10.5072/0000-03vc\n").to_stdout
53
+ expect { subject.generate }.to output("10.5072/0000-3v20\n").to_stdout
54
54
  end
55
55
 
56
56
  it 'requires a prefix' do
@@ -59,7 +59,7 @@ describe Cirneco::Doi do
59
59
  end
60
60
 
61
61
  it 'decodes a doi' do
62
- expect { subject.decode doi }.to output("DOI #{doi} was encoded with 5551351980\n").to_stdout
62
+ expect { subject.decode doi }.to output("DOI #{doi} was encoded with 7654321\n").to_stdout
63
63
  end
64
64
 
65
65
  it 'checks a doi' do
@@ -29,17 +29,15 @@ http_interactions:
29
29
  Location:
30
30
  - "/eating-your-own-dog-food/"
31
31
  Date:
32
- - Fri, 19 Jan 2018 18:47:19 GMT
32
+ - Thu, 01 Feb 2018 10:08:40 GMT
33
33
  Server:
34
34
  - AmazonS3
35
- Age:
36
- - '633'
37
35
  X-Cache:
38
36
  - Hit from cloudfront
39
37
  Via:
40
- - 1.1 b454a0b154ae18408006bc2a9abd88ec.cloudfront.net (CloudFront)
38
+ - 1.1 7e3ec4bce6d89d06369eae9bcbd1cb7e.cloudfront.net (CloudFront)
41
39
  X-Amz-Cf-Id:
42
- - 5vYhT_gx4pRjsk74eKRpjRBaVtvBU7VXYwqjxQ7YymT2k-2-W3gesA==
40
+ - IVyaSArb7jJHKxhWfU6vHn65IuK7kBsOiv6X8m9-K4t8Ihz8B1ffKA==
43
41
  body:
44
42
  encoding: UTF-8
45
43
  string: |
@@ -50,14 +48,14 @@ http_interactions:
50
48
  <ul>
51
49
  <li>Code: Found</li>
52
50
  <li>Message: Resource Found</li>
53
- <li>RequestId: 9B7ED3BBCD7E1247</li>
54
- <li>HostId: qEFnifqb9w2rEeTP6bdbM4fUF5zcKK5f98hgCH10zKGU9mezmBfq//xgQjIv7q6mwWFPLMgT0a8=</li>
51
+ <li>RequestId: 954101BD524D1D33</li>
52
+ <li>HostId: UFzFKnk0WRHCthFD7XYEH891fwFwQwMWBEQaM4G+42waSLPsXAr9zRkYa7KgHfiNxHkQ+wqPvzw=</li>
55
53
  </ul>
56
54
  <hr/>
57
55
  </body>
58
56
  </html>
59
57
  http_version:
60
- recorded_at: Fri, 19 Jan 2018 18:57:53 GMT
58
+ recorded_at: Thu, 01 Feb 2018 10:08:41 GMT
61
59
  - request:
62
60
  method: get
63
61
  uri: https://blog.datacite.org/eating-your-own-dog-food/
@@ -81,7 +79,7 @@ http_interactions:
81
79
  Connection:
82
80
  - keep-alive
83
81
  Date:
84
- - Fri, 19 Jan 2018 06:39:24 GMT
82
+ - Thu, 01 Feb 2018 02:06:02 GMT
85
83
  Cache-Control:
86
84
  - max-age=31536000
87
85
  Last-Modified:
@@ -91,17 +89,17 @@ http_interactions:
91
89
  Server:
92
90
  - AmazonS3
93
91
  Age:
94
- - '44310'
92
+ - '28959'
95
93
  X-Cache:
96
94
  - Hit from cloudfront
97
95
  Via:
98
- - 1.1 0f820adb6671fcc6033a9aa95ec8e0fb.cloudfront.net (CloudFront)
96
+ - 1.1 96918fe484b3cc9879c048ab5c4e033c.cloudfront.net (CloudFront)
99
97
  X-Amz-Cf-Id:
100
- - 1XCiRkZHbkg8lXPYENKy8ehZyMNhC0Bk_pWMIoe8fdbk2yhUvhl2MA==
98
+ - 2_Ar-Iezc9AJ7ebCmayQU46VmQa3RbCxf1L8JQPtEGnQNzCy0YwZwQ==
101
99
  body:
102
100
  encoding: ASCII-8BIT
103
101
  string: !binary |-
104
102
  <!DOCTYPE html>
  <html>
    <head>
    <meta charset="utf-8">
    <!-- (1) Optimize for mobile versions: http://goo.gl/EOpFl -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- (1) force latest IE rendering engine: bit.ly/1c8EiC9 -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">


    <title>Eating your own Dog Food</title>
    <meta name="description" content="Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..." />

    <meta name="HandheldFriendly" content="True" />
    <meta name="MobileOptimized" content="320" />
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">

    <!-- DublinCore Metadata -->
    <meta property="dc:title" content="Eating your own Dog Food" />
    <meta property="dc:format" content="text/html" />
    <meta property="dc:language" content="en" />
    <meta property="dc:rights" content="CC-BY" />
    <meta property="dc:source" content="DataCite Blog" />
    <meta property="dc:subject" content="Scholarly Communication" />
    <meta property="dc:type" content="website" />


    <meta property="og:site_name" content="Eating your own Dog Food" />
    <meta property="og:description" content="Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..." />
    <meta property="og:image" content="https://blog.datacite.org/images/2016/12/230785.jpg" />
    <meta property="og:type" content="blog" />

    <link href="//fonts.googleapis.com/css?family=Libre+Baskerville:400,400i,700" rel="stylesheet">
    <link href='//fonts.googleapis.com/css?family=Raleway:400,600,400italic,600italic' rel='stylesheet' type='text/css'>
    <link href="//maxcdn.bootstrapcdn.com/font-awesome/4.6.1/css/font-awesome.min.css" rel="stylesheet" type='text/css'>
    <link href="https://assets.datacite.org/stylesheets/datacite.css" rel='stylesheet' type='text/css'>
    <link href="https://assets.datacite.org/images/favicon.ico" rel="icon" type="image/ico" />

    <script src="//cdnjs.cloudflare.com/ajax/libs/fitvids/1.1.0/jquery.fitvids.min.js"></script>

      <script
        src="//d2wy8f7a9ursnm.cloudfront.net/bugsnag-2.min.js"
        data-apikey="c37a5861967091a9b42a1a77e235114a">
      </script>

    <script type="application/ld+json">
      {"@context":"http://schema.org","@type":"BlogPosting","@id":"https://doi.org/10.5438/4k3m-nyvg","name":"Eating your own Dog Food","alternateName":"MS-49-3632-5083","url":"https://blog.datacite.org/eating-your-own-dog-food/","author":[{"@type":"Person","@id":"https://orcid.org/0000-0003-1419-2405","givenName":"Martin","familyName":"Fenner","name":"Martin Fenner"}],"publisher":{"@type":"Organization","name":"DataCite"},"dateCreated":"2016-12-20","datePublished":"2016-12-20","dateModified":"2016-12-20","keywords":"datacite, doi, metadata, featured","version":"1.0","description":"Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...","license":"https://creativecommons.org/licenses/by/4.0/","image":"https://blog.datacite.org/images/2016/12/230785.jpg","isPartOf":{"@type":"Blog","@id":"https://doi.org/10.5438/0000-00SS","name":"DataCite Blog"},"citation":[{"@type":"CreativeWork","@id":"https://doi.org/10.5438/0012"},{"@type":"CreativeWork","@id":"https://doi.org/10.5438/55E5-T5C0"}]}
    </script>
  </head>
  <body>
    <header class="header" id="navtop">
      <div class="navbar navbar-white" role="navigation">
        <div class="container-fluid">
          <div class="navbar-header">
            <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
              <span class="sr-only">Toggle navigation</span>
              <span class="icon-bar"></span>
              <span class="icon-bar"></span>
              <span class="icon-bar"></span>
            </button>
          </div>
          <a class="navbar-brand" href="/">DataCite Blog</a>
          <div class="navbar-collapse collapse">
            <ul class="nav navbar-nav navbar-right">
              <li><a href="https://support.datacite.org">Support</a></li>
              <li class="dropdown">
                <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="sites"><i class='fa fa-th'></i> <span class="caret"></span></a>
                <ul class="dropdown-menu" role="menu">
                  <li><a href="https://www.datacite.org">
                    <i class='fa fa-globe fa-fw'></i>
                    Homepage</a>
                  </li>
                  <li><a href="https://blog.datacite.org">
                    <i class='fa fa-rss fa-fw'></i>
                    Blog</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="https://mds.datacite.org">
                    <i class='fa fa-database fa-fw'></i>
                    MDS</a>
                  </li>
                  <li><a href="https://schema.datacite.org">
                    <i class='fa fa-file-code-o fa-fw'></i>
                    Schema</a>
                  </li>
                  <li><a href="http://citation.crosscite.org">
                    <i class='fa fa-file-text-o fa-fw'></i>
                    Citation Formatter</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="https://search.datacite.org">
                    <i class='fa fa-search fa-fw'></i>
                    Search</a>
                  </li>
                  <li><a href="https://oai.datacite.org">
                    <i class='fa fa-table fa-fw'></i>
                    OAI-PMH</a>
                  </li>
                  <li><a href="https://stats.datacite.org">
                    <i class='fa fa-bar-chart fa-fw'></i>
                    Statistics</a>
                  </li>
                  <li><a href="https://api.datacite.org">
                    <i class='fa fa-cogs fa-fw'></i>
                    REST API</a>
                  </li>
                  <li><a href="http://www.re3data.org">
                    <i class='fa fa-cubes fa-fw'></i>
                    re3data</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="http://status.datacite.org">
                    <i class='fa fa-calendar-check-o fa-fw'></i>
                    Status</a>
                  </li>
                </ul>
              </li>
            </ul>
          </div>
        </div>
      </div>
    </header>
      <div class="wrapper">
    <div class="section section-white">
      <div class="container-fluid">
        <div class="row row-section">
          <div class="col-md-8 col-md-offset-2 post-content">
            <a name="topofpage"></a>
            <div class="post-meta">
              <h1>Eating your own Dog Food</h1>
              December 20, 2016 by Martin Fenner
              • <span class="post-reading-time"></span> read
                <p class="doi"><a href="https://doi.org/10.5438/4k3m-nyvg">https://doi.org/10.5438/4k3m-nyvg</a></p>
            </div>

            <p><a href="https://newrepublic.com/article/115349/dogfooding-tech-slang-working-out-glitches">Eating your own dog food</a> is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for the scholarly outputs we produce. For the most part this is not research data, but rather technical documents such as the DataCite Schema and its documentation <span class="citation">(<a href="#ref-https://doi.org/10.5438/0012">2016</a>)</span>.</p>
<p>These outputs also include the posts on this blog, where we discuss topics relevant for the DataCite community, but also of broader interest to anyone who cares about research data, persistent identifiers, and scholarly infrastructure. And starting today all blog posts on this blog will have a DOI, metadata and use a persistent storage mechanism.</p>
<div class="figure">
<img src="/images/2016/12/230785.jpg" alt="Photo by Bill Emrich. CC Zero." />
<p class="caption">Photo by <a href="https://www.pexels.com/photo/black-and-tan-yorkshire-terrier-puppy-230785/">Bill Emrich</a>. <a href="https://creativecommons.org/publicdomain/zero/1.0/">CC Zero</a>.</p>
</div>
<h3 id="technical-implementation">Technical Implementation</h3>
<p>This blog is powered by the static site generator <a href="https://middlemanapp.com/">Middleman</a>, with blog posts written in <a href="http://commonmark.org/">Markdown</a> and converted to HTML using <a href="http://pandoc.org/">Pandoc</a> and the <a href="https://travis-ci.org">Travis CI</a> continuous integration service. Static site generator means that there is no database or application server powering the site, making website administration simpler, cheaper and safer. In addition to the blog, the <a href="https://www.datacite.org">DataCite homepage</a> and <a href="https://schema.datacite.org">Metadata Schema subsite</a> are also generated using Middleman.</p>
<p>The simplicity is particularly important here, as registering the DOIs and metadata can be accomplished using a command line utility written by DataCite staff that doesn’t need to know much about the internals of Middleman, and thus can be easily adapted to other static site generators such as <a href="http://jekyllrb.com/">Jekyll</a>, <a href="http://gohugo.io/">Hugo</a> or <a href="https://hexo.io/">Hexo</a>. The command line utility is <a href="https://github.com/datacite/cirneco">Cirneco</a>, generating the metadata XML according to the DataCite Metadata Schema, and registering DOI and metadata with the DataCite MDS. Like all tools mentioned in this post Cirneco is open source software, please reach out to us if you are interested in implementing similar functionality for your blog.</p>
<h3 id="generating-dois">Generating DOIs</h3>
<p>The DOIs for this blog are generated automatically, using a modified base32 encoding algorithm that is provided by Cirneco, as discussed last week <span class="citation">(Fenner, <a href="#ref-https://doi.org/10.5438/55E5-T5C0">2016</a>)</span>. The DOI is generated and minted when a new post is pushed to <a href="https://blog.datacite.org" class="uri">https://blog.datacite.org</a>. This avoids two problems: a) DOI-like strings in the wild before publication and b) the randomly generated DOI exists already (we can simply generate a new one). All DOIs are short, without semantic information that might change over time, and with a checksum to minimize transcription errors, for example <strong>https://doi.org/10.5438/XCBJ-G7ZY</strong>. Going forward we encourage users to link to the DataCite Blog using the DOI, as these links will continue to work even if we ever move the blog to a different location.</p>
<h3 id="generating-metadata">Generating Metadata</h3>
<p>For the generation of metadata, we need to strike a balance between simple author provided metadata, but rich enough to aid discovery. We are doing this via three mechanisms:</p>
<ul>
<li>metadata provided by the author</li>
<li>default metadata for the blog</li>
<li>metadata automatically extracted from content</li>
</ul>
<p>The metadata provided by the author are the typical metadata for blog posts, provided via <a href="https://gohugo.io/content/front-matter/">YAML front matter</a> at the beginning of each post:</p>
<div class="sourceCode"><pre class="sourceCode yaml"><code class="sourceCode yaml"><span class="ot">---</span>
<span class="fu">layout:</span> post
<span class="fu">title:</span> Eating your own Dog Food
<span class="fu">author:</span> mfenner
<span class="fu">date:</span> 2016-12-19
<span class="fu">tags:</span>
<span class="kw">-</span> datacite
<span class="kw">-</span> doi
<span class="kw">-</span> metadata
<span class="ot">---</span></code></pre></div>
<p>We can reuse all these metadata when generating DataCite metadata, using the tags as <code>subjects</code>.</p>
<p>The default metadata are metadata that always stay the same for the blog, such as <code>publisher</code>, <code>HostingInstitution</code> and <code>rights</code>. We can store them in a site-wide configuration file. We can also assume reasonable defaults that can be overridden in the YAML front matter, e.g. <code>resourceType</code> (we use <a href="https://schema.org/BlogPosting">BlogPosting</a> with <code>resourceTypeGeneral</code> Text) and <code>version</code>. We store more information about authors outside the blog post, including <code>givenName</code>, <code>familyName</code> and <code>nameIdentifier</code> (we now show the ORCID ID of every blog author at the bottom of the post).</p>
<p>Finally, there are metadata that we can automatically extract from the blog post, and we are currently doing this for the <code>description</code> and <code>relatedIdentifier</code>. This blog uses Pandoc and BibTex to generate the references section at the end, and we can fetch this information and convert it into the format needed for <code>relatedIdentifier</code>.</p>
<p>Taken together we can provide all metadata that are <em>required</em> or <em>recommended</em> in the Metadata Schema documentation <span class="citation">(<a href="#ref-https://doi.org/10.5438/0012">2016</a>)</span>, and we can do this without any extra effort for the author. The full XML is available <a href="https://data.datacite.org/application/x-datacite+xml/10.5438/4K3M-NYVG">here</a>.</p>
<p>Not all blog posts need to be cited formally with metadata in a <em>references</em> list formatted according to a specific citation style. But these metadata greatly help with discovery, a search in DataCite Search for <a href="http://search.datacite.org/works?query=eating+dog+food">eating dog food</a> will for example bring up this blog post as the first hit.</p>
<h3 id="persistent-storage">Persistent storage</h3>
<p>Using DOIs means that readers not only expect rich metadata that help with citation and discovery, but also that DataCite takes extra care to preserve the blog posts, thinking beyond the particular technical implementation or even the continuing existence of this blog. This is an area where we do need to do more work, starting with a decision about the best archival format for a blog post (HTML, PDF, <a href="https://jats.nlm.nih.gov/">JATS</a>?). For now blog posts are hosted in multiple Git repositories (<a href="https://github.com/datacite/blog">one of them on Github</a>), and in two independent Amazon S3 buckets that each use <a href="http://docs.aws.amazon.com/AmazonS3/latest/dev/Versioning.html">versioning</a>. Multiple locations with versioning are a good start, but more work is clearly needed.</p>
<h3 id="references" class="unnumbered">References</h3>
<div id="refs" class="references">
<div id="ref-https://doi.org/10.5438/0012">
<p>DataCite Metadata Working Group. (2016). DataCite Metadata Schema for the Publication and Citation of Research Data v4.0. <em>DataCite</em>. <a href="https://doi.org/10.5438/0012" class="uri">https://doi.org/10.5438/0012</a></p>
</div>
<div id="ref-https://doi.org/10.5438/55E5-T5C0">
<p>Fenner, M. (2016). Cool DOI’s. <em>DataCite</em>. <a href="https://doi.org/10.5438/55E5-T5C0" class="uri">https://doi.org/10.5438/55E5-T5C0</a></p>
</div>
</div>

            <hr width="80%">
          </div>
        </div>
        <div class="row">
          <div class="col-md-5 col-md-offset-2 post-content">
            <div class="bottom-teaser cf">
  <div class="isLeft">
    <section class="author">
          <div class="author-image" style="background-image: url(https://www.gravatar.com/avatar/434592a097e91261792ebd6b492042bc?s=250&d=mm&r=x)">Blog Logo</div>
        <h4>Martin Fenner</h4>
        <p class="bio">DataCite Technical Director</p>
        <p class="orcid"><a href="https://orcid.org/0000-0003-1419-2405">https://orcid.org/0000-0003-1419-2405</a></p>
        <div class="clearfix"></div>
      <h4>Eating your own Dog Food</h4>
        <p class="published"><a href="https://doi.org/10.5438/4k3m-nyvg">https://doi.org/10.5438/4k3m-nyvg</a>
      <p class="published"><i class="fa fa-calendar"></i> <time datetime="2016-12-20 00:00">December 20, 2016</time></p>
      <p class="published"><i class="fa fa-history"></i> <a href="https://github.com/datacite/blog/commits/master/source/posts/eating-your-own-dog-food.html.md">History</a></p>
      <p class="published">© 2016 Martin Fenner. Distributed under the terms of the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution license</a>.</p>
      <p class="published">
        <i class="fa fa-tags"></i>
        <a href="/index.html?tag=datacite">datacite</a>, <a href="/index.html?tag=doi">doi</a>, <a href="/index.html?tag=metadata">metadata</a>, <a href="/index.html?tag=featured">featured</a>
      </p>
    </section>
  </div>
</div>

          </div>
          <div class="col-md-2 col-md-offset-1">
             <div class="bottom-teaser cf">
  <div class="isLeft">
    <h5 class="index-headline featured"><span>Share on</span></h5>
      <a class="icon-twitter" href="http://twitter.com/share?text=On the @datacite blog: Eating your own Dog Food&amp;url=https://blog.datacite.org/eating-your-own-dog-food/"
        onclick="window.open(this.href, 'twitter-share', 'width=550,height=255');return false;">
        <i class="fa fa-twitter fa-2x"></i><span class="hidden">twitter</span>
      </a>
      <a class="icon-facebook" href="https://www.facebook.com/sharer.php?t=On the @datacite blog: Eating your own Dog Food&amp;u=https://blog.datacite.org/eating-your-own-dog-food/"
        onclick="window.open(this.href, 'facebook-share', 'width=550,height=255');return false;">
        <i class="fa fa-facebook fa-2x"></i><span class="hidden">facebook</span>
      </a>
  </div>
</div>

          </div>
        </div>
          <div class="row">
            <div class="col-md-8 col-md-offset-2 post-content">
              <div id="disqus_thread"></div>
<script>
    var disqus_config = function () {
        this.page.url = 'https://blog.datacite.org/eating-your-own-dog-food/';
        this.page.identifier = 'https://blog.datacite.org/eating-your-own-dog-food/';
    };
    (function() {
        var d = document, s = d.createElement('script');

        s.src = '//datacite.disqus.com/embed.js';  //

        s.setAttribute('data-timestamp', +new Date());
        (d.head || d.body).appendChild(s);
    })();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript" rel="nofollow">comments powered by Disqus.</a></noscript>

            </div>
          </div>
      </div>
    </div>
  </div>
    <footer class='row footer'>
      <div class="container-fluid">
        <div class='col-md-3 col-sm-4'>
          <h4>About DataCite</h4>
          <ul>
            <li><a href="https://www.datacite.org/mission.html">What we do</a></li>
            <li><a href="https://www.datacite.org/board.html">Board</a></li>
            <li><a href="https://www.datacite.org/steering.html">Steering groups</a></li>
            <li><a href="https://www.datacite.org/staff.html">Staff</a></li>
            <li><a href="https://www.datacite.org/jobopportunities.html">Job opportunities</a></li>
          </ul>
        </div>
        <div class='col-md-3 col-sm-4'>
          <h4>Services</h4>
          <ul>
            <li><a href="https://www.datacite.org/dois.html">Assign DOIs</a></li>
            <li><a href="https://www.datacite.org/search.html">Metadata search</a></li>
            <li><a href="https://www.datacite.org/eventdata.html">Event data</a></li>
            <li><a href="https://www.datacite.org/profiles.html">Profiles</a></li>
            <li><a href="https://www.datacite.org/re3data.html">re3data</a></li>
            <li><a href="https://www.datacite.org/citation.html">Citation formatter</a></li>
            <li><a href="https://www.datacite.org/stats.html">Statistics</a></li>
            <li><a href="https://www.datacite.org/content.html">Content negotiation</a></li>
            <li><a href="https://www.datacite.org/oaipmh.html">OAI-PMH</a></li>
          </ul>
        </div>
        <div class='col-md-3 col-sm-4'>
          <h4>Resources</h4>
          <ul>
            <li><a href="https://schema.datacite.org">Metadata schema</a></li>
            <li><a href="https://support.datacite.org">Support</a></li>
          </ul>
          <h4>Community</h4>
          <ul>
            <li><a href="https://www.datacite.org/members.html">Members</a></li>
            <li><a href="https://www.datacite.org/partners.html">Partners</a></li>
            <li><a href="https://www.datacite.org/steering.html">Steering groups</a></li>
            <li><a href="https://www.datacite.org/events.html">Events</a></li>
            <li><a href="https://www.datacite.org/roadmap.html">Roadmap</a></li>
            <li><a href="https://www.datacite.org/user-stories.html">User Stories</a></li>
          </ul>
        </div>
        <div class='col-md-3'>
          <h4 class="share">Contact us</h4>
          <a href='mailto:support@datacite.org' class="share">
            <i class='fa fa-at'></i>
          </a>
          <a href='https://blog.datacite.org/feed.xml' class="share">
            <i class='fa fa-rss'></i>
          </a>
          <a href='https://twitter.com/datacite' class="share">
            <i class='fa fa-twitter'></i>
          </a>
          <a href='https://github.com/datacite/datacite' class="share">
            <i class='fa fa-github'></i>
          </a>
          <a href='https://www.linkedin.com/company/datacite' class="share">
            <i class='fa fa-linkedin'></i>
          </a>
          <ul>
            <li><a href="https://www.datacite.org/terms.html">Terms and conditions</a></li>
            <li><a href="https://www.datacite.org/privacy.html">Privacy policy</a></li>
            <li><a href="https://www.datacite.org/acknowledgments.html">Acknowledgements</a></li>
          </ul>
          <a href="http://status.datacite.org" target="_blank">
            <span class="color-dot"></span>
            <span class="color-description"></span>
          </a>
        </div>
      </div>
    </footer>
    <script src="//code.jquery.com/jquery-2.1.4.min.js"></script>
    <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
    <script src="//cdn.statuspage.io/se-v2.js"></script>
    <script src="https://assets.datacite.org/javascripts/default.js"></script>
    <script src="/javascripts/readingTime.min.js"></script>
    <script src="/javascripts/index.js"></script>
    <script src="/javascripts/search.js"></script>
    <script id="dsq-count-scr" src="//datacite.disqus.com/count.js" async></script>
    <script>
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');

      ga('create', 'UA-22806196-6', 'auto');
      ga('send', 'pageview');
    </script>
  </body>
</html>

105
103
  http_version:
106
- recorded_at: Fri, 19 Jan 2018 18:57:54 GMT
104
+ recorded_at: Thu, 01 Feb 2018 10:08:41 GMT
107
105
  recorded_with: VCR 3.0.3
@@ -29,17 +29,15 @@ http_interactions:
29
29
  Location:
30
30
  - "/eating-your-own-dog-food/"
31
31
  Date:
32
- - Fri, 19 Jan 2018 18:47:19 GMT
32
+ - Thu, 01 Feb 2018 10:08:40 GMT
33
33
  Server:
34
34
  - AmazonS3
35
- Age:
36
- - '634'
37
35
  X-Cache:
38
- - Hit from cloudfront
36
+ - Miss from cloudfront
39
37
  Via:
40
- - 1.1 efed2d5ffeb697060f4a3aa73bdf068f.cloudfront.net (CloudFront)
38
+ - 1.1 9be2d2d7560f88bdc5d5a3a94863566a.cloudfront.net (CloudFront)
41
39
  X-Amz-Cf-Id:
42
- - aeEi081A_JZeBF8wthNIWES897Lmrm6LXFGwl-WE_h0dQ6yRXznNbw==
40
+ - UOfP-sJzkTg2begqXNuc1z47RNigEEOwBkONle8hAqvgtLG8KdtXKQ==
43
41
  body:
44
42
  encoding: UTF-8
45
43
  string: |
@@ -50,14 +48,14 @@ http_interactions:
50
48
  <ul>
51
49
  <li>Code: Found</li>
52
50
  <li>Message: Resource Found</li>
53
- <li>RequestId: 9B7ED3BBCD7E1247</li>
54
- <li>HostId: qEFnifqb9w2rEeTP6bdbM4fUF5zcKK5f98hgCH10zKGU9mezmBfq//xgQjIv7q6mwWFPLMgT0a8=</li>
51
+ <li>RequestId: 954101BD524D1D33</li>
52
+ <li>HostId: UFzFKnk0WRHCthFD7XYEH891fwFwQwMWBEQaM4G+42waSLPsXAr9zRkYa7KgHfiNxHkQ+wqPvzw=</li>
55
53
  </ul>
56
54
  <hr/>
57
55
  </body>
58
56
  </html>
59
57
  http_version:
60
- recorded_at: Fri, 19 Jan 2018 18:57:54 GMT
58
+ recorded_at: Thu, 01 Feb 2018 10:08:41 GMT
61
59
  - request:
62
60
  method: get
63
61
  uri: https://blog.datacite.org/eating-your-own-dog-food/
@@ -81,7 +79,7 @@ http_interactions:
81
79
  Connection:
82
80
  - keep-alive
83
81
  Date:
84
- - Fri, 19 Jan 2018 06:39:24 GMT
82
+ - Thu, 01 Feb 2018 02:06:02 GMT
85
83
  Cache-Control:
86
84
  - max-age=31536000
87
85
  Last-Modified:
@@ -91,17 +89,17 @@ http_interactions:
91
89
  Server:
92
90
  - AmazonS3
93
91
  Age:
94
- - '44311'
92
+ - '28959'
95
93
  X-Cache:
96
94
  - Hit from cloudfront
97
95
  Via:
98
- - 1.1 6e8dd39e00d9a5c1a31d69ffa2821a5e.cloudfront.net (CloudFront)
96
+ - 1.1 16ba4fd291c7ac4ec424fdbac7065ef1.cloudfront.net (CloudFront)
99
97
  X-Amz-Cf-Id:
100
- - rOJUw3GsymJgV3-ZyetoBK9IdW1C-BWnygfZeB_uhrql2R4Uzix-qQ==
98
+ - MxE0Sqet2MM8w-_fopQVPA4QC9yRqhgrUuUH3IwUoRxihN_-Xhj4Ew==
101
99
  body:
102
100
  encoding: ASCII-8BIT
103
101
  string: !binary |-
104
102
  <!DOCTYPE html>
  <html>
    <head>
    <meta charset="utf-8">
    <!-- (1) Optimize for mobile versions: http://goo.gl/EOpFl -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- (1) force latest IE rendering engine: bit.ly/1c8EiC9 -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">


    <title>Eating your own Dog Food</title>
    <meta name="description" content="Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..." />

    <meta name="HandheldFriendly" content="True" />
    <meta name="MobileOptimized" content="320" />
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">

    <!-- DublinCore Metadata -->
    <meta property="dc:title" content="Eating your own Dog Food" />
    <meta property="dc:format" content="text/html" />
    <meta property="dc:language" content="en" />
    <meta property="dc:rights" content="CC-BY" />
    <meta property="dc:source" content="DataCite Blog" />
    <meta property="dc:subject" content="Scholarly Communication" />
    <meta property="dc:type" content="website" />


    <meta property="og:site_name" content="Eating your own Dog Food" />
    <meta property="og:description" content="Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..." />
    <meta property="og:image" content="https://blog.datacite.org/images/2016/12/230785.jpg" />
    <meta property="og:type" content="blog" />

    <link href="//fonts.googleapis.com/css?family=Libre+Baskerville:400,400i,700" rel="stylesheet">
    <link href='//fonts.googleapis.com/css?family=Raleway:400,600,400italic,600italic' rel='stylesheet' type='text/css'>
    <link href="//maxcdn.bootstrapcdn.com/font-awesome/4.6.1/css/font-awesome.min.css" rel="stylesheet" type='text/css'>
    <link href="https://assets.datacite.org/stylesheets/datacite.css" rel='stylesheet' type='text/css'>
    <link href="https://assets.datacite.org/images/favicon.ico" rel="icon" type="image/ico" />

    <script src="//cdnjs.cloudflare.com/ajax/libs/fitvids/1.1.0/jquery.fitvids.min.js"></script>

      <script
        src="//d2wy8f7a9ursnm.cloudfront.net/bugsnag-2.min.js"
        data-apikey="c37a5861967091a9b42a1a77e235114a">
      </script>

    <script type="application/ld+json">
      {"@context":"http://schema.org","@type":"BlogPosting","@id":"https://doi.org/10.5438/4k3m-nyvg","name":"Eating your own Dog Food","alternateName":"MS-49-3632-5083","url":"https://blog.datacite.org/eating-your-own-dog-food/","author":[{"@type":"Person","@id":"https://orcid.org/0000-0003-1419-2405","givenName":"Martin","familyName":"Fenner","name":"Martin Fenner"}],"publisher":{"@type":"Organization","name":"DataCite"},"dateCreated":"2016-12-20","datePublished":"2016-12-20","dateModified":"2016-12-20","keywords":"datacite, doi, metadata, featured","version":"1.0","description":"Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...","license":"https://creativecommons.org/licenses/by/4.0/","image":"https://blog.datacite.org/images/2016/12/230785.jpg","isPartOf":{"@type":"Blog","@id":"https://doi.org/10.5438/0000-00SS","name":"DataCite Blog"},"citation":[{"@type":"CreativeWork","@id":"https://doi.org/10.5438/0012"},{"@type":"CreativeWork","@id":"https://doi.org/10.5438/55E5-T5C0"}]}
    </script>
  </head>
  <body>
    <header class="header" id="navtop">
      <div class="navbar navbar-white" role="navigation">
        <div class="container-fluid">
          <div class="navbar-header"
            <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
              <span class="sr-only">Toggle navigation</span>
              <span class="icon-bar"></span>
              <span class="icon-bar"></span>
              <span class="icon-bar"></span>
            </button>
          </div>
          <a class="navbar-brand" href="/">DataCite Blog</a>
          <div class="navbar-collapse collapse">
            <ul class="nav navbar-nav navbar-right">
              <li><a href="https://support.datacite.org">Support</a></li>
              <li class="dropdown">
                <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="sites"><i class='fa fa-th'></i> <span class="caret"></span></a>
                <ul class="dropdown-menu" role="menu">
                  <li><a href="https://www.datacite.org">
                    <i class='fa fa-globe fa-fw'></i>
                    Homepage</a>
                  </li>
                  <li><a href="https://blog.datacite.org">
                    <i class='fa fa-rss fa-fw'></i>
                    Blog</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="https://mds.datacite.org">
                    <i class='fa fa-database fa-fw'></i>
                    MDS</a>
                  </li>
                  <li><a href="https://schema.datacite.org">
                    <i class='fa fa-file-code-o fa-fw'></i>
                    Schema</a>
                  </li>
                  <li><a href="http://citation.crosscite.org">
                    <i class='fa fa-file-text-o fa-fw'></i>
                    Citation Formatter</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="https://search.datacite.org">
                    <i class='fa fa-search fa-fw'></i>
                    Search</a>
                  </li>
                  <li><a href="https://oai.datacite.org">
                    <i class='fa fa-table fa-fw'></i>
                    OAI-PMH</a>
                  </li>
                  <li><a href="https://stats.datacite.org">
                    <i class='fa fa-bar-chart fa-fw'></i>
                    Statistics</a>
                  </li>
                  <li><a href="https://api.datacite.org">
                    <i class='fa fa-cogs fa-fw'></i>
                    REST API</a>
                  </li>
                  <li><a href="http://www.re3data.org">
                    <i class='fa fa-cubes fa-fw'></i>
                    re3data</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="http://status.datacite.org">
                    <i class='fa fa-calendar-check-o fa-fw'></i>
                    Status</a>
                  </li>
                </ul>
              </li>
            </ul>
          </div>
        </div>
      </div>
    </header>
      <div class="wrapper">
    <div class="section section-white">
      <div class="container-fluid">
        <div class="row row-section">
          <div class="col-md-8 col-md-offset-2 post-content">
            <a name="topofpage"></a>
            <div class="post-meta">
              <h1>Eating your own Dog Food</h1>
              December 20, 2016 by Martin Fenner
              • <span class="post-reading-time"></span> read
                <p class="doi"><a href="https://doi.org/10.5438/4k3m-nyvg">https://doi.org/10.5438/4k3m-nyvg</a></p>
            </div>

            <p><a href="https://newrepublic.com/article/115349/dogfooding-tech-slang-working-out-glitches">Eating your own dog food</a> is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for the scholarly outputs we produce. For the most part this is not research data, but rather technical documents such as the DataCite Schema and its documentation <span class="citation">(<a href="#ref-https://doi.org/10.5438/0012">2016</a>)</span>.</p>
<p>These outputs also include the posts on this blog, where we discuss topics relevant for the DataCite community, but also of broader interest to anyone who cares about research data, persistent identifiers, and scholarly infrastructure. And starting today all blog posts on this blog will have a DOI, metadata and use a persistent storage mechanism.</p>
<div class="figure">
<img src="/images/2016/12/230785.jpg" alt="Photo by Bill Emrich. CC Zero." />
<p class="caption">Photo by <a href="https://www.pexels.com/photo/black-and-tan-yorkshire-terrier-puppy-230785/">Bill Emrich</a>. <a href="https://creativecommons.org/publicdomain/zero/1.0/">CC Zero</a>.</p>
</div>
<h3 id="technical-implementation">Technical Implementation</h3>
<p>This blog is powered by the static site generator <a href="https://middlemanapp.com/">Middleman</a>, with blog posts written in <a href="http://commonmark.org/">Markdown</a> and converted to HTML using <a href="http://pandoc.org/">Pandoc</a> and the <a href="https://travis-ci.org">Travis CI</a> continuous integration service. Static site generator means that there is no database or application server powering the site, making website adminstration simpler, cheaper and safer. In addition to the blog, the <a href="https://www.datacite.org">DataCite homepage</a> and <a href="https://schema.datacite.org">Metadata Schema subsite</a> are also generated using Middleman.</p>
<p>The simplicity is particularly important here, as registering the DOIs and metadata can be accomplished using a command line utility written by DataCite staff that doesn’t need to know much about the internals of Middleman, and thus can be easily adapted to other static site generators such as <a href="http://jekyllrb.com/">Jekyll</a>, <a href="http://gohugo.io/">Hugo</a> or <a href="https://hexo.io/">Hexo</a>. The command line utility is <a href="https://github.com/datacite/cirneco">Cirneco</a>, generating the metadata XML according to the DataCite Metadata Schema, and registering DOI and metadata with the DataCite MDS. Like all tools mentioned in this post Cirneco is open source software, please reach out to us if you are interested in implementing similar functionality for your blog.</p>
<h3 id="generating-dois">Generating DOIs</h3>
<p>The DOIs for this blog are generated automatically, using a modified base32 encoding algorithm that is provided by Cirneco, as discussed last week <span class="citation">(Fenner, <a href="#ref-https://doi.org/10.5438/55E5-T5C0">2016</a>)</span>. The DOI is generated and minted when a new post is pushed to <a href="https://blog.datacite.org" class="uri">https://blog.datacite.org</a>. This avoids two problems: a) DOI-like strings in the wild before publication and b) the randomly generated DOI exists already (we can simply generate a new one). All DOIs are short, without semantic infomation that might change over time, and with a checksum to minimize transcription errors, for example <strong>https://doi.org/10.5438/XCBJ-G7ZY</strong>. Going forward we encourage users to link to the DataCite Blog using the DOI, as these links will continue to work even if we ever move the blog to a different location.</p>
<h3 id="generating-metadata">Generating Metadata</h3>
<p>For the generation of metadata, we need to strike a balance between simple author provided metadata, but rich enough to aid discovery. We are doing this via three mechanisms:</p>
<ul>
<li>metadata provided by the author</li>
<li>default metadata for the blog</li>
<li>metadata automatically extracted from content</li>
</ul>
<p>The metadata provided by the author are the typical metadata for blog posts, provided via <a href="https://gohugo.io/content/front-matter/">YAML front matter</a> at the beginning of each post:</p>
<div class="sourceCode"><pre class="sourceCode yaml"><code class="sourceCode yaml"><span class="ot">---</span>
<span class="fu">layout:</span> post
<span class="fu">title:</span> Eating your own Dog Food
<span class="fu">author:</span> mfenner
<span class="fu">date:</span> 2016-12-19
<span class="fu">tags:</span>
<span class="kw">-</span> datacite
<span class="kw">-</span> doi
<span class="kw">-</span> metadata
<span class="ot">---</span></code></pre></div>
<p>We can reuse all these metadata when generating DataCite metadata, using the tags as <code>subjects</code>.</p>
<p>The default metadata are metadata that always stay the same for the blog, such as <code>publisher</code>, <code>HostingInstitution</code> and <code>rights</code>. We can store them in a site-wide configuration file. We can also assume reasonable defaults that can be overridden in the YAML front matter, e.g. <code>resourceType</code> (we use <a href="https://schema.org/BlogPosting">BlogPosting</a> with <code>resourceTypeGeneral</code> Text) and <code>version</code>. We store more information about authors outside the blog post, including <code>givenName</code>, <code>familyName</code> and <code>nameIdentifier</code> (we now show the ORCID ID of every blog author at the bottom of the post).</p>
<p>Finally, there are metadata that we can automatically extract from the blog post, and we are currently doing this for the <code>description</code> and <code>relatedIdentifier</code>. This blog uses Pandoc and BibTex to generate the references section at the end, and we can fetch this information and convert it into the format needed for <code>relatedIdentifier</code>.</p>
<p>Taken together we can provide all metadata that are <em>required</em> or <em>recommended</em> in the Metadata Schema documentation <span class="citation">(<a href="#ref-https://doi.org/10.5438/0012">2016</a>)</span>, and we can do this without any extra effort for the author. The full XML is avalailable <a href="https://data.datacite.org/application/x-datacite+xml/10.5438/4K3M-NYVG">here</a>.</p>
<p>Not all blog posts need to be cited formally with metadata in a <em>references</em> list formatted according to a specific citation style. But these metadata greatly help with discovery, a search in DataCite Search for <a href="http://search.datacite.org/works?query=eating+dog+food">eating dog food</a> will for example bring up this blog post as the first hit.</p>
<h3 id="persistent-storage">Persistent storage</h3>
<p>Using DOIs means that readers not only expect rich metadata that help with citation and discovery, but also that DataCite takes extra care to preserve the blog posts, thinking beyond the particular technical implementation or even the contiuing existence of this blog. This is an area where we do need to do more work, starting with a decision about the best archival format for a blog post (HTML, PDF, <a href="https://jats.nlm.nih.gov/">JATS</a>?). For now blog posts are hosted in multiple Git repositories (<a href="https://github.com/datacite/blog">one of them on Github</a>), and in two independent Amazon S3 buckets that each use <a href="http://docs.aws.amazon.com/AmazonS3/latest/dev/Versioning.html">versioning</a>. Multiple locations with versioning are a good start, but more work is clearly needed.</p>
<h3 id="references" class="unnumbered">References</h3>
<div id="refs" class="references">
<div id="ref-https://doi.org/10.5438/0012">
<p>DataCite Metadata Working Group. (2016). DataCite Metadata Schema for the Publication and Citation of Research Data v4.0. <em>DataCite</em>. <a href="https://doi.org/10.5438/0012" class="uri">https://doi.org/10.5438/0012</a></p>
</div>
<div id="ref-https://doi.org/10.5438/55E5-T5C0">
<p>Fenner, M. (2016). Cool DOI’s. <em>DataCite</em>. <a href="https://doi.org/10.5438/55E5-T5C0" class="uri">https://doi.org/10.5438/55E5-T5C0</a></p>
</div>
</div>

            <hr width="80%">
          </div>
        </div>
        <div class="row">
          <div class="col-md-5 col-md-offset-2 post-content">
            <div class="bottom-teaser cf">
  <div class="isLeft">
    <section class="author">
          <div class="author-image" style="background-image: url(https://www.gravatar.com/avatar/434592a097e91261792ebd6b492042bc?s=250&d=mm&r=x)">Blog Logo</div>
        <h4>Martin Fenner</h4>
        <p class="bio">DataCite Technical Director</p>
        <p class="orcid"><a href="https://orcid.org/0000-0003-1419-2405">https://orcid.org/0000-0003-1419-2405</a></p>
        <div class="clearfix"></div>
      <h4>Eating your own Dog Food</h4>
        <p class="published"><a href="https://doi.org/10.5438/4k3m-nyvg">https://doi.org/10.5438/4k3m-nyvg</a>
      <p class="published"><i class="fa fa-calendar"></i> <time datetime="2016-12-20 00:00">December 20, 2016</time></p>
      <p class="published"><i class="fa fa-history"></i> <a href="https://github.com/datacite/blog/commits/master/source/posts/eating-your-own-dog-food.html.md">History</a></p>
      <p class="published">© 2016 Martin Fenner. Distributed under the terms of the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution license</a>.</p>
      <p class="published">
        <i class="fa fa-tags"></i>
        <a href="/index.html?tag=datacite">datacite</a>, <a href="/index.html?tag=doi">doi</a>, <a href="/index.html?tag=metadata">metadata</a>, <a href="/index.html?tag=featured">featured</a>
      </p>
    </section>
  </div>
</div>

          </div>
          <div class="col-md-2 col-md-offset-1">
             <div class="bottom-teaser cf">
  <div class="isLeft">
    <h5 class="index-headline featured"><span>Share on</span></h5>
      <a class="icon-twitter" href="http://twitter.com/share?text=On the @datacite blog: Eating your own Dog Food&amp;url=https://blog.datacite.org/eating-your-own-dog-food/"
        onclick="window.open(this.href, 'twitter-share', 'width=550,height=255');return false;">
        <i class="fa fa-twitter fa-2x"></i><span class="hidden">twitter</span>
      </a>
      <a class="icon-facebook" href="https://www.facebook.com/sharer.php?t=On the @datacite blog: Eating your own Dog Food&amp;u=https://blog.datacite.org/eating-your-own-dog-food/"
        onclick="window.open(this.href, 'facebook-share', 'width=550,height=255');return false;">
        <i class="fa fa-facebook fa-2x"></i><span class="hidden">facebook</span>
      </a>
  </div>
</div>

          </div>
        </div>
          <div class="row">
            <div class="col-md-8 col-md-offset-2 post-content">
              <div id="disqus_thread"></div>
<script>
    var disqus_config = function () {
        this.page.url = 'https://blog.datacite.org/eating-your-own-dog-food/';
        this.page.identifier = 'https://blog.datacite.org/eating-your-own-dog-food/';
    };
    (function() {
        var d = document, s = d.createElement('script');

        s.src = '//datacite.disqus.com/embed.js';  //

        s.setAttribute('data-timestamp', +new Date());
        (d.head || d.body).appendChild(s);
    })();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript" rel="nofollow">comments powered by Disqus.</a></noscript>

            </div>
          </div>
      </div>
    </div>
  </div>
    <footer class='row footer'>
      <div class="container-fluid">
        <div class='col-md-3 col-sm-4'>
          <h4>About DataCite</h4>
          <ul>
            <li><a href="https://www.datacite.org/mission.html">What we do</a></li>
            <li><a href="https://www.datacite.org/board.html">Board</a></li>
            <li><a href="https://www.datacite.org/steering.html">Steering groups</a></li>
            <li><a href="https://www.datacite.org/staff.html">Staff</a></li>
            <li><a href="https://www.datacite.org/jobopportunities.html">Job opportunities</a></li>
          </ul>
        </div>
        <div class='col-md-3 col-sm-4'>
          <h4>Services</h4>
          <ul>
            <li><a href="https://www.datacite.org/dois.html">Assign DOIs</a></li>
            <li><a href="https://www.datacite.org/search.html">Metadata search</a></li>
            <li><a href="https://www.datacite.org/eventdata.html">Event data</a></li>
            <li><a href="https://www.datacite.org/profiles.html">Profiles</a></li>
            <li><a href="https://www.datacite.org/re3data.html">re3data</a></li>
            <li><a href="https://www.datacite.org/citation.html">Citation formatter</a></li>
            <li><a href="https://www.datacite.org/stats.html">Statistics</a></li>
            <li><a href="https://www.datacite.org/content.html">Content negotiation</a></li>
            <li><a href="https://www.datacite.org/oaipmh.html">OAI-PMH</a></li>
          </ul>
        </div>
        <div class='col-md-3 col-sm-4'>
          <h4>Resources</h4>
          <ul>
            <li><a href="https://schema.datacite.org">Metadata schema</a></li>
            <li><a href="https://support.datacite.org">Support</a></li>
          </ul>
          <h4>Community</h4>
          <ul>
            <li><a href="https://www.datacite.org/members.html">Members</a></li>
            <li><a href="https://www.datacite.org/partners.html">Partners</a></li>
            <li><a href="https://www.datacite.org/steering.html">Steering groups</a></li>
            <li><a href="https://www.datacite.org/events.html">Events</a></li>
            <li><a href="https://www.datacite.org/roadmap.html">Roadmap</a></li>
            <li><a href="https://www.datacite.org/user-stories.html">User Stories</a></li>
          </ul>
        </div>
        <div class='col-md-3'>
          <h4 class="share">Contact us</h4>
          <a href='mailto:support@datacite.org' class="share">
            <i class='fa fa-at'></i>
          </a>
          <a href='https://blog.datacite.org/feed.xml' class="share">
            <i class='fa fa-rss'></i>
          </a>
          <a href='https://twitter.com/datacite' class="share">
            <i class='fa fa-twitter'></i>
          </a>
          <a href='https://github.com/datacite/datacite' class="share">
            <i class='fa fa-github'></i>
          </a>
          <a href='https://www.linkedin.com/company/datacite' class="share">
            <i class='fa fa-linkedin'></i>
          </a>
          <ul>
            <li><a href="https://www.datacite.org/terms.html">Terms and conditions</a></li>
            <li><a href="https://www.datacite.org/privacy.html">Privacy policy</a></li>
            <li><a href="https://www.datacite.org/acknowledgments.html">Acknowledgements</a></li>
          </ul>
          <a href="http://status.datacite.org" target="_blank">
            <span class="color-dot"></span>
            <span class="color-description"></span>
          </a>
        </div>
      </div>
    </footer>
    <script src="//code.jquery.com/jquery-2.1.4.min.js"></script>
    <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
    <script src="//cdn.statuspage.io/se-v2.js"></script>
    <script src="https://assets.datacite.org/javascripts/default.js"></script>
    <script src="/javascripts/readingTime.min.js"></script>
    <script src="/javascripts/index.js"></script>
    <script src="/javascripts/search.js"></script>
    <script id="dsq-count-scr" src="//datacite.disqus.com/count.js" async></script>
    <script>
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');

      ga('create', 'UA-22806196-6', 'auto');
      ga('send', 'pageview');
    </script>
  </body>
</html>

105
103
  http_version:
106
- recorded_at: Fri, 19 Jan 2018 18:57:54 GMT
104
+ recorded_at: Thu, 01 Feb 2018 10:08:41 GMT
107
105
  recorded_with: VCR 3.0.3
@@ -19,15 +19,15 @@ describe Cirneco::DataCenter, vcr: true, :order => :defined do
19
19
  it 'should get all dois by prefix' do
20
20
  response = subject.get_dois_by_prefix(prefix, options)
21
21
  dois = response.body["data"]
22
- expect(dois.length).to eq(5)
22
+ expect(dois.length).to eq(6)
23
23
  expect(dois.first).to eq("10.5072/0007-NW90")
24
24
  end
25
25
  end
26
26
 
27
27
  context "base32" do
28
28
  it 'should decode doi' do
29
- doi = "10.5438/zwsf-4y7y"
30
- expect(subject.decode_doi(doi)).to eq(34252231623)
29
+ doi = "10.5072/079j-xh42"
30
+ expect(subject.decode_doi(doi)).to eq(7654321)
31
31
  end
32
32
 
33
33
  it 'should decode doi not encoded' do
@@ -37,12 +37,12 @@ describe Cirneco::DataCenter, vcr: true, :order => :defined do
37
37
 
38
38
  it 'should encode doi' do
39
39
  number = 123
40
- expect(subject.encode_doi(prefix, number: number)).to eq("10.5072/0000-03vc")
40
+ expect(subject.encode_doi(prefix, number: number)).to eq("10.5072/0000-3v20")
41
41
  end
42
42
 
43
43
  it 'should encode doi number with other characters' do
44
- number = "MS-12-7196-7302"
45
- expect(subject.encode_doi(prefix, number: number)).to eq("10.5072/15x1-bj6r")
44
+ number = "MS-2-7196-7302"
45
+ expect(subject.encode_doi(prefix, number: number)).to eq("10.5072/83bs-2615")
46
46
  end
47
47
 
48
48
  it 'should encode doi random number' do
@@ -57,13 +57,13 @@ describe Cirneco::DataCenter, vcr: true, :order => :defined do
57
57
  it 'should encode doi with shoulder' do
58
58
  number = 7654321
59
59
  shoulder = "dryad."
60
- expect(subject.encode_doi(prefix, number: number, shoulder: shoulder)).to eq("10.5072/dryad.79jxhm")
60
+ expect(subject.encode_doi(prefix, number: number, shoulder: shoulder)).to eq("10.5072/dryad.79jxh42")
61
61
  end
62
62
 
63
63
  it 'should encode doi with empty shoulder' do
64
64
  number = 7654321
65
65
  shoulder = nil
66
- expect(subject.encode_doi(prefix, number: number, shoulder: shoulder)).to eq("10.5072/0079-jxhm")
66
+ expect(subject.encode_doi(prefix, number: number, shoulder: shoulder)).to eq("10.5072/079j-xh42")
67
67
  end
68
68
  end
69
69
 
@@ -29,19 +29,19 @@ describe Cirneco::Work, vcr: true do
29
29
  end
30
30
 
31
31
  it 'validates example full' do
32
- input = samples_path + 'datacite-example-full-v4.0.xml'
32
+ input = samples_path + 'datacite-example-full-v4.1.xml'
33
33
  subject = Cirneco::Work.new(input: input, from: "datacite")
34
34
 
35
35
  expect(subject.valid?).to be true
36
36
  expect(subject.id).to eq("https://doi.org/10.5072/example-full")
37
37
  expect(subject.type).to eq("SoftwareSourceCode")
38
38
  expect(subject.author).to eq("type"=>"Person", "id"=>"https://orcid.org/0000-0001-5000-0007", "name"=>"Miller, Elizabeth", "givenName"=>"Elizabeth", "familyName"=>"Miller")
39
- expect(subject.title).to eq([{"lang"=>"en-us", "text"=>"Full DataCite XML Example"}, {"title_type"=>"Subtitle", "lang"=>"en-us", "text"=>"Demonstration of DataCite Properties."}])
40
- expect(subject.alternate_name).to eq("type"=>"URL", "name"=>"http://schema.datacite.org/schema/meta/kernel-3.1/example/datacite-example-full-v3.1.xml")
41
- expect(subject.description["text"]).to start_with("XML example of all DataCite Metadata Schema v4.0 properties.")
39
+ expect(subject.title).to eq([{"lang"=>"en-US", "text"=>"Full DataCite XML Example"}, {"title_type"=>"Subtitle", "lang"=>"en-US", "text"=>"Demonstration of DataCite Properties."}])
40
+ expect(subject.alternate_name).to eq("type"=>"URL", "name"=>"https://schema.datacite.org/meta/kernel-4.1/example/datacite-example-full-v4.1.xml")
41
+ expect(subject.description["text"]).to start_with("XML example of all DataCite Metadata Schema v4.1 properties.")
42
42
  expect(subject.keywords).to eq([{"subject_scheme"=>"dewey", "scheme_uri"=>"http://dewey.info/", "text"=>"000 computer science"}])
43
43
  expect(subject.date_published).to eq("2014")
44
- expect(subject.date_modified).to eq("2014-10-17")
44
+ expect(subject.date_modified).to eq("2017-09-13")
45
45
  expect(subject.publisher).to eq("DataCite")
46
46
  end
47
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cirneco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.17
4
+ version: 0.9.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-19 00:00:00.000000000 Z
11
+ date: 2018-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: maremma
@@ -64,14 +64,14 @@ dependencies:
64
64
  requirements:
65
65
  - - "~>"
66
66
  - !ruby/object:Gem::Version
67
- version: '0.3'
67
+ version: '0.5'
68
68
  type: :runtime
69
69
  prerelease: false
70
70
  version_requirements: !ruby/object:Gem::Requirement
71
71
  requirements:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
- version: '0.3'
74
+ version: '0.5'
75
75
  - !ruby/object:Gem::Dependency
76
76
  name: nokogiri
77
77
  requirement: !ruby/object:Gem::Requirement
@@ -322,6 +322,7 @@ files:
322
322
  - Rakefile
323
323
  - bin/cirneco
324
324
  - cirneco.gemspec
325
+ - doi.xml
325
326
  - lib/cirneco.rb
326
327
  - lib/cirneco/api.rb
327
328
  - lib/cirneco/base.rb
@@ -334,26 +335,31 @@ files:
334
335
  - lib/cirneco/version.rb
335
336
  - lib/cirneco/work.rb
336
337
  - resources/kernel-4.0/include/datacite-contributorType-v4.xsd
337
- - resources/kernel-4.0/include/datacite-dateType-v4.xsd
338
+ - resources/kernel-4.0/include/datacite-dateType-v4.1.xsd
338
339
  - resources/kernel-4.0/include/datacite-descriptionType-v4.xsd
339
340
  - resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd
341
+ - resources/kernel-4.0/include/datacite-nameType-v4.1.xsd
340
342
  - resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd
341
- - resources/kernel-4.0/include/datacite-relationType-v4.xsd
342
- - resources/kernel-4.0/include/datacite-resourceType-v4.xsd
343
+ - resources/kernel-4.0/include/datacite-relationType-v4.1.xsd
344
+ - resources/kernel-4.0/include/datacite-resourceType-v4.1.xsd
343
345
  - resources/kernel-4.0/include/datacite-titleType-v4.xsd
344
346
  - resources/kernel-4.0/metadata.xsd
345
- - resources/kernel-4.0/samples/datacite-example-Box_dateCollected_DataCollector-v4.0.xml
346
- - resources/kernel-4.0/samples/datacite-example-GeoLocation-v4.0.xml
347
- - resources/kernel-4.0/samples/datacite-example-HasMetadata-v4.0.xml
348
- - resources/kernel-4.0/samples/datacite-example-ResearchGroup_Methods-v4.0.xml
349
- - resources/kernel-4.0/samples/datacite-example-ResourceTypeGeneral_Collection-v4.0.xml
350
- - resources/kernel-4.0/samples/datacite-example-complicated-v4.0.xml
351
- - resources/kernel-4.0/samples/datacite-example-dataset-v4.0.xml
352
- - resources/kernel-4.0/samples/datacite-example-full-v4.0.xml
353
- - resources/kernel-4.0/samples/datacite-example-fundingReference-v.4.0.xml
354
- - resources/kernel-4.0/samples/datacite-example-relationTypeIsIdenticalTo-v4.0.xml
355
- - resources/kernel-4.0/samples/datacite-example-video-v4.0.xml
356
- - resources/kernel-4.0/samples/datacite-example-workflow-v4.0.xml
347
+ - resources/kernel-4.0/samples/datacite-example-Box_dateCollected_DataCollector-v4.1.xml
348
+ - resources/kernel-4.0/samples/datacite-example-GeoLocation-v4.1.xml
349
+ - resources/kernel-4.0/samples/datacite-example-HasMetadata-v4.1.xml
350
+ - resources/kernel-4.0/samples/datacite-example-ResearchGroup_Methods-v4.1.xml
351
+ - resources/kernel-4.0/samples/datacite-example-ResourceTypeGeneral_Collection-v4.1.xml
352
+ - resources/kernel-4.0/samples/datacite-example-complicated-v4.1.xml
353
+ - resources/kernel-4.0/samples/datacite-example-datapaper-v4.1.xml
354
+ - resources/kernel-4.0/samples/datacite-example-dataset-v4.1.xml
355
+ - resources/kernel-4.0/samples/datacite-example-full-v4.1.xml
356
+ - resources/kernel-4.0/samples/datacite-example-fundingReference-v.4.1.xml
357
+ - resources/kernel-4.0/samples/datacite-example-polygon-advanced-v4.1.xml
358
+ - resources/kernel-4.0/samples/datacite-example-polygon-v4.1.xml
359
+ - resources/kernel-4.0/samples/datacite-example-relationTypeIsIdenticalTo-v4.1.xml
360
+ - resources/kernel-4.0/samples/datacite-example-software-v4.1.xml
361
+ - resources/kernel-4.0/samples/datacite-example-video-v4.1.xml
362
+ - resources/kernel-4.0/samples/datacite-example-workflow-v4.1.xml
357
363
  - spec/api_spec.rb
358
364
  - spec/doi_spec.rb
359
365
  - spec/fixtures/vcr_cassettes/Cirneco_Work/media/includes_media.yml