cirneco 0.4.9 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e11e729de3e3cc8bc220ecb315f826a1878c0b44
4
- data.tar.gz: f0433daccf86a0e8ccdf1ca2f7610beed4edcf5e
3
+ metadata.gz: ff8d6f8a690c5b342a861c193d2a92bd44e7a566
4
+ data.tar.gz: 4ecb864c32c56954b329499356bd3a42294645f6
5
5
  SHA512:
6
- metadata.gz: 59f48e99b4d44c2019fc4b1e6d7a396336a1610ec4369b21a5cdfea0635df55deb712c59b7ac2abef51d628d8510cd69a2c2d604a58a665cc938e121dfea7d37
7
- data.tar.gz: 8eee509876769cf56a17411ee84b50c035c4e24b07b656c935ac10ae28c4db5439043ea36980d0181465ce07f87a32eb65d2310f81aa074484b7ae7f17de8002
6
+ metadata.gz: 0e149987bd0d758fcf5776d31bbe1413eb0345ceb0c00d8a7ca79ff7ee768af2304470e27841ff55b9fb554d107918cbd66143de93bb709b158ba33311851817
7
+ data.tar.gz: ea4c9b11477f8685c915773ba4246ecc518f959bf0ebda548dbf4590f73aad5b363639329b6d147bc2ccee03d9b7148eb474de0c27f2f1b30ad1339051d292ac
data/.gitignore CHANGED
@@ -50,7 +50,7 @@ build-iPhoneSimulator/
50
50
  .rvmrc
51
51
 
52
52
  coverage/
53
-
53
+ spec/fixtures/doi.yml
54
54
  .env
55
55
  .env.*
56
56
  !.env.example
data/.travis.yml CHANGED
@@ -1,12 +1,21 @@
1
1
  language: ruby
2
2
  rvm:
3
3
  - 2.3.1
4
+
5
+ before_install:
6
+ - sudo add-apt-repository ppa:kalakris/cmake -y
7
+ - sudo apt-get update -q
8
+ - sudo apt-get install cmake -y
9
+
4
10
  install:
5
11
  - travis_retry bundle install
12
+
6
13
  script: bundle exec rspec
14
+
7
15
  notifications:
8
16
  slack: "$SLACK_TOKEN"
9
17
  email: false
18
+
10
19
  deploy:
11
20
  skip_cleanup: true
12
21
  provider: rubygems
data/Gemfile.lock CHANGED
@@ -1,15 +1,14 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cirneco (0.4.9)
4
+ cirneco (0.5.3)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  base32-crockford-checksum (~> 0.2.2)
7
+ bergamasco (~> 0.1.1)
7
8
  builder (~> 3.2, >= 3.2.2)
8
9
  dotenv (~> 2.1, >= 2.1.1)
9
10
  maremma (~> 3.1)
10
- namae (~> 0.10.1)
11
11
  nokogiri (~> 1.6, >= 1.6.8)
12
- sanitize (~> 4.0, >= 4.0.1)
13
12
  thor (~> 0.19)
14
13
 
15
14
  GEM
@@ -24,12 +23,23 @@ GEM
24
23
  addressable (2.5.0)
25
24
  public_suffix (~> 2.0, >= 2.0.2)
26
25
  base32-crockford-checksum (0.2.2)
26
+ bergamasco (0.1.4)
27
+ activesupport (~> 4.2, >= 4.2.5)
28
+ builder (~> 3.2, >= 3.2.2)
29
+ commonmarker (~> 0.14.0)
30
+ loofah (~> 2.0, >= 2.0.3)
31
+ multi_json (~> 1.11.2)
32
+ nokogiri (~> 1.6.7)
33
+ oj (~> 2.13.1)
34
+ pandoc-ruby (~> 2.0, >= 2.0.0)
35
+ safe_yaml (~> 1.0, >= 1.0.4)
27
36
  builder (3.2.2)
28
37
  codeclimate-test-reporter (1.0.3)
29
38
  simplecov
39
+ commonmarker (0.14.0)
40
+ ruby-enum (~> 0.4)
30
41
  crack (0.4.3)
31
42
  safe_yaml (~> 1.0.0)
32
- crass (1.0.2)
33
43
  diff-lcs (1.2.5)
34
44
  docile (1.1.5)
35
45
  dotenv (2.1.1)
@@ -43,6 +53,8 @@ GEM
43
53
  hashdiff (0.3.1)
44
54
  i18n (0.7.0)
45
55
  json (1.8.3)
56
+ loofah (2.0.3)
57
+ nokogiri (>= 1.5.9)
46
58
  maremma (3.1)
47
59
  activesupport (~> 4.2, >= 4.2.5)
48
60
  builder (~> 3.2, >= 3.2.2)
@@ -57,12 +69,10 @@ GEM
57
69
  minitest (5.10.1)
58
70
  multi_json (1.11.3)
59
71
  multipart-post (2.0.0)
60
- namae (0.10.2)
61
72
  nokogiri (1.6.8.1)
62
73
  mini_portile2 (~> 2.1.0)
63
- nokogumbo (1.4.10)
64
- nokogiri
65
74
  oj (2.13.1)
75
+ pandoc-ruby (2.0.1)
66
76
  public_suffix (2.0.4)
67
77
  rack (2.0.1)
68
78
  rack-test (0.6.3)
@@ -84,11 +94,9 @@ GEM
84
94
  rspec-xsd (0.1.0)
85
95
  nokogiri (~> 1.6)
86
96
  rspec (~> 3)
97
+ ruby-enum (0.6.0)
98
+ i18n
87
99
  safe_yaml (1.0.4)
88
- sanitize (4.4.0)
89
- crass (~> 1.0.2)
90
- nokogiri (>= 1.4.4)
91
- nokogumbo (~> 1.4.1)
92
100
  simplecov (0.12.0)
93
101
  docile (~> 1.1.0)
94
102
  json (>= 1.8, < 3)
data/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![Code Climate](https://codeclimate.com/github/datacite/cirneco/badges/gpa.svg)](https://codeclimate.com/github/datacite/cirneco)
6
6
  [![Test Coverage](https://codeclimate.com/github/datacite/cirneco/badges/coverage.svg)](https://codeclimate.com/github/datacite/cirneco/coverage)
7
7
 
8
- Cirneco is a command-line client for the DataCite Metadata Store (MDS), written as Ruby gem. Uses the MDS API, and includes several utlity functions.
8
+ Cirneco is a command-line client for the [DataCite Metadata Store](https://mds.datacite.org) (MDS), written as a Ruby gem. It uses the MDS API and includes several utility functions.
9
9
 
10
10
  ## Features
11
11
 
@@ -13,7 +13,7 @@ The following functionality is supported:
13
13
 
14
14
  * the MDS API (DOI, Metadata and Media APIs) is fully supported
15
15
  * generates valid metadata, using Schema 4.0 (currently only partial support of available metadata fields)
16
- * generates a DOI name to be used for registration, using a random number that is [Base32 Crockford encoded](https://github.com/levinalex/base32) and includes a checksum
16
+ * generates a DOI name to be used for registration, using a random number that is [Base32 Crockford encoded](http://www.crockford.com/wrmg/base32.html) and includes a checksum
17
17
 
18
18
  ## Requirements
19
19
 
@@ -37,7 +37,7 @@ gem install cirneco
37
37
 
38
38
  ## Configuration
39
39
 
40
- Configure ENV variables `MDS_USERNAME`, `MDS_PASSWORD` and `PREFIX`, e.g. by storing them in file `.env` in same folder (see `.env.xample`).
40
+ Configure ENV variables `MDS_USERNAME`, `MDS_PASSWORD` and `PREFIX`, e.g. by storing them in a `.env` file (see `.env.example`) in the directory from which you run the command.
41
41
 
42
42
  ## Commands
43
43
 
@@ -88,6 +88,23 @@ Post media information from file `1234.txt` in same directory
88
88
  cirneco media post 1234.xml
89
89
  ```
90
90
 
91
- ## License
91
+ ## Development
92
+
93
+ We use rspec for unit testing:
94
+
95
+ ```
96
+ bundle exec rspec
97
+ ```
92
98
 
93
- [MIT](license.md)
99
+ Follow along via [Github Issues](https://github.com/datacite/cirneco/issues).
100
+
101
+ ### Note on Patches/Pull Requests
102
+
103
+ * Fork the project
104
+ * Write tests for your new feature or a test that reproduces a bug
105
+ * Implement your feature or make a bug fix
106
+ * Do not mess with Rakefile, version or history
107
+ * Commit, push and make a pull request. Bonus points for topical branches.
108
+
109
+ ## License
110
+ **cirneco** is released under the [MIT License](https://github.com/datacite/cirneco/blob/master/LICENSE.md).
data/cirneco.gemspec CHANGED
@@ -16,12 +16,11 @@ Gem::Specification.new do |s|
16
16
 
17
17
  # Declare dependencies here, rather than in the Gemfile
18
18
  s.add_dependency 'maremma', '~> 3.1'
19
+ s.add_dependency 'bergamasco', '~> 0.1.1'
19
20
  s.add_dependency 'base32-crockford-checksum', '~> 0.2.2'
20
21
  s.add_dependency 'nokogiri', '~> 1.6', '>= 1.6.8'
21
22
  s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
22
- s.add_dependency 'namae', '~> 0.10.1'
23
23
  s.add_dependency 'activesupport', '~> 4.2', '>= 4.2.5'
24
- s.add_dependency 'sanitize', '~> 4.0', '>= 4.0.1'
25
24
  s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
26
25
  s.add_dependency 'thor', '~> 0.19'
27
26
  s.add_development_dependency 'bundler', '~> 1.0'
@@ -1,6 +1,5 @@
1
1
  require 'active_support/all'
2
2
  require 'nokogiri'
3
- require 'sanitize'
4
3
 
5
4
  require_relative 'api'
6
5
  require_relative 'utils'
data/lib/cirneco/doi.rb CHANGED
@@ -13,7 +13,7 @@ module Cirneco
13
13
  desc "get DOI", "get handle url for DOI"
14
14
  method_option :username, :default => ENV['MDS_USERNAME']
15
15
  method_option :password, :default => ENV['MDS_PASSWORD']
16
- method_option :sandbox, :type => :boolean
16
+ method_option :sandbox, :default => ENV['SANDBOX']
17
17
  def get(doi)
18
18
  if doi == "all"
19
19
  response = get_dois(options)
@@ -47,5 +47,20 @@ module Cirneco
47
47
  puts "Checksum for #{doi} is not valid"
48
48
  end
49
49
  end
50
+
51
+ desc "register DOCUMENTS", "register documents"
52
+ method_option :username, :default => ENV['MDS_USERNAME']
53
+ method_option :password, :default => ENV['MDS_PASSWORD']
54
+ method_option :sandbox, :default => ENV['SANDBOX']
55
# Registers DOIs for the markdown document(s) found at +filepath+ and
# prints the resulting status message(s).
#
# filepath - path to a single file, or to a directory; a directory is
#            handed to register_all_files, anything else to register_file.
def register(filepath)
  response = if File.directory?(filepath)
    register_all_files(filepath, options)
  else
    register_file(filepath, options)
  end

  puts response
end
50
65
  end
51
66
  end
data/lib/cirneco/media.rb CHANGED
@@ -14,7 +14,7 @@ module Cirneco
14
14
  desc "get DOI", "get media for DOI"
15
15
  method_option :username, :default => ENV['MDS_USERNAME']
16
16
  method_option :password, :default => ENV['MDS_PASSWORD']
17
- method_option :sandbox, :type => :boolean
17
+ method_option :sandbox, :default => ENV['SANDBOX']
18
18
  def get(doi)
19
19
  response = get_media(doi, options.merge(raw: true))
20
20
 
@@ -31,7 +31,7 @@ module Cirneco
31
31
  desc "post DOI", "post media for DOI"
32
32
  method_option :username, :default => ENV['MDS_USERNAME']
33
33
  method_option :password, :default => ENV['MDS_PASSWORD']
34
- method_option :sandbox, :type => :boolean
34
+ method_option :sandbox, :default => ENV['SANDBOX']
35
35
  method_option :file, :aliases => '-f'
36
36
  def post(doi)
37
37
  filename = options[:file] || doi.split("/", 2).last + ".txt"
@@ -14,7 +14,7 @@ module Cirneco
14
14
  desc "get DOI", "get metadata for DOI"
15
15
  method_option :username, :default => ENV['MDS_USERNAME']
16
16
  method_option :password, :default => ENV['MDS_PASSWORD']
17
- method_option :sandbox, :type => :boolean
17
+ method_option :sandbox, :default => ENV['SANDBOX']
18
18
  def get(doi)
19
19
  response = get_metadata(doi, options)
20
20
 
@@ -31,7 +31,7 @@ module Cirneco
31
31
  desc "post DOI", "post metadata for DOI"
32
32
  method_option :username, :default => ENV['MDS_USERNAME']
33
33
  method_option :password, :default => ENV['MDS_PASSWORD']
34
- method_option :sandbox, :type => :boolean
34
+ method_option :sandbox, :default => ENV['SANDBOX']
35
35
  def post(file)
36
36
  data = IO.read(file)
37
37
  response = post_metadata(data, options)
@@ -46,7 +46,7 @@ module Cirneco
46
46
  desc "delete DOI", "hide metadata for DOI"
47
47
  method_option :username, :default => ENV['MDS_USERNAME']
48
48
  method_option :password, :default => ENV['MDS_PASSWORD']
49
- method_option :sandbox, :type => :boolean
49
+ method_option :sandbox, :default => ENV['SANDBOX']
50
50
  def delete(doi)
51
51
  response = delete_metadata(doi, options)
52
52
 
data/lib/cirneco/utils.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'base32/crockford'
2
2
  require 'securerandom'
3
+ require 'bergamasco'
4
+ require 'time'
3
5
 
4
6
  module Cirneco
5
7
  module Utils
@@ -24,5 +26,67 @@ module Cirneco
24
26
  number = options[:number] || SecureRandom.random_number(UPPER_LIMIT)
25
27
  prefix.to_s + "/" + Base32::Crockford.encode(number, split: 4, length: 8, checksum: true)
26
28
  end
29
+
30
+ # currently only supports markdown files with YAML header
31
# Adds a freshly generated DOI to a markdown file (or, with
# options[:unregister], passes a nil DOI instead) and logs the change.
#
# filepath - path of the file to process; files whose extension is not
#            ".md" are skipped.
# options  - Hash of options:
#            :unregister - when truthy, use a nil DOI instead of a new one
#            :prefix     - DOI prefix, falls back to ENV['PREFIX']
#            :datapath   - YAML log file, falls back to ENV['DATAPATH'],
#                          then "data/doi.yml"
#            The whole hash is also forwarded to encode_doi.
#
# Returns a String describing what happened.
def register_file(filepath, options={})
  filename = File.basename(filepath)
  if File.extname(filepath) != ".md"
    return "File #{filename} ignored: not a markdown file"
  end

  original = IO.read(filepath)

  # A nil DOI is what gets passed along when unregistering; presumably
  # Bergamasco then drops the key from the YAML header -- TODO confirm.
  doi = options[:unregister] ? nil : encode_doi(options[:prefix] || ENV['PREFIX'], options)

  updated = Bergamasco::Markdown.update_file(original, { "doi" => doi })
  changed = updated != original

  if changed
    IO.write(filepath, updated)

    # Append a record of this change to the YAML log; a log that holds a
    # single Hash is wrapped so it can be concatenated like a list.
    datapath = options[:datapath] || ENV['DATAPATH'] || "data/doi.yml"
    log = Bergamasco::Markdown.read_yaml(datapath) || []
    log = [log] if log.is_a?(Hash)
    entry = { "filename" => filename, "doi" => doi, "date" => Time.now.utc.iso8601 }
    Bergamasco::Markdown.write_yaml(datapath, log + [entry])
  end

  return "DOI removed from #{filename}" if doi.nil?

  # NOTE(review): in the unchanged case the DOI printed here is the one
  # generated above, not one read back from the file -- verify intent.
  changed ? "DOI #{doi} added to #{filename}" : "DOI #{doi} found in #{filename}"
end
63
+
64
# Runs register_file on every "*.md" file directly inside +folderpath+.
#
# folderpath - directory to scan (not recursive: the glob is "<folder>/*.md").
# options    - passed unchanged to register_file for each file.
#
# Returns the per-file messages joined with newlines ("" when no file matches).
def register_all_files(folderpath, options={})
  markdown_files = Dir.glob("#{folderpath}/*.md")
  messages = markdown_files.map { |path| register_file(path, options) }
  messages.join("\n")
end
69
+
70
# Builds a Cirneco::Work from the YAML header of a document.
#
# metadata - Hash with String keys. "doi", "author", "title", "date" and
#            "summary" are required; "type" and "tags" are optional.
# options  - optional keyword overrides:
#            :publisher           - falls back to ENV['SITE_TITLE']
#            :type                - used when metadata has no "type"; falls
#                                   back to ENV['SITE_DEFAULT_TYPE'], then
#                                   "BlogPosting"
#            :license_name        - falls back to ENV['SITE_LICENCE_NAME'],
#                                   then "Creative Commons Attribution"
#            :license_url         - no fallback
#            :hosting_institution - falls back to
#                                   ENV['SITE_HOSTING_INSTITUTION']
#
# Returns the new Cirneco::Work, or the String "Error" when a required key
# is missing from metadata.
def create_work_from_yaml(metadata:, **options)
  required_keys = ["doi", "author", "title", "date", "summary"]
  return "Error" unless required_keys.all? { |k| metadata.key? k }

  creators = Array(metadata["author"])

  publisher = options[:publisher] || ENV['SITE_TITLE']
  # to_s first: a YAML loader may hand back a Date instead of a String for
  # an unquoted date, and Date does not support [0..3].
  publication_year = metadata["date"].to_s[0..3].to_i

  resource_type = metadata["type"] || options[:type] || ENV['SITE_DEFAULT_TYPE'] || "BlogPosting"
  resource_type_general = resource_type == "Dataset" ? "Dataset" : "Text"

  license_name = options[:license_name] || ENV['SITE_LICENCE_NAME'] || "Creative Commons Attribution"
  license_url = options[:license_url] # || ENV['SITE_LICENCE_URL'] || "https://creativecommons.org/licenses/by/4.0/"

  descriptions = [{ value: metadata["summary"], description_type: "Abstract" }]

  contributor = options[:hosting_institution] || ENV['SITE_HOSTING_INSTITUTION']
  # Fixed: previously referenced the undefined local `contrbutor`.
  contributors = [{ literal: contributor }]

  # Fixed: Array(...) is now closed right after the tags, so descriptions
  # and contributors are keywords of Work.new instead of extra arguments to
  # Kernel#Array.
  # NOTE(review): assumes Cirneco::Work accepts descriptions: and
  # contributors: -- confirm against work.rb.
  Cirneco::Work.new(doi: metadata["doi"],
                    creators: creators,
                    title: metadata["title"],
                    publisher: publisher,
                    publication_year: publication_year,
                    resource_type: { value: resource_type, resource_type_general: resource_type_general },
                    rights: [{ value: license_name, rights_uri: license_url }],
                    subjects: Array(metadata["tags"]),
                    descriptions: descriptions,
                    contributors: contributors)
end
27
91
  end
28
92
  end
@@ -1,3 +1,3 @@
1
1
  module Cirneco
2
- VERSION = "0.4.9"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/cirneco/work.rb CHANGED
@@ -1,6 +1,5 @@
1
1
  require 'active_support/all'
2
2
  require 'nokogiri'
3
- require 'sanitize'
4
3
 
5
4
  require_relative 'api'
6
5
  require_relative 'utils'
@@ -36,11 +35,6 @@ module Cirneco
36
35
 
37
36
  SCHEMA = File.expand_path("../../../resources/kernel-4.0/metadata.xsd", __FILE__)
38
37
 
39
-
40
- def sanitize(string)
41
- Sanitize.fragment(string).squish
42
- end
43
-
44
38
  def has_required_elements?
45
39
  doi && creators && title && publisher && publication_year && resource_type
46
40
  end
@@ -0,0 +1,97 @@
1
+ ---
2
+ layout: post
3
+ title: Cool DOI's
4
+ author: mfenner
5
+ date: 2016-12-15
6
+ tags:
7
+ - doi
8
+ - featured
9
+ image: https://blog.datacite.org/images/2016/12/cool-dois.png
10
+ ---
11
+ In 1998 Tim Berners-Lee coined the term cool URIs [-@https://www.w3.org/Provider/Style/URI], that is URIs that don’t change. We know that URLs referenced in the scholarly literature are often not cool, leading to link rot [@https://doi.org/10.1371/journal.pone.0115253] and making it hard or impossible to find the referenced resource.READMORE
12
+
13
+ Cool URIs are, of course, a fundamental principle behind DOIs, with the two important concepts [*resolution*](https://www.doi.org/doi_handbook/3_Resolution.html) (it is very hard to maintain a URL directly pointing at a resource) and [*policies*](https://www.doi.org/doi_handbook/6_Policies.html) (that all DOI registration agencies and organizations minting DOIs agree to maintain the redirection). The third essential element for DOIs, their [*data model*](https://www.doi.org/doi_handbook/4_Data_Model.html), is not directly about persistent linking, but about the discoverability of the linked resources via standard metadata in a central index.
14
+
15
+ All DOIs, expressed as HTTP URI, are therefore cool URIs. So what is a cool DOI? And, furthermore, how to create and use them? To understand what a cool DOI is, we have to explain the three parts that make up a DOI:
16
+
17
+ ![](/images/2016/12/doi-parts.png)
18
+
19
+ ### Proxy
20
+
21
+ The proxy is not part of the DOI specification, but almost all scholarly DOIs that users encounter today will be expressed as HTTP URLs. DataCite recommends that all DOIs are displayed as permanent URLs, consistent with the recommendations of other DOI registration agencies, e.g. the [Crossref DOI display guidelines](http://www.crossref.org/02publishers/doi_display_guidelines.html). When the DOI system was originally designed, it was thought that the DOI protocol would become widely used, but that clearly has not happened and displaying DOIs as **doi:10.5281/ZENODO.31780** is therefore not recommended.
22
+
23
+ The DOI proxy enables the functionality of expressing DOIs as HTTP URIs. Users should also be aware of these two recommendations:
24
+
25
+ * Use [doi.org](https://www.doi.org/doi_proxy/proxy_policies.html) instead of dx.doi.org as DNS name
26
+ * Use the HTTPS protocol instead of HTTP protocol
27
+
28
+ Ed Pentz from Crossref makes the case for HTTPS in a [September blog post](http://blog.crossref.org/2016/09/new-crossref-doi-display-guidelines.html). The web, and therefore also the scholarly web, is moving to HTTPS as the default. It is important that the DOI proxy redirects to HTTPS URLs, and it will take some time until all DataCite data centers use HTTPS for the landing pages their DOIs redirects to.
29
+
30
+ What many users don’t know is that doi.org is not the only proxy server for DOIs. DOIs use the handle system and any handle server will resolve a DOI, just as doi.org will resolve any handle. This means that [https://hdl.handle.net/10.5281/ZENODO.31780](https://hdl.handle.net/10.5281/ZENODO.31780) will resolve to the landing page for that DOI and that [http://doi.org/10273/BGRB5054RX05201](http://doi.org/10273/BGRB5054RX05201) is a handle (for a [IGSN](http://www.igsn.org/)) and not a DOI.
31
+
32
+ ### Prefix
33
+
34
+ The DOI prefix is used as a namespace so that DOIs are globally unique without requiring global coordination for every new identifier. Prefixes in the handle system and therefore for DOIs are numbers without any semantic meaning. One lesson learned with persistent identifiers is that adding meaning to the identifier (e.g. by using a prefix with the name of the data repository) is always dangerous, because – despite best intentions – all names can change over time.
35
+
36
+ Since the DOI prefix is a namespace to keep DOIs globally unique, there is usually no need for multiple prefixes for one organization managing DOI assignment. The tricky part is that these responsibilities can change, e.g. when an organization manages multiple repositories and one of them is migrated to another organization. It therefore makes sense to assign one prefix per list of resources that always stays together, e.g. one repository. It is possible that one prefix is managed by multiple organizations (as long as they use the same DOI registration agency), but that makes DOI management more complex.
37
+
38
+ ### Suffix
39
+
40
+ The suffix for a DOI can be (almost) any string. Which is both a feature and a curse. It is a feature because it gives maximal flexibility, for example when migrating existing identifiers to the DOI system. And it is a curse because it does not always work well in the web context, as the list of characters allowed in a URL is limited. A good example of this are SICIs ([Serial Item and Contribution Identifier](https://en.wikipedia.org/wiki/Serial_Item_and_Contribution_Identifier)), they were defined in 1996 before the DOI system was implemented, and could then be migrated to DOIs. Unfortunately they can contain many characters that are problematic in a URL or make it difficult to validate the DOI, as in [https://doi.org/10.1002/(sici)1099-1409(199908/10)3:6/7<672::aid-jpp192>3.0.co;2-8](https://doi.org/10.1002/(sici)1099-1409(199908/10)3:6/7<672::aid-jpp192>3.0.co;2-8). A Crossref [blog post](http://blog.crossref.org/2015/08/doi-regular-expressions.html) by Andrew Gilmartin gives a good overview about the characters found in DOIs and suggests the following regular expression to check for valid DOIs:
41
+
42
+ ```
43
+ /^10.\d{4,9}/[-._;()/:A-Z0-9]+$/i
44
+ ```
45
+
46
+ SICIs demonstrate two other pitfalls:
47
+
48
+ * they contain semantic information (ISSN, volume, number, etc.) that may change over time, and
49
+ * they are long, difficult to transcribe, with characters not allowed in URLs, and not very human-readable.
50
+
51
+ Semantic information might also lead users to expect certain functionalities. A common pattern that we see at DataCite is to include information about the version or parent in the suffix, e.g. [https://doi.org/10.6084/M9.FIGSHARE.3501629.V1](https://doi.org/10.6084/M9.FIGSHARE.3501629.V1) or [https://doi.org/10.5061/DRYAD.0SN63/7](https://doi.org/10.5061/DRYAD.0SN63/7). While the decision on what to put into the suffix is up to each data center, we should make sure users don't think that these are functionalities of the DOI system (e.g. that adding **.V2** to any DOI name will resolve to version 2 of that resource).
52
+
53
+ Another issue to keep in mind when assigning suffixes is that DOIs – in contrast to HTTP URIs – are case-insensitive, [https://doi.org/10.5281/ZENODO.31780](https://doi.org/10.5281/ZENODO.31780) and [https://doi.org/10.5281/zenodo.31780](https://doi.org/10.5281/zenodo.31780) are the same DOI. All DOIs are [converted to upper case](https://www.doi.org/doi_handbook/2_Numbering.html#2.4) upon registration and DOI resolution, but DOIs are not consistently displayed in such a way.
54
+
55
+ ### Generating cool DOIs
56
+
57
+ With all that, what should the ideal DOI look like? Its suffix should be:
58
+
59
+ * opaque without semantic information
60
+ * work well in a web environment, avoiding characters problematic in URLs
61
+ * short and human-readable
62
+ * resistant to transcription errors
63
+ * easy to generate
64
+
65
+ On Tuesday DataCite released a tool that helps generating such a suffix, an open source command line tool called [cirneco](https://github.com/datacite/cirneco) (a lot of our open source software uses Italian dog breed names). Cirneco is a Ruby gem that can be installed via
66
+
67
+ ```
68
+ gem install cirneco
69
+ ```
70
+
71
+ Cirneco uses base32 encoding, as [described](http://www.crockford.com/wrmg/base32.html) by Douglas Crockford. The encoding starts with a randomly generated number to guarantee uniqueness of the identifier, and then encodes the number into a string that uses digits and uppercase letters. It avoids the letters I, O and L as they can be confused with the digits 1 and 0, using 32 characters (and 5 checksum characters) in total. The last character is a checksum. The resulting string from cirneco always has a length of 8 characters, in groups of 4 separated by a hyphen to help with readability. The advantage of base32 encoding over using only numbers (as for example ORCID is doing) is that the resulting string becomes much more compact: the available 7 characters (plus one for the checksum) can encode 34,359,738,367 strings, compared to 10 million when only using numbers. This number is large enough that the resulting suffix will not only be unique for a given prefix, but also unique for all DOIs (there is a very small chance to get the same random number twice, but this will be rejected when trying to register the DOI).
72
+
73
+ Another common way to generate random strings would have been universally unique identifiers ([UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier)), but they are long and not very human-readable, e.g. [https://doi.org/10.4233/UUID:6D192FE2-DE18-4556-873A-D3CD56AB96A6](https://doi.org/10.4233/UUID:6D192FE2-DE18-4556-873A-D3CD56AB96A6).
74
+
75
+ An example DOI generated by cirneco would be
76
+
77
+ ```
78
+ cirneco doi generate --prefix 10.5555
79
+ 10.5555/KVTD-VPWM
80
+ ```
81
+
82
+ The generated DOI is short enough that it should work well in places where space is limited, providing an alternative to the [ShortDOI](http://shortdoi.org/) service which shortens existing DOIs, but does this by adding another layer on top of the DOI proxy.
83
+
84
+ Another cirneco command checks that this is a valid base32 string using the checksum
85
+
86
+ ```
87
+ cirneco doi check 10.5555/KVTD-VPWM
88
+ Checksum for 10.5555/KVTD-VPWM is valid
89
+ ```
90
+
91
+ This can be used to quickly verify a DOI, e.g. in a web form or API. The Ruby base32 encoding library used by cirneco is open source ([https://github.com/datacite/base32](https://github.com/datacite/base32); I added the checksum to the existing library), and implementations of the Crockford base32 encoding pattern are available in many other languages, including [Python](https://github.com/jbittel/base32-crockford), [PHP](https://github.com/dflydev/dflydev-base32-crockford), [Javascript](https://www.npmjs.com/package/base32-crockford), [Java](http://stackoverflow.com/questions/22385467/crockford-base32-encoding-for-large-number-java-implementation), [Go](https://github.com/richardlehane/crock32) and [.NET](https://crockfordbase32.codeplex.com/).
92
+
93
+ To answer the question raised at the beginning: a cool DOI is a DOI expressed as HTTPS URI using the doi.org proxy and using a base32-encoded suffix, for example **https://doi.org/10.5555/KVTD-VPWM**. This DOI works well in a web environment, is human readable, easy to parse and detect (e.g. in text mining), and can be generated using an algorithm that is well understood and supported.
94
+
95
+ ![](/images/2016/12/cool-dois.png)
96
+
97
+ ### References
@@ -0,0 +1,9 @@
1
+ ---
2
+ layout: post
3
+ title: Cool DOI's
4
+ author: mfenner
5
+ date: 2016-12-15
6
+ tags:
7
+ - doi
8
+ - featured
9
+ image: https://blog.datacite.org/images/2016/12/cool-dois.png
data/spec/spec_helper.rb CHANGED
@@ -33,3 +33,7 @@ VCR.configure do |c|
33
33
  c.filter_sensitive_data("<MDS_TOKEN>") { mds_token }
34
34
  c.configure_rspec_metadata!
35
35
  end
36
+
37
# Absolute path of the "fixtures" directory that sits next to this file,
# with a trailing slash so callers can append a file name directly.
def fixture_path
  fixtures_dir = File.expand_path("fixtures", File.dirname(__FILE__))
  "#{fixtures_dir}/"
end
data/spec/utils_spec.rb CHANGED
@@ -39,4 +39,31 @@ describe Cirneco::DataCenter, vcr: true, :order => :defined do
39
39
  expect(subject.encode_doi(prefix)).to start_with("10.23725")
40
40
  end
41
41
  end
42
+
43
+ describe "register" do
44
+ it 'should register_file' do
45
+ filepath = fixture_path + 'cool-dois.html.md'
46
+ number = 123
47
+ response = subject.register_file(filepath, number: number)
48
+ expect(response).to eq("DOI 10.23725/0000-03VC added to cool-dois.html.md")
49
+ end
50
+
51
+ it 'should register_file unregister' do
52
+ filepath = fixture_path + 'cool-dois.html.md'
53
+ response = subject.register_file(filepath, unregister: true)
54
+ expect(response).to eq("DOI removed from cool-dois.html.md")
55
+ end
56
+
57
+ it 'should register_all_files unregister' do
58
+ number = 123
59
+ response = subject.register_all_files(fixture_path, number: number, unregister: true)
60
+ expect(response).to eq("DOI removed from cool-dois.html.md")
61
+ end
62
+
63
+ it 'should ignore non-markdown file for register_file' do
64
+ filepath = fixture_path + 'cool-dois.yml'
65
+ response = subject.register_file(filepath)
66
+ expect(response).to eq("File cool-dois.yml ignored: not a markdown file")
67
+ end
68
+ end
42
69
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cirneco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.9
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-13 00:00:00.000000000 Z
11
+ date: 2016-12-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: maremma
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bergamasco
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.1.1
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: base32-crockford-checksum
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -78,20 +92,6 @@ dependencies:
78
92
  - - ">="
79
93
  - !ruby/object:Gem::Version
80
94
  version: 3.2.2
81
- - !ruby/object:Gem::Dependency
82
- name: namae
83
- requirement: !ruby/object:Gem::Requirement
84
- requirements:
85
- - - "~>"
86
- - !ruby/object:Gem::Version
87
- version: 0.10.1
88
- type: :runtime
89
- prerelease: false
90
- version_requirements: !ruby/object:Gem::Requirement
91
- requirements:
92
- - - "~>"
93
- - !ruby/object:Gem::Version
94
- version: 0.10.1
95
95
  - !ruby/object:Gem::Dependency
96
96
  name: activesupport
97
97
  requirement: !ruby/object:Gem::Requirement
@@ -112,26 +112,6 @@ dependencies:
112
112
  - - ">="
113
113
  - !ruby/object:Gem::Version
114
114
  version: 4.2.5
115
- - !ruby/object:Gem::Dependency
116
- name: sanitize
117
- requirement: !ruby/object:Gem::Requirement
118
- requirements:
119
- - - "~>"
120
- - !ruby/object:Gem::Version
121
- version: '4.0'
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: 4.0.1
125
- type: :runtime
126
- prerelease: false
127
- version_requirements: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - "~>"
130
- - !ruby/object:Gem::Version
131
- version: '4.0'
132
- - - ">="
133
- - !ruby/object:Gem::Version
134
- version: 4.0.1
135
115
  - !ruby/object:Gem::Dependency
136
116
  name: dotenv
137
117
  requirement: !ruby/object:Gem::Requirement
@@ -362,6 +342,8 @@ files:
362
342
  - resources/kernel-4.0/samples/datacite-example-video-v4.0.xml
363
343
  - resources/kernel-4.0/samples/datacite-example-workflow-v4.0.xml
364
344
  - spec/api_spec.rb
345
+ - spec/fixtures/cool-dois.html.md
346
+ - spec/fixtures/cool-dois.yml
365
347
  - spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml
366
348
  - spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_next_doi.yml
367
349
  - spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_number_of_latest_doi.yml
@@ -396,7 +378,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
396
378
  version: '0'
397
379
  requirements: []
398
380
  rubyforge_project:
399
- rubygems_version: 2.4.5
381
+ rubygems_version: 2.6.8
400
382
  signing_key:
401
383
  specification_version: 4
402
384
  summary: Ruby client library for the DataCite MDS