bergamasco 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1397e1d71f822e2ba31fb055cae298505fe60d92
4
+ data.tar.gz: 1928b970eed69989858bf680b6290151ec42539d
5
+ SHA512:
6
+ metadata.gz: 2604ab9dcd36d6e2670ec41392fe27aa308252a93ddaaad7faed930591dd5be15c4e52dd6a156f826a78846f6aed24a93dcdb56b301ab94a227cdb01e4396b91
7
+ data.tar.gz: 677df10181be5b8a3b47fc4a78c2d5f768640c7339b7d52f6696d692b51bc37fefecf5c81c6b8ce9f0142f3c413d8c3702bdee12014a6620e29cddd367459d04
data/.gitignore ADDED
@@ -0,0 +1,50 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
data/.travis.yml ADDED
@@ -0,0 +1,26 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.1
4
+
5
+ before_install:
6
+ - sudo add-apt-repository ppa:kalakris/cmake -y
7
+ - sudo apt-get update -q
8
+ - sudo apt-get install cmake -y
9
+
10
+ install:
11
+ - travis_retry bundle install
12
+
13
+ script: bundle exec rspec
14
+
15
+ notifications:
16
+ slack: "$SLACK_TOKEN"
17
+ email: false
18
+
19
+ deploy:
20
+ provider: rubygems
21
+ api_key:
22
+ secure: fwaYVUhCt3fOJvZj37CsZ+X5DcHwVZjj6x1I3Qs9bNrEC3o9OhPj/W4vC0LslmwXUnKgSwnwK/8vlSjy7H+N5yUbXPowA7N9qOq0FY4nU/8l8gor9SbP6Q9AXH4FmRHYyT+eMYj617boBNQkaf6bMdecxT3XSvqQEVxbB66vd3mltJggBeL7DAkjUJwAoBApFiU7lVyyEwtUTlUNGFWSKNrTGeabqMN1u+C3AjZNNAZXW4dZWxlBpwySAubToSuOzBbVjNJ1leGrKj9UB1DHzry7Qp0yWqKzdrSYdPwuQDygRkM0WmBRfQRZejphqE269cwdYXFV3DyUULuLsiEx0STNzMXxpLD/SyVNs0j+2e0aI3259hFgFJSDHh/iMFtQOAO9hSb+s8eqJTpKMu2dDRJCBA+oBK4Tr0a2ojGlMSQ/3tHwKLjan3dfqoDMUGkSnUTPTrW16n2ZzV1lmmHlOe03hykKNm+HOqu6LuCtGcP/xRxhuvsIe9qNopO6hfJwonFClQnIKXH/n1v0xkGElbxYRlNPhqyrNknFMFKW5puqSQfbwcb5iZPErtMsNo5DuGhz0R0oRVTYWCL8TeX2bnRdO+CQqW0Qam/l1bZkemGnWTHuC4wouBQWeoUcCt5dxLyza4n9qpgdJUYcse/AH1cROhMTybEmFqIr2MOD450=
23
+ gem: bergamasco
24
+ on:
25
+ tags: true
26
+ repo: datacite/bergamasco
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,80 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bergamasco (0.1.1)
5
+ activesupport (~> 4.2, >= 4.2.5)
6
+ builder (~> 3.2, >= 3.2.2)
7
+ commonmarker (~> 0.14.0)
8
+ loofah (~> 2.0, >= 2.0.3)
9
+ multi_json (~> 1.11.2)
10
+ nokogiri (~> 1.6.7)
11
+ oj (~> 2.13.1)
12
+ pandoc-ruby (~> 2.0, >= 2.0.0)
13
+ safe_yaml (~> 1.0, >= 1.0.4)
14
+
15
+ GEM
16
+ remote: https://rubygems.org/
17
+ specs:
18
+ activesupport (4.2.7.1)
19
+ i18n (~> 0.7)
20
+ json (~> 1.7, >= 1.7.7)
21
+ minitest (~> 5.1)
22
+ thread_safe (~> 0.3, >= 0.3.4)
23
+ tzinfo (~> 1.1)
24
+ builder (3.2.2)
25
+ codeclimate-test-reporter (1.0.3)
26
+ simplecov
27
+ commonmarker (0.14.0)
28
+ ruby-enum (~> 0.4)
29
+ diff-lcs (1.2.5)
30
+ docile (1.1.5)
31
+ i18n (0.7.0)
32
+ json (1.8.3)
33
+ loofah (2.0.3)
34
+ nokogiri (>= 1.5.9)
35
+ mini_portile2 (2.1.0)
36
+ minitest (5.10.1)
37
+ multi_json (1.11.3)
38
+ nokogiri (1.6.8.1)
39
+ mini_portile2 (~> 2.1.0)
40
+ oj (2.13.1)
41
+ pandoc-ruby (2.0.1)
42
+ rake (12.0.0)
43
+ rspec (3.5.0)
44
+ rspec-core (~> 3.5.0)
45
+ rspec-expectations (~> 3.5.0)
46
+ rspec-mocks (~> 3.5.0)
47
+ rspec-core (3.5.4)
48
+ rspec-support (~> 3.5.0)
49
+ rspec-expectations (3.5.0)
50
+ diff-lcs (>= 1.2.0, < 2.0)
51
+ rspec-support (~> 3.5.0)
52
+ rspec-mocks (3.5.0)
53
+ diff-lcs (>= 1.2.0, < 2.0)
54
+ rspec-support (~> 3.5.0)
55
+ rspec-support (3.5.0)
56
+ ruby-enum (0.6.0)
57
+ i18n
58
+ safe_yaml (1.0.4)
59
+ simplecov (0.12.0)
60
+ docile (~> 1.1.0)
61
+ json (>= 1.8, < 3)
62
+ simplecov-html (~> 0.10.0)
63
+ simplecov-html (0.10.0)
64
+ thread_safe (0.3.5)
65
+ tzinfo (1.2.2)
66
+ thread_safe (~> 0.1)
67
+
68
+ PLATFORMS
69
+ ruby
70
+
71
+ DEPENDENCIES
72
+ bergamasco!
73
+ bundler (~> 1.0)
74
+ codeclimate-test-reporter (~> 1.0.0)
75
+ rake
76
+ rspec (~> 3.4)
77
+ simplecov
78
+
79
+ BUNDLED WITH
80
+ 1.13.6
data/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2016 DataCite
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,32 @@
1
+ # Bergamasco
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/bergamasco.svg)](https://badge.fury.io/rb/bergamasco)
4
+ [![Build Status](https://travis-ci.org/datacite/bergamasco.svg?branch=master)](https://travis-ci.org/datacite/bergamasco)
5
+ [![Test Coverage](https://codeclimate.com/github/datacite/bergamasco/badges/coverage.svg)](https://codeclimate.com/github/datacite/bergamasco/coverage)
6
+ [![Code Climate](https://codeclimate.com/github/datacite/bergamasco/badges/gpa.svg)](https://codeclimate.com/github/datacite/bergamasco)
7
+
8
+ Text utilities for common cleanup and reformatting tasks when working with scholarly metadata.
9
+
10
+ ## Installation
11
+
12
+ The usual way with Bundler: add the following to your `Gemfile` to install the current version of the gem:
13
+
14
+ ```ruby
15
+ gem 'bergamasco'
16
+ ```
17
+
18
+ Then run `bundle install` to install into your environment.
19
+
20
+ You can also install the gem system-wide in the usual way:
21
+
22
+ ```bash
23
+ gem install bergamasco
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ TBD.
29
+
30
+ ## License
31
+
32
+ [MIT](LICENSE.md)
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env rake
2
+
3
+ # Get useful helper tasks from Bundler
4
+ require "bundler/gem_tasks"
5
+
@@ -0,0 +1,35 @@
1
+ require "date"
2
+ require File.expand_path("../lib/bergamasco/version", __FILE__)
3
+
4
# Gem specification for bergamasco. The version is read from
# lib/bergamasco/version.rb (required at the top of this file) so it is
# declared in exactly one place.
Gem::Specification.new do |s|
  # --- Identity and descriptive metadata ---
  s.name = "bergamasco"
  s.version = Bergamasco::VERSION
  s.authors = "Martin Fenner"
  s.email = "mfenner@datacite.org"
  s.homepage = "https://github.com/datacite/bergamasco"
  s.summary = "Text utilities for working with scholarly metadata"
  s.description = "Text utilities for common cleanup and reformatting tasks when working with scholarly metadata"
  s.license = 'MIT'
  s.date = Date.today

  # --- Packaged files ---
  # The file lists come from git, so the gem has to be built inside a checkout.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.extra_rdoc_files = ["README.md"]
  # Assigned once; the original set the same value in two places.
  s.require_paths = ["lib"]

  # --- Runtime dependencies ---
  s.add_dependency 'nokogiri', '~> 1.6.7'
  s.add_dependency 'loofah', '~> 2.0', '>= 2.0.3'
  s.add_dependency "pandoc-ruby", '~> 2.0', '>= 2.0.0'
  s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
  s.add_dependency 'multi_json', '~> 1.11.2'
  s.add_dependency 'oj', '~> 2.13.1'
  s.add_dependency 'activesupport', '~> 4.2', '>= 4.2.5'
  s.add_dependency 'safe_yaml', '~> 1.0', '>= 1.0.4'
  s.add_dependency 'commonmarker', '~> 0.14.0'

  # --- Development dependencies ---
  s.add_development_dependency 'bundler', '~> 1.0'
  s.add_development_dependency 'rspec', '~> 3.4'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'codeclimate-test-reporter', "~> 1.0.0"
  s.add_development_dependency 'simplecov'
end
data/lib/bergamasco.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'active_support/all'
2
+ require 'json'
3
+ require 'nokogiri'
4
+ require 'yaml'
5
+ require 'safe_yaml/load'
6
+ require 'loofah'
7
+ require 'commonmarker'
8
+
9
+ require "bergamasco/summarize"
10
+ require "bergamasco/sanitize"
11
+ require "bergamasco/markdown"
12
+ require "bergamasco/whitelist_scrubber"
@@ -0,0 +1,32 @@
1
module Bergamasco
  # Helpers for Markdown text that may begin with a YAML front matter block.
  module Markdown
    # Matches a front matter block at the very start of the text: an opening
    # `---` line, the YAML body, and a closing `---` or `...` line. Whatever
    # follows the match is the document content.
    YAML_FRONT_MATTER_REGEXP = /\A(---\s*\n.*?\n?)^((---|\.\.\.)\s*$\n?)/m

    # Split text into its front matter and its content.
    #
    # file - String holding the text (not a path).
    #
    # Returns a two-element Array [metadata, content]. When the text has no
    # front matter block, metadata is an empty Hash and content is the text
    # unchanged; previously this case raised NoMethodError because the regexp
    # match was nil.
    def self.split_yaml_frontmatter(file)
      match = YAML_FRONT_MATTER_REGEXP.match(file)
      return [{}, file] if match.nil?

      # An empty front matter block loads as nil/false; normalize it to an
      # empty Hash so callers can always merge into the result.
      metadata = SafeYAML.load(file) || {}
      [metadata, match.post_match]
    end

    # Merge new_metadata into metadata and drop every key whose value is nil,
    # so passing `key => nil` removes that key.
    #
    # Returns a new Hash; neither argument is modified.
    def self.update_yaml_frontmatter(metadata, new_metadata)
      metadata.merge(new_metadata).compact
    end

    # Serialize metadata as YAML front matter and prepend it to content.
    #
    # Returns a String: the YAML dump, a closing `---` line, then content.
    def self.join_yaml_frontmatter(metadata, content)
      metadata.to_yaml + "---\n" + content
    end

    # Update the front matter of a document given as a String.
    #
    # Returns the document text with the merged front matter in place.
    def self.update_file(file, new_metadata)
      metadata, content = split_yaml_frontmatter(file)
      metadata = update_yaml_frontmatter(metadata, new_metadata)
      join_yaml_frontmatter(metadata, content)
    end

    # Render Markdown to HTML with CommonMarker.
    #
    # options - Hash; :skip_yaml_header strips a leading front matter block
    #           before rendering.
    #
    # Returns the value of CommonMarker.render_html.
    def self.render_html(text, options={})
      text = split_yaml_frontmatter(text).last if options[:skip_yaml_header]

      CommonMarker.render_html(text, :default)
    end
  end
end
@@ -0,0 +1,12 @@
1
module Bergamasco
  # HTML sanitizing that keeps only a whitelist of tags.
  module Sanitize
    # Tags kept when the caller does not supply options[:tags].
    ALLOWED_TAGS = Set.new(%w(strong em b i code pre sub sup br))

    # Strip every tag from an HTML fragment except the allowed ones.
    #
    # text    - String with the HTML fragment.
    # options - Hash passed on to Bergamasco::WhitelistScrubber:
    #           :tags       - collection of allowed tag names
    #                         (default: ALLOWED_TAGS)
    #           :attributes - collection of allowed attribute names
    #
    # Returns the scrubbed fragment as a String.
    def self.sanitize(text, options={})
      # Build the scrubber options on a copy: the original `||=` wrote the
      # default into the caller's hash and raised on a frozen hash.
      scrubber_options = options.merge(tags: options[:tags] || ALLOWED_TAGS)
      custom_scrubber = Bergamasco::WhitelistScrubber.new(scrubber_options)

      Loofah.scrub_fragment(text, custom_scrubber).to_s
    end
  end
end
@@ -0,0 +1,75 @@
1
module Bergamasco
  # Produces short summaries of HTML text, either by cutting at a separator
  # string or by truncating to a maximum length.
  module Summarize
    # Based on https://github.com/middleman/middleman-blog/blob/master/lib/middleman-blog/blog_article.rb
    #
    # options - Hash:
    #           :separator - cut the text at the first occurrence of this value
    #           :length    - maximum summary length (default: 250)
    #           :ellipsis  - marker appended to truncated text (default: "...")
    #
    # The separator takes precedence when both styles are requested.
    def self.summary(text, options={})
      return truncate_at_separator(text, options[:separator]) if options[:separator]

      truncate_at_length(text, options[:length] || 250, options[:ellipsis] || "...")
    end

    # From https://github.com/middleman/middleman-blog/blob/master/lib/middleman-blog/truncate_html.rb
    #
    # Keeps the part of text before the first separator and returns it as the
    # inner HTML of a parsed fragment.
    def self.truncate_at_separator(text, separator)
      text = text.encode('UTF-8') if text.respond_to?(:encode)
      leading_part = text.split(separator).first
      Nokogiri::HTML::DocumentFragment.parse(leading_part).inner_html
    end

    # From https://github.com/middleman/middleman-blog/blob/master/lib/middleman-blog/truncate_html.rb
    #
    # Returns text untouched when its visible text fits into max_length minus
    # the ellipsis length; otherwise returns the truncated fragment's HTML.
    def self.truncate_at_length(text, max_length, ellipsis = "...")
      ellipsis_width = ellipsis.length
      text = text.encode('UTF-8') if text.respond_to?(:encode)
      fragment = Nokogiri::HTML::DocumentFragment.parse(text)
      visible_width = fragment.inner_text.length
      budget = max_length - ellipsis_width
      return text unless visible_width > budget

      fragment.truncate(budget, ellipsis).inner_html
    end
  end
end
36
+
37
+ # from https://github.com/middleman/middleman-blog/blob/master/lib/middleman-blog/truncate_html.rb
38
# Mixins that give Nokogiri nodes a `truncate(max_length, ellipsis)` method.
module NokogiriTruncator
  # For nodes that hold children (fragments and elements).
  module NodeWithChildren
    # Returns self when the node's text already fits. Otherwise returns a
    # copy whose children are re-added one by one, each truncated to the text
    # length still available.
    def truncate(max_length, ellipsis)
      return self unless inner_text.length > max_length

      shell = dup
      shell.children.remove

      children.each do |child|
        budget = max_length - shell.inner_text.length
        break unless budget > 0
        shell.add_child(child.truncate(budget, ellipsis))
      end
      shell
    end
  end

  # For text nodes.
  module TextNode
    # Returns a new text node holding at most max_length characters plus the
    # rest of the word the cut landed in, followed by the ellipsis when
    # anything was dropped.
    def truncate(max_length, ellipsis)
      full_text = content
      # The trailing [\w]* extends the match to the end of the current word.
      clipped = full_text.match(/(.{1,#{max_length}}[\w]*)/m).to_s
      clipped << ellipsis if clipped.length < full_text.length

      Nokogiri::XML::Text.new(clipped, parent)
    end
  end

  # For comment nodes.
  module CommentNode
    # Comments are invisible, so they are returned as they are.
    def truncate(*args)
      self
    end
  end
end

# Register the mixins on the matching Nokogiri node classes.
{
  Nokogiri::HTML::DocumentFragment => NokogiriTruncator::NodeWithChildren,
  Nokogiri::XML::Element => NokogiriTruncator::NodeWithChildren,
  Nokogiri::XML::Text => NokogiriTruncator::TextNode,
  Nokogiri::XML::Comment => NokogiriTruncator::CommentNode
}.each do |node_class, mixin|
  node_class.send(:include, mixin)
end
@@ -0,0 +1,3 @@
1
module Bergamasco
  # Release number of the gem; the gemspec reads it for `s.version`.
  VERSION = "0.1.1"
end
@@ -0,0 +1,45 @@
1
+ # modified from https://gist.github.com/ivan-kolmychek/ee2fdc53f3e2c637271d
2
+
3
module Bergamasco
  # Loofah scrubber that keeps only whitelisted tags and attributes. A node
  # that is not allowed is replaced by its children, so the text inside a
  # stripped tag survives.
  class WhitelistScrubber < Loofah::Scrubber
    # options - Hash:
    #           :tags       - collection of allowed tag names
    #           :attributes - collection of allowed attribute names
    #           When a list is missing or empty, Loofah's own HTML5 whitelist
    #           is used for that aspect instead.
    def initialize(options={})
      @direction = :bottom_up
      @tags = options[:tags]
      @attributes = options[:attributes]
    end

    # Called by Loofah for every node. Allowed nodes only get their attributes
    # scrubbed; every other node is unwrapped (children moved in front of it)
    # and removed.
    def scrub(node)
      if node_allowed?(node)
        # Explicit branch instead of `scrub_node_attributes(node) and return`:
        # an allowed node must be kept whatever the helper happens to return.
        scrub_node_attributes(node)
        return CONTINUE
      end

      node.before node.children
      node.remove
    end

    private

    # True when the caller supplied a non-empty list that can be queried.
    def custom_list?(list)
      list.present? && list.respond_to?(:include?)
    end

    # Remove attributes that are not allowed. Always returns true.
    def scrub_node_attributes(node)
      unless custom_list?(@attributes)
        # No custom list: defer to Loofah and stop here. The original fell
        # through to `@attributes.include?` whenever Loofah's scrubber
        # returned a falsy value.
        fallback_scrub_node_attributes(node)
        return true
      end

      node.attribute_nodes.each do |attr_node|
        attr_node.remove unless @attributes.include?(attr_node.name)
      end
      true
    end

    # Node types other than elements that are always kept.
    def allowed_not_element_node_types
      [ Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE ]
    end

    def fallback_scrub_node_attributes(node)
      Loofah::HTML5::Scrub.scrub_attributes(node)
    end

    def fallback_allowed_element_detection(node)
      Loofah::HTML5::Scrub.allowed_element?(node.name)
    end

    # Decide whether a node stays in the document.
    def node_allowed?(node)
      # Text and CDATA are checked first so they are kept even without a
      # custom tag list. The original asked the element whitelist about them
      # by node name in that case, which presumably rejected them and dropped
      # all text -- NOTE(review): confirm against Loofah's allowed_element?.
      return true if allowed_not_element_node_types.include?(node.type)
      return false unless node.type == Nokogiri::XML::Node::ELEMENT_NODE
      return fallback_allowed_element_detection(node) unless custom_list?(@tags)

      @tags.include? node.name
    end
  end
end
@@ -0,0 +1 @@
1
+ CERN-LHC. A first measurement is presented of the top quark mass using the decay channel $\text{t}\to(\text{W}\to\ell\nu)\,(\text{b}\to\text{J}/\psi+\text{X}\to\mu^+\mu^-+\text{X})$, in events selected in proton-proton collisions and recorded with the CMS detector at the LHC at a center-of-mass energy of 8 TeV. The data correspond to an integrated luminosity of 19.7 /fb, with 666 $\text{t}\bar{\text{t}}$ and single top quark candidate events containing a reconstructed J/$\psi$ candidate decaying into an oppositely-charged muon pair. The top quark mass is extracted from an unbinned maximum-likelihood fit to the invariant mass of the (J/$\psi$+$\ell$) system, where $\ell$ is an electron or a muon from W boson decay. The resulting top quark mass measurement is 173.5 $\pm$ 3.0(stat) $\pm$ 0.9(syst) GeV. Even though the results are statistically limited, the dominant systematic uncertainties are different from those of the most precise direct reconstruction methods. As the sensitivity to jet-related uncertainties is negligible, this allows the possibility to contribute significantly in combination with other top quark mass measurements. Furthermore, with the larger data set expected in the next runs of the LHC, the method described in this paper will provide a result which will be more competitive with those obtained from the conventional reconstruction techniques.
@@ -0,0 +1,87 @@
1
+ In 1998 Tim Berners-Lee coined the term cool URIs [-@https://www.w3.org/Provider/Style/URI], that is URIs that don’t change. We know that URLs referenced in the scholarly literature are often not cool, leading to link rot [@https://doi.org/10.1371/journal.pone.0115253] and making it hard or impossible to find the referenced resource.READMORE
2
+
3
+ Cool URIs are, of course, a fundamental principle behind DOIs, with the two important concepts [*resolution*](https://www.doi.org/doi_handbook/3_Resolution.html) (it is very hard to maintain a URL directly pointing at a resource) and [*policies*](https://www.doi.org/doi_handbook/6_Policies.html) (that all DOI registration agencies and organizations minting DOIs agree to maintain the redirection). The third essential element for DOIs, their [*data model*](https://www.doi.org/doi_handbook/4_Data_Model.html), is not directly about persistent linking, but about the discoverability of the linked resources via standard metadata in a central index.
4
+
5
+ All DOIs, expressed as HTTP URI, are therefore cool URIs. So what is a cool DOI? And, furthermore, how to create and use them? To understand what a cool DOI is, we have to explain the three parts that make up a DOI:
6
+
7
+ ![](/images/2016/12/doi-parts.png)
8
+
9
+ ### Proxy
10
+
11
+ The proxy is not part of the DOI specification, but almost all scholarly DOIs that users encounter today will be expressed as HTTP URLs. DataCite recommends that all DOIs are displayed as permanent URLs, consistent with the recommendations of other DOI registration agencies, e.g. the [Crossref DOI display guidelines](http://www.crossref.org/02publishers/doi_display_guidelines.html). When the DOI system was originally designed, it was thought that the DOI protocol would become widely used, but that clearly has not happened and displaying DOIs as **doi:10.5281/ZENODO.31780** is therefore not recommended.
12
+
13
+ The DOI proxy enables the functionality of expressing DOIs as HTTP URIs. Users should also be aware of these two recommendations:
14
+
15
+ * Use [doi.org](https://www.doi.org/doi_proxy/proxy_policies.html) instead of dx.doi.org as DNS name
16
+ * Use the HTTPS protocol instead of HTTP protocol
17
+
18
+ Ed Pentz from Crossref makes the case for HTTPS in a [September blog post](http://blog.crossref.org/2016/09/new-crossref-doi-display-guidelines.html). The web, and therefore also the scholarly web, is moving to HTTPS as the default. It is important that the DOI proxy redirects to HTTPS URLs, and it will take some time until all DataCite data centers use HTTPS for the landing pages their DOIs redirects to.
19
+
20
+ What many users don’t know is that doi.org is not the only proxy server for DOIs. DOIs use the handle system and any handle server will resolve a DOI, just as doi.org will resolve any handle. This means that [https://hdl.handle.net/10.5281/ZENODO.31780](https://hdl.handle.net/10.5281/ZENODO.31780) will resolve to the landing page for that DOI and that [http://doi.org/10273/BGRB5054RX05201](http://doi.org/10273/BGRB5054RX05201) is a handle (for a [IGSN](http://www.igsn.org/)) and not a DOI.
21
+
22
+ ### Prefix
23
+
24
+ The DOI prefix is used as a namespace so that DOIs are globally unique without requiring global coordination for every new identifier. Prefixes in the handle system and therefore for DOIs are numbers without any semantic meaning. One lesson learned with persistent identifiers is that adding meaning to the identifier (e.g. by using a prefix with the name of the data repository) is always dangerous, because – despite best intentions – all names can change over time.
25
+
26
+ Since the DOI prefix is a namespace to keep DOIs globally unique, there is usually no need for multiple prefixes for one organization managing DOI assignment. The tricky part is that these responsibilities can change, e.g. when an organization manages multiple repositories and one of them is migrated to another organization. It therefore makes sense to assign one prefix per list of resources that always stays together, e.g. one repository. It is possible that one prefix is managed by multiple organizations (as long as they use the same DOI registration agency), but that makes DOI management more complex.
27
+
28
+ ### Suffix
29
+
30
+ The suffix for a DOI can be (almost) any string. Which is both a feature and a curse. It is a feature because it gives maximal flexibility, for example when migrating existing identifiers to the DOI system. And it is a curse because it not always works well in the web context, as the list of characters allowed in a URL is limited. A good example of this are SICIs ([Serial Item and Contribution Identifier](https://en.wikipedia.org/wiki/Serial_Item_and_Contribution_Identifier)), they were defined in 1996 before the DOI system was implemented, and could then be migrated to DOIs. Unfortunately they can contain many characters that are problematic in a URL or make it difficult to validate the DOI, as in [https://doi.org/10.1002/(sici)1099-1409(199908/10)3:6/7<672::aid-jpp192>3.0.co;2-8](https://doi.org/10.1002/(sici)1099-1409(199908/10)3:6/7<672::aid-jpp192>3.0.co;2-8). A Crossref [blog post](http://blog.crossref.org/2015/08/doi-regular-expressions.html) by Andrew Gilmartin gives a good overview about the characters found in DOIs and suggests the following regular expression to check for valid DOIs:
31
+
32
+ ```
33
+ /^10.\d{4,9}/[-._;()/:A-Z0-9]+$/i
34
+ ```
35
+
36
+ SICIs demonstrate two other pitfalls:
37
+
38
+ * they contain semantic information (ISSN, volume, number, etc.) that may change over time, and
39
+ * they are long, difficult to transcribe, with characters not allowed in URLs, and not very human-readable.
40
+
41
+ Semantic information might also lead users to expect certain functionalities. A common pattern that we see at DataCite is to include information about the version or parent in the suffix, e.g. [https://doi.org/10.6084/M9.FIGSHARE.3501629.V1](https://doi.org/10.6084/M9.FIGSHARE.3501629.V1) or [https://doi.org/10.5061/DRYAD.0SN63/7](https://doi.org/10.5061/DRYAD.0SN63/7). While the decision on what to put into the suffix is up to each data center, we should make sure users don't think that these are functionalities of the DOI system (e.g. that adding **.V2** to any DOI name will resolve to version 2 of that resource).
42
+
43
+ Another issue to keep in mind when assigning suffixes is that DOIs – in contrast to HTTP URIs – are case-insensitive, [https://doi.org/10.5281/ZENODO.31780](https://doi.org/10.5281/ZENODO.31780) and [https://doi.org/10.5281/zenodo.31780](https://doi.org/10.5281/zenodo.31780) are the same DOI. All DOIs are [converted to upper case](https://www.doi.org/doi_handbook/2_Numbering.html#2.4) upon registration and DOI resolution, but DOIs are not consistently displayed in such a way.
44
+
45
+ ### Generating cool DOIs
46
+
47
+ With all that, what should the ideal DOI look like? Its suffix should be:
48
+
49
+ * opaque without semantic information
50
+ * work well in a web environment, avoiding characters problematic in URLs
51
+ * short and human-readable
52
+ * Resistant to transcription errors
53
+ * easy to generate
54
+
55
+ On Tuesday DataCite released a tool that helps generating such a suffix, an open source command line tool called [cirneco](https://github.com/datacite/cirneco) (a lot of our open source software uses Italian dog breed names). Cirneco is a Ruby gem that can be installed via
56
+
57
+ ```
58
+ gem install cirneco
59
+ ```
60
+
61
+ Cirneco uses base32 encoding, as [described](http://www.crockford.com/wrmg/base32.html) by Douglas Crockford. The encoding starts with a randomly generated number to guarantee uniqueness of the identifier, and then encodes the number into a string that uses all numbers and uppercase letters. It avoids the letters I, O and L as they can be confused with the letter 1 and 0, using 32 characters (and 5 checksum characters) in total. The last character is a checksum. The resulting string from cirneco always has a length of 8 characters, in groups of 4 separated by a hyphen to help with readability. The advantage of base32 encoding over using only numbers (as for example ORCID is doing) is that the resulting string becomes much more compact, the available 7 characters (plus one for the checksum) can encode 34,359,738,367 strings, compared to 10 million when only using numbers. This number is large enough that the resulting suffix will not only be unique for a given prefix, but also unique for all DOIs (there is a very small chance to get the same random number twice, but this will be rejected when trying to register the DOI).
62
+
63
+ Another common way to generate random strings would have been universally unique identifiers ([UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier)), but they are long and not very human-readable, e.g. [https://doi.org/10.4233/UUID:6D192FE2-DE18-4556-873A-D3CD56AB96A6](https://doi.org/10.4233/UUID:6D192FE2-DE18-4556-873A-D3CD56AB96A6).
64
+
65
+ An example DOI generated by cirneco would be
66
+
67
+ ```
68
+ cirneco doi generate --prefix 10.5555
69
+ 10.5555/KVTD-VPWM
70
+ ```
71
+
72
+ The generated DOI is short enough that it should work well in places where space is limited, providing an alternative to the [ShortDOI](http://shortdoi.org/) service which shortens existing DOIs, but does this by adding another layer on top of the DOI proxy.
73
+
74
+ Another cirneco command checks that this is a valid base32 string using the checksum
75
+
76
+ ```
77
+ cirneco doi check 10.5555/KVTD-VPWM
78
+ Checksum for 10.5555/KVTD-VPWM is valid
79
+ ```
80
+
81
+ This can be used to quickly verify a DOI, e.g. in a web form or API. The Ruby base32 encoding library used by cirneco is open source ([https://github.com/datacite/base32](https://github.com/datacite/base32). I added the checksum to the existing library), and implementations of the Crockford base32 encoding pattern are available in many other languages, including [Python](https://github.com/jbittel/base32-crockford), [PHP](https://github.com/dflydev/dflydev-base32-crockford), [Javascript](https://www.npmjs.com/package/base32-crockford), [Java](http://stackoverflow.com/questions/22385467/crockford-base32-encoding-for-large-number-java-implementation), [Go](https://github.com/richardlehane/crock32) and [.NET](https://crockfordbase32.codeplex.com/).
82
+
83
+ To answer the question raised at the beginning: a cool DOI is a DOI expressed as HTTPS URI using the doi.org proxy and using a base32-encoded suffix, for example **https://doi.org/10.5555/KVTD-VPWM**. This DOI works well in a web environment, is human readable, easy to parse and detect (e.g. in text mining), and can be generated using an algorithm that is well understood and supported.
84
+
85
+ ![](/images/2016/12/cool-dois.png)
86
+
87
+ ### References
@@ -0,0 +1,97 @@
1
+ ---
2
+ layout: post
3
+ title: Cool DOI's
4
+ author: mfenner
5
+ date: 2016-12-15
6
+ tags:
7
+ - doi
8
+ - featured
9
+ image: https://blog.datacite.org/images/2016/12/cool-dois.png
10
+ ---
11
+ In 1998 Tim Berners-Lee coined the term cool URIs [-@https://www.w3.org/Provider/Style/URI], that is URIs that don’t change. We know that URLs referenced in the scholarly literature are often not cool, leading to link rot [@https://doi.org/10.1371/journal.pone.0115253] and making it hard or impossible to find the referenced resource.READMORE
12
+
13
+ Cool URIs are, of course, a fundamental principle behind DOIs, with the two important concepts [*resolution*](https://www.doi.org/doi_handbook/3_Resolution.html) (it is very hard to maintain a URL directly pointing at a resource) and [*policies*](https://www.doi.org/doi_handbook/6_Policies.html) (that all DOI registration agencies and organizations minting DOIs agree to maintain the redirection). The third essential element for DOIs, their [*data model*](https://www.doi.org/doi_handbook/4_Data_Model.html), is not directly about persistent linking, but about the discoverability of the linked resources via standard metadata in a central index.
14
+
15
+ All DOIs, expressed as HTTP URI, are therefore cool URIs. So what is a cool DOI? And, furthermore, how to create and use them? To understand what a cool DOI is, we have to explain the three parts that make up a DOI:
16
+
17
+ ![](/images/2016/12/doi-parts.png)
18
+
19
+ ### Proxy
20
+
21
+ The proxy is not part of the DOI specification, but almost all scholarly DOIs that users encounter today will be expressed as HTTP URLs. DataCite recommends that all DOIs are displayed as permanent URLs, consistent with the recommendations of other DOI registration agencies, e.g. the [Crossref DOI display guidelines](http://www.crossref.org/02publishers/doi_display_guidelines.html). When the DOI system was originally designed, it was thought that the DOI protocol would become widely used, but that clearly has not happened and displaying DOIs as **doi:10.5281/ZENODO.31780** is therefore not recommended.
22
+
23
+ The DOI proxy enables the functionality of expressing DOIs as HTTP URIs. Users should also be aware of these two recommendations:
24
+
25
+ * Use [doi.org](https://www.doi.org/doi_proxy/proxy_policies.html) instead of dx.doi.org as DNS name
26
+ * Use the HTTPS protocol instead of HTTP protocol
27
+
28
+ Ed Pentz from Crossref makes the case for HTTPS in a [September blog post](http://blog.crossref.org/2016/09/new-crossref-doi-display-guidelines.html). The web, and therefore also the scholarly web, is moving to HTTPS as the default. It is important that the DOI proxy redirects to HTTPS URLs, and it will take some time until all DataCite data centers use HTTPS for the landing pages their DOIs redirect to.
29
+
30
+ What many users don’t know is that doi.org is not the only proxy server for DOIs. DOIs use the handle system and any handle server will resolve a DOI, just as doi.org will resolve any handle. This means that [https://hdl.handle.net/10.5281/ZENODO.31780](https://hdl.handle.net/10.5281/ZENODO.31780) will resolve to the landing page for that DOI and that [http://doi.org/10273/BGRB5054RX05201](http://doi.org/10273/BGRB5054RX05201) is a handle (for an [IGSN](http://www.igsn.org/)) and not a DOI.
31
+
32
+ ### Prefix
33
+
34
+ The DOI prefix is used as a namespace so that DOIs are globally unique without requiring global coordination for every new identifier. Prefixes in the handle system and therefore for DOIs are numbers without any semantic meaning. One lesson learned with persistent identifiers is that adding meaning to the identifier (e.g. by using a prefix with the name of the data repository) is always dangerous, because – despite best intentions – all names can change over time.
35
+
36
+ Since the DOI prefix is a namespace to keep DOIs globally unique, there is usually no need for multiple prefixes for one organization managing DOI assignment. The tricky part is that these responsibilities can change, e.g. when an organization manages multiple repositories and one of them is migrated to another organization. It therefore makes sense to assign one prefix per list of resources that always stays together, e.g. one repository. It is possible that one prefix is managed by multiple organizations (as long as they use the same DOI registration agency), but that makes DOI management more complex.
37
+
38
+ ### Suffix
39
+
40
+ The suffix for a DOI can be (almost) any string. Which is both a feature and a curse. It is a feature because it gives maximal flexibility, for example when migrating existing identifiers to the DOI system. And it is a curse because it does not always work well in the web context, as the list of characters allowed in a URL is limited. A good example of this are SICIs ([Serial Item and Contribution Identifier](https://en.wikipedia.org/wiki/Serial_Item_and_Contribution_Identifier)), they were defined in 1996 before the DOI system was implemented, and could then be migrated to DOIs. Unfortunately they can contain many characters that are problematic in a URL or make it difficult to validate the DOI, as in [https://doi.org/10.1002/(sici)1099-1409(199908/10)3:6/7<672::aid-jpp192>3.0.co;2-8](https://doi.org/10.1002/(sici)1099-1409(199908/10)3:6/7<672::aid-jpp192>3.0.co;2-8). A Crossref [blog post](http://blog.crossref.org/2015/08/doi-regular-expressions.html) by Andrew Gilmartin gives a good overview about the characters found in DOIs and suggests the following regular expression to check for valid DOIs:
41
+
42
+ ```
43
+ /^10.\d{4,9}/[-._;()/:A-Z0-9]+$/i
44
+ ```
45
+
46
+ SICIs demonstrate two other pitfalls:
47
+
48
+ * they contain semantic information (ISSN, volume, number, etc.) that may change over time, and
49
+ * they are long, difficult to transcribe, with characters not allowed in URLs, and not very human-readable.
50
+
51
+ Semantic information might also lead users to expect certain functionalities. A common pattern that we see at DataCite is to include information about the version or parent in the suffix, e.g. [https://doi.org/10.6084/M9.FIGSHARE.3501629.V1](https://doi.org/10.6084/M9.FIGSHARE.3501629.V1) or [https://doi.org/10.5061/DRYAD.0SN63/7](https://doi.org/10.5061/DRYAD.0SN63/7). While the decision on what to put into the suffix is up to each data center, we should make sure users don't think that these are functionalities of the DOI system (e.g. that adding **.V2** to any DOI name will resolve to version 2 of that resource).
52
+
53
+ Another issue to keep in mind when assigning suffixes is that DOIs – in contrast to HTTP URIs – are case-insensitive, [https://doi.org/10.5281/ZENODO.31780](https://doi.org/10.5281/ZENODO.31780) and [https://doi.org/10.5281/zenodo.31780](https://doi.org/10.5281/zenodo.31780) are the same DOI. All DOIs are [converted to upper case](https://www.doi.org/doi_handbook/2_Numbering.html#2.4) upon registration and DOI resolution, but DOIs are not consistently displayed in such a way.
54
+
55
+ ### Generating cool DOIs
56
+
57
+ With all that, what should the ideal DOI look like? Its suffix should be:
58
+
59
+ * opaque without semantic information
60
+ * work well in a web environment, avoiding characters problematic in URLs
61
+ * short and human-readable
62
+ * resistant to transcription errors
63
+ * easy to generate
64
+
65
+ On Tuesday DataCite released a tool that helps generate such a suffix, an open source command line tool called [cirneco](https://github.com/datacite/cirneco) (a lot of our open source software uses Italian dog breed names). Cirneco is a Ruby gem that can be installed via
66
+
67
+ ```
68
+ gem install cirneco
69
+ ```
70
+
71
+ Cirneco uses base32 encoding, as [described](http://www.crockford.com/wrmg/base32.html) by Douglas Crockford. The encoding starts with a randomly generated number to guarantee uniqueness of the identifier, and then encodes the number into a string that uses all numbers and uppercase letters. It avoids the letters I, O and L as they can be confused with the digits 1 and 0, using 32 characters (and 5 checksum characters) in total. The last character is a checksum. The resulting string from cirneco always has a length of 8 characters, in groups of 4 separated by a hyphen to help with readability. The advantage of base32 encoding over using only numbers (as for example ORCID is doing) is that the resulting string becomes much more compact, the available 7 characters (plus one for the checksum) can encode 34,359,738,367 strings, compared to 10 million when only using numbers. This number is large enough that the resulting suffix will not only be unique for a given prefix, but also unique for all DOIs (there is a very small chance to get the same random number twice, but this will be rejected when trying to register the DOI).
72
+
73
+ Another common way to generate random strings would have been universally unique identifiers ([UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier)), but they are long and not very human-readable, e.g. [https://doi.org/10.4233/UUID:6D192FE2-DE18-4556-873A-D3CD56AB96A6](https://doi.org/10.4233/UUID:6D192FE2-DE18-4556-873A-D3CD56AB96A6).
74
+
75
+ An example DOI generated by cirneco would be
76
+
77
+ ```
78
+ cirneco doi generate --prefix 10.5555
79
+ 10.5555/KVTD-VPWM
80
+ ```
81
+
82
+ The generated DOI is short enough that it should work well in places where space is limited, providing an alternative to the [ShortDOI](http://shortdoi.org/) service which shortens existing DOIs, but does this by adding another layer on top of the DOI proxy.
83
+
84
+ Another cirneco command checks that this is a valid base32 string using the checksum
85
+
86
+ ```
87
+ cirneco doi check 10.5555/KVTD-VPWM
88
+ Checksum for 10.5555/KVTD-VPWM is valid
89
+ ```
90
+
91
+ This can be used to quickly verify a DOI, e.g. in a web form or API. The Ruby base32 encoding library used by cirneco is open source ([https://github.com/datacite/base32](https://github.com/datacite/base32); I added the checksum to the existing library), and implementations of the Crockford base32 encoding pattern are available in many other languages, including [Python](https://github.com/jbittel/base32-crockford), [PHP](https://github.com/dflydev/dflydev-base32-crockford), [Javascript](https://www.npmjs.com/package/base32-crockford), [Java](http://stackoverflow.com/questions/22385467/crockford-base32-encoding-for-large-number-java-implementation), [Go](https://github.com/richardlehane/crock32) and [.NET](https://crockfordbase32.codeplex.com/).
92
+
93
+ To answer the question raised at the beginning: a cool DOI is a DOI expressed as HTTPS URI using the doi.org proxy and using a base32-encoded suffix, for example **https://doi.org/10.5555/KVTD-VPWM**. This DOI works well in a web environment, is human readable, easy to parse and detect (e.g. in text mining), and can be generated using an algorithm that is well understood and supported.
94
+
95
+ ![](/images/2016/12/cool-dois.png)
96
+
97
+ ### References
@@ -0,0 +1,8 @@
1
+ layout: post
2
+ title: Cool DOI's
3
+ author: mfenner
4
+ date: 2016-12-15
5
+ tags:
6
+ - doi
7
+ - featured
8
+ image: https://blog.datacite.org/images/2016/12/cool-dois.png
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
+ describe Bergamasco::Markdown do
4
+ subject { Bergamasco::Markdown }
5
+
6
+ it 'should split yaml frontmatter' do
7
+ filepath = fixture_path + 'cool-dois.html.md'
8
+ file = IO.read(filepath)
9
+ metadata, content = subject.split_yaml_frontmatter(file)
10
+ expect(metadata).to have_key("title")
11
+ expect(content).to start_with("In 1998 Tim Berners-Lee coined")
12
+ end
13
+
14
+ it 'should update yaml frontmatter' do
15
+ filepath = fixture_path + 'cool-dois.html.md'
16
+ file = IO.read(filepath)
17
+ new_metadata = { "doi" => "10.23725/0000-03VC"}
18
+ metadata, content = subject.split_yaml_frontmatter(file)
19
+ metadata = subject.update_yaml_frontmatter(metadata, new_metadata)
20
+ expect(metadata["doi"]).to eq("10.23725/0000-03VC")
21
+ end
22
+
23
+ it 'should update yaml frontmatter remove attribute' do
24
+ filepath = fixture_path + 'cool-dois.html.md'
25
+ file = IO.read(filepath)
26
+ new_metadata = { "title" => nil }
27
+ metadata, content = subject.split_yaml_frontmatter(file)
28
+ metadata = subject.update_yaml_frontmatter(metadata, new_metadata)
29
+ expect(metadata["title"]).to be nil
30
+ end
31
+
32
+ it 'should join yaml frontmatter' do
33
+ filepath = fixture_path + 'cool-dois.html.md'
34
+ file = IO.read(filepath)
35
+ metadata, content = subject.split_yaml_frontmatter(file)
36
+ updated_file = subject.join_yaml_frontmatter(metadata, content)
37
+ expect(updated_file).to eq(file)
38
+ end
39
+
40
+ it 'should update file' do
41
+ filepath = fixture_path + 'cool-dois.html.md'
42
+ file = IO.read(filepath)
43
+ new_metadata = { "doi" => "10.23725/0000-03VC"}
44
+ updated_file = subject.update_file(file, new_metadata)
45
+ expect(updated_file).to include("\ndoi: 10.23725/0000-03VC\n")
46
+ end
47
+
48
+ it 'should convert markdown' do
49
+ filepath = fixture_path + 'cool-dois.html.md'
50
+ file = IO.read(filepath)
51
+ html = subject.render_html(file, skip_yaml_header: true)
52
+ expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined")
53
+ end
54
+ end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ describe Bergamasco::Sanitize do
4
+ subject { Bergamasco::Sanitize }
5
+
6
+ it 'should remove a tags' do
7
+ text = "In 1998 <strong>Tim Berners-Lee</strong> coined the term <a href=\"https://www.w3.org/Provider/Style/URI\">cool URIs</a>"
8
+ content = subject.sanitize(text)
9
+ expect(content).to eq("In 1998 <strong>Tim Berners-Lee</strong> coined the term cool URIs")
10
+ end
11
+
12
+ it 'should only keep specific tags' do
13
+ text = "In 1998 <strong>Tim Berners-Lee</strong> coined the term <a href=\"https://www.w3.org/Provider/Style/URI\">cool URIs</a>"
14
+ content = subject.sanitize(text, tags: ["a"])
15
+ expect(content).to eq("In 1998 Tim Berners-Lee coined the term <a href=\"https://www.w3.org/Provider/Style/URI\">cool URIs</a>")
16
+ end
17
+ end
@@ -0,0 +1,18 @@
1
+ require 'bundler/setup'
2
+ Bundler.setup
3
+
4
+ require 'simplecov'
5
+ SimpleCov.start
6
+
7
+ require 'bergamasco'
8
+ require 'rspec'
9
+
10
+ RSpec.configure do |config|
11
+ config.expect_with :rspec do |c|
12
+ c.syntax = :expect
13
+ end
14
+ end
15
+
16
+ def fixture_path
17
+ File.expand_path("../fixtures", __FILE__) + '/'
18
+ end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ describe Bergamasco::Summarize do
4
+ subject { Bergamasco::Summarize }
5
+
6
+ it 'should truncate after 250 characters' do
7
+ filepath = fixture_path + 'cool-dois-without-yml.md'
8
+ file = IO.read(filepath)
9
+ content = subject.summary(file)
10
+ expect(content.length).to eq(250)
11
+ expect(content).to start_with("In 1998 Tim Berners-Lee coined")
12
+ end
13
+
14
+ it 'should truncate after 75 characters' do
15
+ filepath = fixture_path + 'cool-dois-without-yml.md'
16
+ file = IO.read(filepath)
17
+ content = subject.summary(file, length: 75)
18
+ expect(content.length).to eq(83)
19
+ expect(content).to start_with("In 1998 Tim Berners-Lee coined")
20
+ end
21
+
22
+ it 'should truncate at separator' do
23
+ filepath = fixture_path + 'cool-dois-without-yml.md'
24
+ file = IO.read(filepath)
25
+ separator = "READMORE"
26
+ content = subject.summary(file, separator: separator)
27
+ expect(content).to start_with("In 1998 Tim Berners-Lee coined")
28
+ expect(content).to end_with("the referenced resource.")
29
+ end
30
+ end
metadata ADDED
@@ -0,0 +1,293 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bergamasco
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Martin Fenner
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-12-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.7
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: loofah
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.0'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 2.0.3
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '2.0'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 2.0.3
47
+ - !ruby/object:Gem::Dependency
48
+ name: pandoc-ruby
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.0'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 2.0.0
57
+ type: :runtime
58
+ prerelease: false
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: '2.0'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.0.0
67
+ - !ruby/object:Gem::Dependency
68
+ name: builder
69
+ requirement: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - "~>"
72
+ - !ruby/object:Gem::Version
73
+ version: '3.2'
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: 3.2.2
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '3.2'
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: 3.2.2
87
+ - !ruby/object:Gem::Dependency
88
+ name: multi_json
89
+ requirement: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - "~>"
92
+ - !ruby/object:Gem::Version
93
+ version: 1.11.2
94
+ type: :runtime
95
+ prerelease: false
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - "~>"
99
+ - !ruby/object:Gem::Version
100
+ version: 1.11.2
101
+ - !ruby/object:Gem::Dependency
102
+ name: oj
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: 2.13.1
108
+ type: :runtime
109
+ prerelease: false
110
+ version_requirements: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - "~>"
113
+ - !ruby/object:Gem::Version
114
+ version: 2.13.1
115
+ - !ruby/object:Gem::Dependency
116
+ name: activesupport
117
+ requirement: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - "~>"
120
+ - !ruby/object:Gem::Version
121
+ version: '4.2'
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: 4.2.5
125
+ type: :runtime
126
+ prerelease: false
127
+ version_requirements: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '4.2'
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: 4.2.5
135
+ - !ruby/object:Gem::Dependency
136
+ name: safe_yaml
137
+ requirement: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - "~>"
140
+ - !ruby/object:Gem::Version
141
+ version: '1.0'
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: 1.0.4
145
+ type: :runtime
146
+ prerelease: false
147
+ version_requirements: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '1.0'
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: 1.0.4
155
+ - !ruby/object:Gem::Dependency
156
+ name: commonmarker
157
+ requirement: !ruby/object:Gem::Requirement
158
+ requirements:
159
+ - - "~>"
160
+ - !ruby/object:Gem::Version
161
+ version: 0.14.0
162
+ type: :runtime
163
+ prerelease: false
164
+ version_requirements: !ruby/object:Gem::Requirement
165
+ requirements:
166
+ - - "~>"
167
+ - !ruby/object:Gem::Version
168
+ version: 0.14.0
169
+ - !ruby/object:Gem::Dependency
170
+ name: bundler
171
+ requirement: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - "~>"
174
+ - !ruby/object:Gem::Version
175
+ version: '1.0'
176
+ type: :development
177
+ prerelease: false
178
+ version_requirements: !ruby/object:Gem::Requirement
179
+ requirements:
180
+ - - "~>"
181
+ - !ruby/object:Gem::Version
182
+ version: '1.0'
183
+ - !ruby/object:Gem::Dependency
184
+ name: rspec
185
+ requirement: !ruby/object:Gem::Requirement
186
+ requirements:
187
+ - - "~>"
188
+ - !ruby/object:Gem::Version
189
+ version: '3.4'
190
+ type: :development
191
+ prerelease: false
192
+ version_requirements: !ruby/object:Gem::Requirement
193
+ requirements:
194
+ - - "~>"
195
+ - !ruby/object:Gem::Version
196
+ version: '3.4'
197
+ - !ruby/object:Gem::Dependency
198
+ name: rake
199
+ requirement: !ruby/object:Gem::Requirement
200
+ requirements:
201
+ - - ">="
202
+ - !ruby/object:Gem::Version
203
+ version: '0'
204
+ type: :development
205
+ prerelease: false
206
+ version_requirements: !ruby/object:Gem::Requirement
207
+ requirements:
208
+ - - ">="
209
+ - !ruby/object:Gem::Version
210
+ version: '0'
211
+ - !ruby/object:Gem::Dependency
212
+ name: codeclimate-test-reporter
213
+ requirement: !ruby/object:Gem::Requirement
214
+ requirements:
215
+ - - "~>"
216
+ - !ruby/object:Gem::Version
217
+ version: 1.0.0
218
+ type: :development
219
+ prerelease: false
220
+ version_requirements: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - "~>"
223
+ - !ruby/object:Gem::Version
224
+ version: 1.0.0
225
+ - !ruby/object:Gem::Dependency
226
+ name: simplecov
227
+ requirement: !ruby/object:Gem::Requirement
228
+ requirements:
229
+ - - ">="
230
+ - !ruby/object:Gem::Version
231
+ version: '0'
232
+ type: :development
233
+ prerelease: false
234
+ version_requirements: !ruby/object:Gem::Requirement
235
+ requirements:
236
+ - - ">="
237
+ - !ruby/object:Gem::Version
238
+ version: '0'
239
+ description: Text utilities for common cleanup and reformatting tasks when working
240
+ with scholarly metadata
241
+ email: mfenner@datacite.org
242
+ executables: []
243
+ extensions: []
244
+ extra_rdoc_files:
245
+ - README.md
246
+ files:
247
+ - ".gitignore"
248
+ - ".travis.yml"
249
+ - Gemfile
250
+ - Gemfile.lock
251
+ - LICENSE.md
252
+ - README.md
253
+ - Rakefile
254
+ - bergamasco.gemspec
255
+ - lib/bergamasco.rb
256
+ - lib/bergamasco/markdown.rb
257
+ - lib/bergamasco/sanitize.rb
258
+ - lib/bergamasco/summarize.rb
259
+ - lib/bergamasco/version.rb
260
+ - lib/bergamasco/whitelist_scrubber.rb
261
+ - spec/fixtures/abstract.tex
262
+ - spec/fixtures/cool-dois-without-yml.md
263
+ - spec/fixtures/cool-dois.html.md
264
+ - spec/fixtures/cool-dois.yml
265
+ - spec/markdown_spec.rb
266
+ - spec/sanitize_spec.rb
267
+ - spec/spec_helper.rb
268
+ - spec/summarize_spec.rb
269
+ homepage: https://github.com/datacite/bergamasco
270
+ licenses:
271
+ - MIT
272
+ metadata: {}
273
+ post_install_message:
274
+ rdoc_options: []
275
+ require_paths:
276
+ - lib
277
+ required_ruby_version: !ruby/object:Gem::Requirement
278
+ requirements:
279
+ - - ">="
280
+ - !ruby/object:Gem::Version
281
+ version: '0'
282
+ required_rubygems_version: !ruby/object:Gem::Requirement
283
+ requirements:
284
+ - - ">="
285
+ - !ruby/object:Gem::Version
286
+ version: '0'
287
+ requirements: []
288
+ rubyforge_project:
289
+ rubygems_version: 2.6.8
290
+ signing_key:
291
+ specification_version: 4
292
+ summary: Text utilities for working with scholarly metadata
293
+ test_files: []