RubyGems - identifiers - Versions diffs - 0.8.0 → 0.8.1 - Mend

identifiers 0.8.0 → 0.8.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (13)

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/lib/identifiers/arxiv_id.rb +2 -2
data/lib/identifiers/doi.rb +6 -6
data/lib/identifiers/handle.rb +1 -1
data/lib/identifiers/pubmed_id.rb +1 -1
data/lib/identifiers/repec_id.rb +1 -1
data/spec/identifiers/arxiv_id_spec.rb +8 -0
data/spec/identifiers/doi_spec.rb +8 -0
data/spec/identifiers/handle_spec.rb +6 -0
data/spec/identifiers/pubmed_id_spec.rb +4 -0
data/spec/identifiers/repec_id_spec.rb +6 -0
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1d323ecfde9659ed9a795b2b2519450d33ec77c6
-  data.tar.gz: 391573ad4170738967c32597eee590554f47d59a
+  metadata.gz: ade5848785ab153a6cb5e1b2cffdd05958879943
+  data.tar.gz: 2b0b6fa55d97c5ae2990d606b210168d33dc5dae
 SHA512:
-  metadata.gz: b50b32b55b82b97b3d07bc6613c064ffadf59c760f51ba9b26b1e200c4efe883747bb47b89e889597da2f56e426138412acb21797eddc9da699f94829d65bf17
-  data.tar.gz: 509003d4b24d3cde4b350b5198384d42c5232ac3417007bdc9a8823e07f1551a61ba6c08553fea19f731dd319723d9c7c590d003d31152b5389aa9cb356587b7
+  metadata.gz: bbdd699cd75aef87f0318a54acb55784ead6e3ea2f603a5c3e5437ef7d358222842d2892f6ec24e0fc59119be3d621d41227497d1bd3cbafdb159612331a4ccd
+  data.tar.gz: c26923e8c6c7153fae0dc793eacd6b34e355f78360deee5a47f0c2922076b9e91c5b79c76884f0b3cf0747c93b6c3e2da97090e51fb1114f100ae9d7ae2514d4

data/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,10 @@
 All notable changes to this project will be documented in this file. This
 project adheres to [Semantic Versioning](http://semver.org/).
+## [0.8.1] - 2017-04-10
+### Fixed
+- Fixed extraction of multiple DOIs separated by Unicode whitespace
 ## [0.8.0] - 2017-04-10
 ### Added
 - Added support for ISBNs with digits separated by Unicode whitespace and dashes
@@ -47,3 +51,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
 [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
 [0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
 [0.8.0]: https://github.com/altmetric/identifiers/releases/tag/v0.8.0
+[0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1

data/lib/identifiers/arxiv_id.rb CHANGED Viewed

@@ -6,13 +6,13 @@ module Identifiers
     def self.extract_post_2007_arxiv_ids(str)
       str
-        .scan(%r{(?<=^|\s|/)(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|\s)}i)
+        .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|[[:space:]])}i)
         .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
     end
     def self.extract_pre_2007_arxiv_ids(str)
       str
-        .scan(%r{(?<=^|\s|/)(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|\s)}i)
+        .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|[[:space:]])}i)
         .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
     end
   end

data/lib/identifiers/doi.rb CHANGED Viewed

@@ -12,18 +12,18 @@ module Identifiers
         \d{1,7}  # ISBN title enumerator and check digit
         |
         # DOI
-        \d{4,9} # Registrant code
-        /       # Prefix/suffix divider
-        \S+     # DOI suffix
+        \d{4,9}       # Registrant code
+        /             # Prefix/suffix divider
+        [^[:space:]]+ # DOI suffix
       )
     }x
     VALID_ENDING = /
       (?:
-        \p{^Punct}  # Non-punctuation character
+        \p{^Punct} # Non-punctuation character
         |
-        \(.+\)      # Balanced parentheses
+        \(.+\)     # Balanced parentheses
         |
-        2-\#        # Early Wiley DOI suffix
+        2-\#       # Early Wiley DOI suffix
       )
       \z
     /x

data/lib/identifiers/handle.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module Identifiers
   class Handle
     def self.extract(str)
-      str.scan(%r{\b[0-9.]+/\S+\b}i)
+      str.scan(%r{\b[0-9.]+/[^[:space:]]+\b}i)
     end
   end
 end

data/lib/identifiers/pubmed_id.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Identifiers
   class PubmedId
     def self.extract(str)
       str
-        .scan(/(?<=^|\s)0*(?!0)(\d+)(?=$|\s)/)
+        .scan(/(?<=^|[[:space:]])0*(?!0)(\d+)(?=$|[[:space:]])/)
         .flatten
     end
   end

data/lib/identifiers/repec_id.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module Identifiers
   class RepecId
     def self.extract(str)
-      str.scan(/\brepec:\S+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
+      str.scan(/\brepec:[^[:space:]]+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
     end
   end
 end

data/spec/identifiers/arxiv_id_spec.rb CHANGED Viewed

@@ -21,5 +21,13 @@ RSpec.describe Identifiers::ArxivId do
     it 'does not extract IDs from DOIs that contain a valid arXiv ID' do
       expect(described_class.extract('10.2310/7290.2014.00033')).to be_empty
     end
+    it 'extracts a post 2007 arXiv ID surrounded by Unicode whitespace' do
+      expect(described_class.extract('Example: arXiv:0706.0001 ')).to contain_exactly('0706.0001')
+    end
+    it 'extracts a pre 2007 arXiv ID surrounded by Unicode whitespace' do
+      expect(described_class.extract('Example: math.GT/0309136 ')).to contain_exactly('math.GT/0309136')
+    end
   end
 end

data/spec/identifiers/doi_spec.rb CHANGED Viewed

@@ -110,4 +110,12 @@ RSpec.describe Identifiers::DOI do
   it 'does not extract DOIs with purely punctuation suffixes' do
     expect(described_class.extract('10.1130/!).",')).to be_empty
   end
+  it 'extracts DOIs with emoji in them' do
+    expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
+  end
+  it 'extracts DOIs separated by Unicode whitespace' do
+    expect(described_class.extract('10.1234/foo  10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
+  end
 end

data/spec/identifiers/handle_spec.rb CHANGED Viewed

@@ -12,4 +12,10 @@ RSpec.describe Identifiers::Handle do
     expect(described_class.extract(str)).to contain_exactly('2117/83545it.ly/1UtXnTW')
   end
+  it 'extracts Handles separated by Unicode whitespace' do
+    str = '10149/596901 10251/79612'
+    expect(described_class.extract(str)).to contain_exactly('10149/596901', '10251/79612')
+  end
 end

data/spec/identifiers/pubmed_id_spec.rb CHANGED Viewed

@@ -18,4 +18,8 @@ RSpec.describe Identifiers::PubmedId do
   it 'does not consider 0 as a valid Pubmed ID' do
     expect(described_class.extract("00000000")).to be_empty
   end
+  it 'extracts PubMed IDs separated by Unicode whitespace' do
+    expect(described_class.extract('123 456')).to contain_exactly('123', '456')
+  end
 end

data/spec/identifiers/repec_id_spec.rb CHANGED Viewed

@@ -12,4 +12,10 @@ RSpec.describe Identifiers::RepecId do
     expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
   end
+  it 'extracts RePEc IDs separated by Unicode whitespace' do
+    str = "RePEc:wbk:wbpubs:2266 RePEc:inn:wpaper:2016-03"
+    expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: identifiers
 version: !ruby/object:Gem::Version
-  version: 0.8.0
+  version: 0.8.1
 platform: ruby
 authors:
 - Jonathan Hernandez