RubyGems - identifiers - Versions diffs - 0.8.0 → 0.8.1 - Mend

identifiers 0.8.0 → 0.8.1

Files changed (13) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1d323ecfde9659ed9a795b2b2519450d33ec77c6
-  data.tar.gz: 391573ad4170738967c32597eee590554f47d59a
+  metadata.gz: ade5848785ab153a6cb5e1b2cffdd05958879943
+  data.tar.gz: 2b0b6fa55d97c5ae2990d606b210168d33dc5dae
 SHA512:
-  metadata.gz: b50b32b55b82b97b3d07bc6613c064ffadf59c760f51ba9b26b1e200c4efe883747bb47b89e889597da2f56e426138412acb21797eddc9da699f94829d65bf17
-  data.tar.gz: 509003d4b24d3cde4b350b5198384d42c5232ac3417007bdc9a8823e07f1551a61ba6c08553fea19f731dd319723d9c7c590d003d31152b5389aa9cb356587b7
+  metadata.gz: bbdd699cd75aef87f0318a54acb55784ead6e3ea2f603a5c3e5437ef7d358222842d2892f6ec24e0fc59119be3d621d41227497d1bd3cbafdb159612331a4ccd
+  data.tar.gz: c26923e8c6c7153fae0dc793eacd6b34e355f78360deee5a47f0c2922076b9e91c5b79c76884f0b3cf0747c93b6c3e2da97090e51fb1114f100ae9d7ae2514d4

data/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,10 @@
 All notable changes to this project will be documented in this file. This
 project adheres to [Semantic Versioning](http://semver.org/).
+## [0.8.1] - 2017-04-10
+### Fixed
+- Fixed extraction of multiple DOIs separated by Unicode whitespace
 ## [0.8.0] - 2017-04-10
 ### Added
 - Added support for ISBNs with digits separated by Unicode whitespace and dashes
@@ -47,3 +51,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
 [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
 [0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
 [0.8.0]: https://github.com/altmetric/identifiers/releases/tag/v0.8.0
+[0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1

data/lib/identifiers/arxiv_id.rb CHANGED Viewed

@@ -6,13 +6,13 @@ module Identifiers
     def self.extract_post_2007_arxiv_ids(str)
       str
-        .scan(%r{(?<=^|\s|/)(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|\s)}i)
+        .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|[[:space:]])}i)
         .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
     end
     def self.extract_pre_2007_arxiv_ids(str)
       str
-        .scan(%r{(?<=^|\s|/)(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|\s)}i)
+        .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|[[:space:]])}i)
         .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
     end
   end

data/lib/identifiers/doi.rb CHANGED Viewed

@@ -12,18 +12,18 @@ module Identifiers
         \d{1,7}  # ISBN title enumerator and check digit
         |
         # DOI
-        \d{4,9} # Registrant code
-        /       # Prefix/suffix divider
-        \S+     # DOI suffix
+        \d{4,9}       # Registrant code
+        /             # Prefix/suffix divider
+        [^[:space:]]+ # DOI suffix
       )
     }x
     VALID_ENDING = /
       (?:
-        \p{^Punct}  # Non-punctuation character
+        \p{^Punct} # Non-punctuation character
         |
-        \(.+\)      # Balanced parentheses
+        \(.+\)     # Balanced parentheses
         |
-        2-\#        # Early Wiley DOI suffix
+        2-\#       # Early Wiley DOI suffix
       )
       \z
     /x

data/lib/identifiers/handle.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module Identifiers
   class Handle
     def self.extract(str)
-      str.scan(%r{\b[0-9.]+/\S+\b}i)
+      str.scan(%r{\b[0-9.]+/[^[:space:]]+\b}i)
     end
   end
 end

data/lib/identifiers/pubmed_id.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Identifiers
   class PubmedId
     def self.extract(str)
       str
-        .scan(/(?<=^|\s)0*(?!0)(\d+)(?=$|\s)/)
+        .scan(/(?<=^|[[:space:]])0*(?!0)(\d+)(?=$|[[:space:]])/)
         .flatten
     end
   end

data/lib/identifiers/repec_id.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module Identifiers
   class RepecId
     def self.extract(str)
-      str.scan(/\brepec:\S+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
+      str.scan(/\brepec:[^[:space:]]+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
     end
   end
 end

data/spec/identifiers/arxiv_id_spec.rb CHANGED Viewed

@@ -21,5 +21,13 @@ RSpec.describe Identifiers::ArxivId do
     it 'does not extract IDs from DOIs that contain a valid arXiv ID' do
       expect(described_class.extract('10.2310/7290.2014.00033')).to be_empty
     end
+    it 'extracts a post 2007 arXiv ID surrounded by Unicode whitespace' do
+      expect(described_class.extract('Example: arXiv:0706.0001 ')).to contain_exactly('0706.0001')
+    end
+    it 'extracts a pre 2007 arXiv ID surrounded by Unicode whitespace' do
+      expect(described_class.extract('Example: math.GT/0309136 ')).to contain_exactly('math.GT/0309136')
+    end
   end
 end

data/spec/identifiers/doi_spec.rb CHANGED Viewed

@@ -110,4 +110,12 @@ RSpec.describe Identifiers::DOI do
   it 'does not extract DOIs with purely punctuation suffixes' do
     expect(described_class.extract('10.1130/!).",')).to be_empty
   end
+  it 'extracts DOIs with emoji in them' do
+    expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
+  end
+  it 'extracts DOIs separated by Unicode whitespace' do
+    expect(described_class.extract('10.1234/foo  10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
+  end
 end

data/spec/identifiers/handle_spec.rb CHANGED Viewed

@@ -12,4 +12,10 @@ RSpec.describe Identifiers::Handle do
     expect(described_class.extract(str)).to contain_exactly('2117/83545it.ly/1UtXnTW')
   end
+  it 'extracts Handles separated by Unicode whitespace' do
+    str = '10149/596901 10251/79612'
+    expect(described_class.extract(str)).to contain_exactly('10149/596901', '10251/79612')
+  end
 end

data/spec/identifiers/pubmed_id_spec.rb CHANGED Viewed

@@ -18,4 +18,8 @@ RSpec.describe Identifiers::PubmedId do
   it 'does not consider 0 as a valid Pubmed ID' do
     expect(described_class.extract("00000000")).to be_empty
   end
+  it 'extracts PubMed IDs separated by Unicode whitespace' do
+    expect(described_class.extract('123 456')).to contain_exactly('123', '456')
+  end
 end

data/spec/identifiers/repec_id_spec.rb CHANGED Viewed

@@ -12,4 +12,10 @@ RSpec.describe Identifiers::RepecId do
     expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
   end
+  it 'extracts RePEc IDs separated by Unicode whitespace' do
+    str = "RePEc:wbk:wbpubs:2266 RePEc:inn:wpaper:2016-03"
+    expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: identifiers
 version: !ruby/object:Gem::Version
-  version: 0.8.0
+  version: 0.8.1
 platform: ruby
 authors:
 - Jonathan Hernandez