identifiers 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d323ecfde9659ed9a795b2b2519450d33ec77c6
4
- data.tar.gz: 391573ad4170738967c32597eee590554f47d59a
3
+ metadata.gz: ade5848785ab153a6cb5e1b2cffdd05958879943
4
+ data.tar.gz: 2b0b6fa55d97c5ae2990d606b210168d33dc5dae
5
5
  SHA512:
6
- metadata.gz: b50b32b55b82b97b3d07bc6613c064ffadf59c760f51ba9b26b1e200c4efe883747bb47b89e889597da2f56e426138412acb21797eddc9da699f94829d65bf17
7
- data.tar.gz: 509003d4b24d3cde4b350b5198384d42c5232ac3417007bdc9a8823e07f1551a61ba6c08553fea19f731dd319723d9c7c590d003d31152b5389aa9cb356587b7
6
+ metadata.gz: bbdd699cd75aef87f0318a54acb55784ead6e3ea2f603a5c3e5437ef7d358222842d2892f6ec24e0fc59119be3d621d41227497d1bd3cbafdb159612331a4ccd
7
+ data.tar.gz: c26923e8c6c7153fae0dc793eacd6b34e355f78360deee5a47f0c2922076b9e91c5b79c76884f0b3cf0747c93b6c3e2da97090e51fb1114f100ae9d7ae2514d4
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.8.1] - 2017-04-10
6
+ ### Fixed
7
+ - Fixed extraction of multiple DOIs separated by Unicode whitespace
8
+
5
9
  ## [0.8.0] - 2017-04-10
6
10
  ### Added
7
11
  - Added support for ISBNs with digits separated by Unicode whitespace and dashes
@@ -47,3 +51,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
47
51
  [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
48
52
  [0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
49
53
  [0.8.0]: https://github.com/altmetric/identifiers/releases/tag/v0.8.0
54
+ [0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1
@@ -6,13 +6,13 @@ module Identifiers
6
6
 
7
7
  def self.extract_post_2007_arxiv_ids(str)
8
8
  str
9
- .scan(%r{(?<=^|\s|/)(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|\s)}i)
9
+ .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|[[:space:]])}i)
10
10
  .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
11
11
  end
12
12
 
13
13
  def self.extract_pre_2007_arxiv_ids(str)
14
14
  str
15
- .scan(%r{(?<=^|\s|/)(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|\s)}i)
15
+ .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|[[:space:]])}i)
16
16
  .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
17
17
  end
18
18
  end
@@ -12,18 +12,18 @@ module Identifiers
12
12
  \d{1,7} # ISBN title enumerator and check digit
13
13
  |
14
14
  # DOI
15
- \d{4,9} # Registrant code
16
- / # Prefix/suffix divider
17
- \S+ # DOI suffix
15
+ \d{4,9} # Registrant code
16
+ / # Prefix/suffix divider
17
+ [^[:space:]]+ # DOI suffix
18
18
  )
19
19
  }x
20
20
  VALID_ENDING = /
21
21
  (?:
22
- \p{^Punct} # Non-punctuation character
22
+ \p{^Punct} # Non-punctuation character
23
23
  |
24
- \(.+\) # Balanced parentheses
24
+ \(.+\) # Balanced parentheses
25
25
  |
26
- 2-\# # Early Wiley DOI suffix
26
+ 2-\# # Early Wiley DOI suffix
27
27
  )
28
28
  \z
29
29
  /x
@@ -1,7 +1,7 @@
1
1
  module Identifiers
2
2
  class Handle
3
3
  def self.extract(str)
4
- str.scan(%r{\b[0-9.]+/\S+\b}i)
4
+ str.scan(%r{\b[0-9.]+/[^[:space:]]+\b}i)
5
5
  end
6
6
  end
7
7
  end
@@ -2,7 +2,7 @@ module Identifiers
2
2
  class PubmedId
3
3
  def self.extract(str)
4
4
  str
5
- .scan(/(?<=^|\s)0*(?!0)(\d+)(?=$|\s)/)
5
+ .scan(/(?<=^|[[:space:]])0*(?!0)(\d+)(?=$|[[:space:]])/)
6
6
  .flatten
7
7
  end
8
8
  end
@@ -1,7 +1,7 @@
1
1
  module Identifiers
2
2
  class RepecId
3
3
  def self.extract(str)
4
- str.scan(/\brepec:\S+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
4
+ str.scan(/\brepec:[^[:space:]]+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
5
5
  end
6
6
  end
7
7
  end
@@ -21,5 +21,13 @@ RSpec.describe Identifiers::ArxivId do
21
21
  it 'does not extract IDs from DOIs that contain a valid arXiv ID' do
22
22
  expect(described_class.extract('10.2310/7290.2014.00033')).to be_empty
23
23
  end
24
+
25
+ it 'extracts a post 2007 arXiv ID surrounded by Unicode whitespace' do
26
+ expect(described_class.extract('Example: arXiv:0706.0001 ')).to contain_exactly('0706.0001')
27
+ end
28
+
29
+ it 'extracts a pre 2007 arXiv ID surrounded by Unicode whitespace' do
30
+ expect(described_class.extract('Example: math.GT/0309136 ')).to contain_exactly('math.GT/0309136')
31
+ end
24
32
  end
25
33
  end
@@ -110,4 +110,12 @@ RSpec.describe Identifiers::DOI do
110
110
  it 'does not extract DOIs with purely punctuation suffixes' do
111
111
  expect(described_class.extract('10.1130/!).",')).to be_empty
112
112
  end
113
+
114
+ it 'extracts DOIs with emoji in them' do
115
+ expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
116
+ end
117
+
118
+ it 'extracts DOIs separated by Unicode whitespace' do
119
+ expect(described_class.extract('10.1234/foo  10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
120
+ end
113
121
  end
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::Handle do
12
12
 
13
13
  expect(described_class.extract(str)).to contain_exactly('2117/83545it.ly/1UtXnTW')
14
14
  end
15
+
16
+ it 'extracts Handles separated by Unicode whitespace' do
17
+ str = '10149/596901 10251/79612'
18
+
19
+ expect(described_class.extract(str)).to contain_exactly('10149/596901', '10251/79612')
20
+ end
15
21
  end
@@ -18,4 +18,8 @@ RSpec.describe Identifiers::PubmedId do
18
18
  it 'does not consider 0 as a valid Pubmed ID' do
19
19
  expect(described_class.extract("00000000")).to be_empty
20
20
  end
21
+
22
+ it 'extracts PubMed IDs separated by Unicode whitespace' do
23
+ expect(described_class.extract('123 456')).to contain_exactly('123', '456')
24
+ end
21
25
  end
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::RepecId do
12
12
 
13
13
  expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
14
14
  end
15
+
16
+ it 'extracts RePEc IDs separated by Unicode whitespace' do
17
+ str = "RePEc:wbk:wbpubs:2266 RePEc:inn:wpaper:2016-03"
18
+
19
+ expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
20
+ end
15
21
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez