identifiers 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d323ecfde9659ed9a795b2b2519450d33ec77c6
4
- data.tar.gz: 391573ad4170738967c32597eee590554f47d59a
3
+ metadata.gz: ade5848785ab153a6cb5e1b2cffdd05958879943
4
+ data.tar.gz: 2b0b6fa55d97c5ae2990d606b210168d33dc5dae
5
5
  SHA512:
6
- metadata.gz: b50b32b55b82b97b3d07bc6613c064ffadf59c760f51ba9b26b1e200c4efe883747bb47b89e889597da2f56e426138412acb21797eddc9da699f94829d65bf17
7
- data.tar.gz: 509003d4b24d3cde4b350b5198384d42c5232ac3417007bdc9a8823e07f1551a61ba6c08553fea19f731dd319723d9c7c590d003d31152b5389aa9cb356587b7
6
+ metadata.gz: bbdd699cd75aef87f0318a54acb55784ead6e3ea2f603a5c3e5437ef7d358222842d2892f6ec24e0fc59119be3d621d41227497d1bd3cbafdb159612331a4ccd
7
+ data.tar.gz: c26923e8c6c7153fae0dc793eacd6b34e355f78360deee5a47f0c2922076b9e91c5b79c76884f0b3cf0747c93b6c3e2da97090e51fb1114f100ae9d7ae2514d4
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.8.1] - 2017-04-10
6
+ ### Fixed
7
+ - Fixed extraction of DOIs, arXiv IDs, Handles, PubMed IDs and RePEc IDs separated by Unicode whitespace
8
+
5
9
  ## [0.8.0] - 2017-04-10
6
10
  ### Added
7
11
  - Added support for ISBNs with digits separated by Unicode whitespace and dashes
@@ -47,3 +51,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
47
51
  [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
48
52
  [0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
49
53
  [0.8.0]: https://github.com/altmetric/identifiers/releases/tag/v0.8.0
54
+ [0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1
@@ -6,13 +6,13 @@ module Identifiers
6
6
 
7
7
  def self.extract_post_2007_arxiv_ids(str)
8
8
  str
9
- .scan(%r{(?<=^|\s|/)(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|\s)}i)
9
+ .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|[[:space:]])}i)
10
10
  .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
11
11
  end
12
12
 
13
13
  def self.extract_pre_2007_arxiv_ids(str)
14
14
  str
15
- .scan(%r{(?<=^|\s|/)(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|\s)}i)
15
+ .scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|[[:space:]])}i)
16
16
  .map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
17
17
  end
18
18
  end
@@ -12,18 +12,18 @@ module Identifiers
12
12
  \d{1,7} # ISBN title enumerator and check digit
13
13
  |
14
14
  # DOI
15
- \d{4,9} # Registrant code
16
- / # Prefix/suffix divider
17
- \S+ # DOI suffix
15
+ \d{4,9} # Registrant code
16
+ / # Prefix/suffix divider
17
+ [^[:space:]]+ # DOI suffix
18
18
  )
19
19
  }x
20
20
  VALID_ENDING = /
21
21
  (?:
22
- \p{^Punct} # Non-punctuation character
22
+ \p{^Punct} # Non-punctuation character
23
23
  |
24
- \(.+\) # Balanced parentheses
24
+ \(.+\) # Balanced parentheses
25
25
  |
26
- 2-\# # Early Wiley DOI suffix
26
+ 2-\# # Early Wiley DOI suffix
27
27
  )
28
28
  \z
29
29
  /x
@@ -1,7 +1,7 @@
1
1
  module Identifiers
2
2
  class Handle
3
3
  def self.extract(str)
4
- str.scan(%r{\b[0-9.]+/\S+\b}i)
4
+ str.scan(%r{\b[0-9.]+/[^[:space:]]+\b}i)
5
5
  end
6
6
  end
7
7
  end
@@ -2,7 +2,7 @@ module Identifiers
2
2
  class PubmedId
3
3
  def self.extract(str)
4
4
  str
5
- .scan(/(?<=^|\s)0*(?!0)(\d+)(?=$|\s)/)
5
+ .scan(/(?<=^|[[:space:]])0*(?!0)(\d+)(?=$|[[:space:]])/)
6
6
  .flatten
7
7
  end
8
8
  end
@@ -1,7 +1,7 @@
1
1
  module Identifiers
2
2
  class RepecId
3
3
  def self.extract(str)
4
- str.scan(/\brepec:\S+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
4
+ str.scan(/\brepec:[^[:space:]]+\b/i).map { |repec| "RePEc:#{repec.split(':', 2).last}" }
5
5
  end
6
6
  end
7
7
  end
@@ -21,5 +21,13 @@ RSpec.describe Identifiers::ArxivId do
21
21
  it 'does not extract IDs from DOIs that contain a valid arXiv ID' do
22
22
  expect(described_class.extract('10.2310/7290.2014.00033')).to be_empty
23
23
  end
24
+
25
+ it 'extracts a post 2007 arXiv ID surrounded by Unicode whitespace' do
26
+ expect(described_class.extract('Example: arXiv:0706.0001 ')).to contain_exactly('0706.0001')
27
+ end
28
+
29
+ it 'extracts a pre 2007 arXiv ID surrounded by Unicode whitespace' do
30
+ expect(described_class.extract('Example: math.GT/0309136 ')).to contain_exactly('math.GT/0309136')
31
+ end
24
32
  end
25
33
  end
@@ -110,4 +110,12 @@ RSpec.describe Identifiers::DOI do
110
110
  it 'does not extract DOIs with purely punctuation suffixes' do
111
111
  expect(described_class.extract('10.1130/!).",')).to be_empty
112
112
  end
113
+
114
+ it 'extracts DOIs with emoji in them' do
115
+ expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
116
+ end
117
+
118
+ it 'extracts DOIs separated by Unicode whitespace' do
119
+ expect(described_class.extract('10.1234/foo  10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
120
+ end
113
121
  end
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::Handle do
12
12
 
13
13
  expect(described_class.extract(str)).to contain_exactly('2117/83545it.ly/1UtXnTW')
14
14
  end
15
+
16
+ it 'extracts Handles separated by Unicode whitespace' do
17
+ str = '10149/596901 10251/79612'
18
+
19
+ expect(described_class.extract(str)).to contain_exactly('10149/596901', '10251/79612')
20
+ end
15
21
  end
@@ -18,4 +18,8 @@ RSpec.describe Identifiers::PubmedId do
18
18
  it 'does not consider 0 as a valid Pubmed ID' do
19
19
  expect(described_class.extract("00000000")).to be_empty
20
20
  end
21
+
22
+ it 'extracts PubMed IDs separated by Unicode whitespace' do
23
+ expect(described_class.extract('123 456')).to contain_exactly('123', '456')
24
+ end
21
25
  end
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::RepecId do
12
12
 
13
13
  expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
14
14
  end
15
+
16
+ it 'extracts RePEc IDs separated by Unicode whitespace' do
17
+ str = "RePEc:wbk:wbpubs:2266 RePEc:inn:wpaper:2016-03"
18
+
19
+ expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
20
+ end
15
21
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez