identifiers 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/identifiers/arxiv_id.rb +2 -2
- data/lib/identifiers/doi.rb +6 -6
- data/lib/identifiers/handle.rb +1 -1
- data/lib/identifiers/pubmed_id.rb +1 -1
- data/lib/identifiers/repec_id.rb +1 -1
- data/spec/identifiers/arxiv_id_spec.rb +8 -0
- data/spec/identifiers/doi_spec.rb +8 -0
- data/spec/identifiers/handle_spec.rb +6 -0
- data/spec/identifiers/pubmed_id_spec.rb +4 -0
- data/spec/identifiers/repec_id_spec.rb +6 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ade5848785ab153a6cb5e1b2cffdd05958879943
|
4
|
+
data.tar.gz: 2b0b6fa55d97c5ae2990d606b210168d33dc5dae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbdd699cd75aef87f0318a54acb55784ead6e3ea2f603a5c3e5437ef7d358222842d2892f6ec24e0fc59119be3d621d41227497d1bd3cbafdb159612331a4ccd
|
7
|
+
data.tar.gz: c26923e8c6c7153fae0dc793eacd6b34e355f78360deee5a47f0c2922076b9e91c5b79c76884f0b3cf0747c93b6c3e2da97090e51fb1114f100ae9d7ae2514d4
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.8.1] - 2017-04-10
|
6
|
+
### Fixed
|
7
|
+
- Fixed extraction of multiple DOIs separated by Unicode whitespace
|
8
|
+
|
5
9
|
## [0.8.0] - 2017-04-10
|
6
10
|
### Added
|
7
11
|
- Added support for ISBNs with digits separated by Unicode whitespace and dashes
|
@@ -47,3 +51,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
|
|
47
51
|
[0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
|
48
52
|
[0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
|
49
53
|
[0.8.0]: https://github.com/altmetric/identifiers/releases/tag/v0.8.0
|
54
|
+
[0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1
|
data/lib/identifiers/arxiv_id.rb
CHANGED
@@ -6,13 +6,13 @@ module Identifiers
|
|
6
6
|
|
7
7
|
def self.extract_post_2007_arxiv_ids(str)
|
8
8
|
str
|
9
|
-
.scan(%r{(
|
9
|
+
.scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|[[:space:]])}i)
|
10
10
|
.map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
|
11
11
|
end
|
12
12
|
|
13
13
|
def self.extract_pre_2007_arxiv_ids(str)
|
14
14
|
str
|
15
|
-
.scan(%r{(
|
15
|
+
.scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|[[:space:]])}i)
|
16
16
|
.map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
|
17
17
|
end
|
18
18
|
end
|
data/lib/identifiers/doi.rb
CHANGED
@@ -12,18 +12,18 @@ module Identifiers
|
|
12
12
|
\d{1,7} # ISBN title enumerator and check digit
|
13
13
|
|
|
14
14
|
# DOI
|
15
|
-
\d{4,9}
|
16
|
-
/
|
17
|
-
|
15
|
+
\d{4,9} # Registrant code
|
16
|
+
/ # Prefix/suffix divider
|
17
|
+
[^[:space:]]+ # DOI suffix
|
18
18
|
)
|
19
19
|
}x
|
20
20
|
VALID_ENDING = /
|
21
21
|
(?:
|
22
|
-
\p{^Punct}
|
22
|
+
\p{^Punct} # Non-punctuation character
|
23
23
|
|
|
24
|
-
\(.+\)
|
24
|
+
\(.+\) # Balanced parentheses
|
25
25
|
|
|
26
|
-
2-\#
|
26
|
+
2-\# # Early Wiley DOI suffix
|
27
27
|
)
|
28
28
|
\z
|
29
29
|
/x
|
data/lib/identifiers/handle.rb
CHANGED
data/lib/identifiers/repec_id.rb
CHANGED
@@ -21,5 +21,13 @@ RSpec.describe Identifiers::ArxivId do
|
|
21
21
|
it 'does not extract IDs from DOIs that contain a valid arXiv ID' do
|
22
22
|
expect(described_class.extract('10.2310/7290.2014.00033')).to be_empty
|
23
23
|
end
|
24
|
+
|
25
|
+
it 'extracts a post 2007 arXiv ID surrounded by Unicode whitespace' do
|
26
|
+
expect(described_class.extract('Example: arXiv:0706.0001 ')).to contain_exactly('0706.0001')
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'extracts a pre 2007 arXiv ID surrounded by Unicode whitespace' do
|
30
|
+
expect(described_class.extract('Example: math.GT/0309136 ')).to contain_exactly('math.GT/0309136')
|
31
|
+
end
|
24
32
|
end
|
25
33
|
end
|
@@ -110,4 +110,12 @@ RSpec.describe Identifiers::DOI do
|
|
110
110
|
it 'does not extract DOIs with purely punctuation suffixes' do
|
111
111
|
expect(described_class.extract('10.1130/!).",')).to be_empty
|
112
112
|
end
|
113
|
+
|
114
|
+
it 'extracts DOIs with emoji in them' do
|
115
|
+
expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'extracts DOIs separated by Unicode whitespace' do
|
119
|
+
expect(described_class.extract('10.1234/foo 10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
|
120
|
+
end
|
113
121
|
end
|
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::Handle do
|
|
12
12
|
|
13
13
|
expect(described_class.extract(str)).to contain_exactly('2117/83545it.ly/1UtXnTW')
|
14
14
|
end
|
15
|
+
|
16
|
+
it 'extracts Handles separated by Unicode whitespace' do
|
17
|
+
str = '10149/596901 10251/79612'
|
18
|
+
|
19
|
+
expect(described_class.extract(str)).to contain_exactly('10149/596901', '10251/79612')
|
20
|
+
end
|
15
21
|
end
|
@@ -18,4 +18,8 @@ RSpec.describe Identifiers::PubmedId do
|
|
18
18
|
it 'does not consider 0 as a valid Pubmed ID' do
|
19
19
|
expect(described_class.extract("00000000")).to be_empty
|
20
20
|
end
|
21
|
+
|
22
|
+
it 'extracts PubMed IDs separated by Unicode whitespace' do
|
23
|
+
expect(described_class.extract('123 456')).to contain_exactly('123', '456')
|
24
|
+
end
|
21
25
|
end
|
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::RepecId do
|
|
12
12
|
|
13
13
|
expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
|
14
14
|
end
|
15
|
+
|
16
|
+
it 'extracts RePEc IDs separated by Unicode whitespace' do
|
17
|
+
str = "RePEc:wbk:wbpubs:2266 RePEc:inn:wpaper:2016-03"
|
18
|
+
|
19
|
+
expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
|
20
|
+
end
|
15
21
|
end
|