identifiers 0.8.0 → 0.8.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/identifiers/arxiv_id.rb +2 -2
- data/lib/identifiers/doi.rb +6 -6
- data/lib/identifiers/handle.rb +1 -1
- data/lib/identifiers/pubmed_id.rb +1 -1
- data/lib/identifiers/repec_id.rb +1 -1
- data/spec/identifiers/arxiv_id_spec.rb +8 -0
- data/spec/identifiers/doi_spec.rb +8 -0
- data/spec/identifiers/handle_spec.rb +6 -0
- data/spec/identifiers/pubmed_id_spec.rb +4 -0
- data/spec/identifiers/repec_id_spec.rb +6 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ade5848785ab153a6cb5e1b2cffdd05958879943
|
4
|
+
data.tar.gz: 2b0b6fa55d97c5ae2990d606b210168d33dc5dae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbdd699cd75aef87f0318a54acb55784ead6e3ea2f603a5c3e5437ef7d358222842d2892f6ec24e0fc59119be3d621d41227497d1bd3cbafdb159612331a4ccd
|
7
|
+
data.tar.gz: c26923e8c6c7153fae0dc793eacd6b34e355f78360deee5a47f0c2922076b9e91c5b79c76884f0b3cf0747c93b6c3e2da97090e51fb1114f100ae9d7ae2514d4
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.8.1] - 2017-04-10
|
6
|
+
### Fixed
|
7
|
+
- Fixed extraction of multiple DOIs separated by Unicode whitespace
|
8
|
+
|
5
9
|
## [0.8.0] - 2017-04-10
|
6
10
|
### Added
|
7
11
|
- Added support for ISBNs with digits separated by Unicode whitespace and dashes
|
@@ -47,3 +51,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
|
|
47
51
|
[0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
|
48
52
|
[0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
|
49
53
|
[0.8.0]: https://github.com/altmetric/identifiers/releases/tag/v0.8.0
|
54
|
+
[0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1
|
data/lib/identifiers/arxiv_id.rb
CHANGED
@@ -6,13 +6,13 @@ module Identifiers
|
|
6
6
|
|
7
7
|
def self.extract_post_2007_arxiv_ids(str)
|
8
8
|
str
|
9
|
-
.scan(%r{(
|
9
|
+
.scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?\d{4}\.\d{4,5}(?:v\d+)?(?=$|[[:space:]])}i)
|
10
10
|
.map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
|
11
11
|
end
|
12
12
|
|
13
13
|
def self.extract_pre_2007_arxiv_ids(str)
|
14
14
|
str
|
15
|
-
.scan(%r{(
|
15
|
+
.scan(%r{(?<=^|[[:space:]/])(?:arXiv:)?[a-z-]+(?:\.[A-Z]{2})?/\d{2}(?:0[1-9]|1[012])\d{3}(?:v\d+)?(?=$|[[:space:]])}i)
|
16
16
|
.map { |arxiv_id| arxiv_id.sub(/\AarXiv:/i, '') }
|
17
17
|
end
|
18
18
|
end
|
data/lib/identifiers/doi.rb
CHANGED
@@ -12,18 +12,18 @@ module Identifiers
|
|
12
12
|
\d{1,7} # ISBN title enumerator and check digit
|
13
13
|
|
|
14
14
|
# DOI
|
15
|
-
\d{4,9}
|
16
|
-
/
|
17
|
-
|
15
|
+
\d{4,9} # Registrant code
|
16
|
+
/ # Prefix/suffix divider
|
17
|
+
[^[:space:]]+ # DOI suffix
|
18
18
|
)
|
19
19
|
}x
|
20
20
|
VALID_ENDING = /
|
21
21
|
(?:
|
22
|
-
\p{^Punct}
|
22
|
+
\p{^Punct} # Non-punctuation character
|
23
23
|
|
|
24
|
-
\(.+\)
|
24
|
+
\(.+\) # Balanced parentheses
|
25
25
|
|
|
26
|
-
2-\#
|
26
|
+
2-\# # Early Wiley DOI suffix
|
27
27
|
)
|
28
28
|
\z
|
29
29
|
/x
|
data/lib/identifiers/handle.rb
CHANGED
data/lib/identifiers/repec_id.rb
CHANGED
@@ -21,5 +21,13 @@ RSpec.describe Identifiers::ArxivId do
|
|
21
21
|
it 'does not extract IDs from DOIs that contain a valid arXiv ID' do
|
22
22
|
expect(described_class.extract('10.2310/7290.2014.00033')).to be_empty
|
23
23
|
end
|
24
|
+
|
25
|
+
it 'extracts a post 2007 arXiv ID surrounded by Unicode whitespace' do
|
26
|
+
expect(described_class.extract('Example: arXiv:0706.0001 ')).to contain_exactly('0706.0001')
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'extracts a pre 2007 arXiv ID surrounded by Unicode whitespace' do
|
30
|
+
expect(described_class.extract('Example: math.GT/0309136 ')).to contain_exactly('math.GT/0309136')
|
31
|
+
end
|
24
32
|
end
|
25
33
|
end
|
@@ -110,4 +110,12 @@ RSpec.describe Identifiers::DOI do
|
|
110
110
|
it 'does not extract DOIs with purely punctuation suffixes' do
|
111
111
|
expect(described_class.extract('10.1130/!).",')).to be_empty
|
112
112
|
end
|
113
|
+
|
114
|
+
it 'extracts DOIs with emoji in them' do
|
115
|
+
expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'extracts DOIs separated by Unicode whitespace' do
|
119
|
+
expect(described_class.extract('10.1234/foo 10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
|
120
|
+
end
|
113
121
|
end
|
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::Handle do
|
|
12
12
|
|
13
13
|
expect(described_class.extract(str)).to contain_exactly('2117/83545it.ly/1UtXnTW')
|
14
14
|
end
|
15
|
+
|
16
|
+
it 'extracts Handles separated by Unicode whitespace' do
|
17
|
+
str = '10149/596901 10251/79612'
|
18
|
+
|
19
|
+
expect(described_class.extract(str)).to contain_exactly('10149/596901', '10251/79612')
|
20
|
+
end
|
15
21
|
end
|
@@ -18,4 +18,8 @@ RSpec.describe Identifiers::PubmedId do
|
|
18
18
|
it 'does not consider 0 as a valid Pubmed ID' do
|
19
19
|
expect(described_class.extract("00000000")).to be_empty
|
20
20
|
end
|
21
|
+
|
22
|
+
it 'extracts PubMed IDs separated by Unicode whitespace' do
|
23
|
+
expect(described_class.extract('123 456')).to contain_exactly('123', '456')
|
24
|
+
end
|
21
25
|
end
|
@@ -12,4 +12,10 @@ RSpec.describe Identifiers::RepecId do
|
|
12
12
|
|
13
13
|
expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
|
14
14
|
end
|
15
|
+
|
16
|
+
it 'extracts RePEc IDs separated by Unicode whitespace' do
|
17
|
+
str = "RePEc:wbk:wbpubs:2266 RePEc:inn:wpaper:2016-03"
|
18
|
+
|
19
|
+
expect(described_class.extract(str)).to contain_exactly('RePEc:wbk:wbpubs:2266', 'RePEc:inn:wpaper:2016-03')
|
20
|
+
end
|
15
21
|
end
|