identifiers 0.9.1 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/identifiers/isbn.rb +99 -99
- data/lib/identifiers/pubmed_id.rb +11 -7
- data/spec/identifiers/isbn_spec.rb +97 -97
- data/spec/identifiers/pubmed_id_spec.rb +99 -29
- data/spec/spec_helper.rb +18 -18
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 612736c08372d3108c5b62dbab508417c3b67a73
|
4
|
+
data.tar.gz: a693d991efe3c913cce2fe6fc4496999c0883103
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 19ca1c46ff464e700b3158794edbcdb677d25fa2e3b7f557595d897e430a84e725d0d2a0ce0e8904af713c9adead71d26ff0dd7c72b788a033c451fec31ba73b
|
7
|
+
data.tar.gz: cfcfc156b3ca04b2becf36f810a0a6b3ad4e402a3eb6d1b2ad4642091b91c20cd64e5e739d30f2e4fb35b849c583e08cab5d4a8e06996ced17db3a593b6c61a8
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.10.0] - 2017-12-20
|
6
|
+
### Added
|
7
|
+
- Extract PubMed IDs from URLs (e.g. https://www.ncbi.nlm.nih.gov/pubmed/123456) and URIs with schemes `pmid:` and `info:pmid`
|
8
|
+
|
5
9
|
## [0.9.1] - 2017-08-01
|
6
10
|
### Fixed
|
7
11
|
- Don't extract duplicate ISBN-10s from within ISBN-13s
|
@@ -63,3 +67,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
|
|
63
67
|
[0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1
|
64
68
|
[0.9.0]: https://github.com/altmetric/identifiers/releases/tag/v0.9.0
|
65
69
|
[0.9.1]: https://github.com/altmetric/identifiers/releases/tag/v0.9.1
|
70
|
+
[0.10.0]: https://github.com/altmetric/identifiers/releases/tag/v0.10.0
|
data/lib/identifiers/isbn.rb
CHANGED
@@ -1,99 +1,99 @@
|
|
1
|
-
module Identifiers
|
2
|
-
class ISBN
|
3
|
-
ISBN_13_REGEXP = /
|
4
|
-
\b
|
5
|
-
97[89] # ISBN (GS1) Bookland prefix
|
6
|
-
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
7
|
-
(?:
|
8
|
-
\d # Digit
|
9
|
-
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
10
|
-
){9}
|
11
|
-
\d # Check digit
|
12
|
-
\b
|
13
|
-
/x
|
14
|
-
ISBN_10_REGEXP = /
|
15
|
-
(?<! # Don't match a hyphenated or spaced ISBN-13
|
16
|
-
97[89]
|
17
|
-
[\p{Pd}\p{Zs}]
|
18
|
-
)
|
19
|
-
\b
|
20
|
-
(?:
|
21
|
-
\d # Digit
|
22
|
-
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
23
|
-
){9}
|
24
|
-
[\dX] # Check digit
|
25
|
-
\b
|
26
|
-
/x
|
27
|
-
ISBN_A_REGEXP = %r{
|
28
|
-
\b
|
29
|
-
(?<=10\.) # Directory indicator (always 10)
|
30
|
-
97[89]\. # ISBN (GS1) Bookland prefix
|
31
|
-
\d{2,8} # ISBN registration group element and publisher prefix
|
32
|
-
/ # Prefix/suffix divider
|
33
|
-
\d{1,7} # ISBN title enumerator and check digit
|
34
|
-
\b
|
35
|
-
}x
|
36
|
-
|
37
|
-
def self.extract(str)
|
38
|
-
extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
|
39
|
-
end
|
40
|
-
|
41
|
-
def self.extract_isbn_as(str)
|
42
|
-
extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
|
43
|
-
end
|
44
|
-
|
45
|
-
def self.extract_thirteen_digit_isbns(str)
|
46
|
-
str
|
47
|
-
.to_s
|
48
|
-
.scan(ISBN_13_REGEXP)
|
49
|
-
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
50
|
-
.select { |isbn| valid_isbn_13?(isbn) }
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.extract_ten_digit_isbns(str)
|
54
|
-
str
|
55
|
-
.to_s
|
56
|
-
.scan(ISBN_10_REGEXP)
|
57
|
-
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
58
|
-
.select { |isbn| valid_isbn_10?(isbn) }
|
59
|
-
.map { |isbn|
|
60
|
-
isbn.chop!
|
61
|
-
isbn.prepend('978')
|
62
|
-
isbn << isbn_13_check_digit(isbn).to_s
|
63
|
-
|
64
|
-
isbn
|
65
|
-
}
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.isbn_13_check_digit(isbn)
|
69
|
-
sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
70
|
-
check_digit = 10 - (sum % 10)
|
71
|
-
|
72
|
-
if check_digit == 10
|
73
|
-
0
|
74
|
-
else
|
75
|
-
check_digit
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def self.valid_isbn_13?(isbn)
|
80
|
-
return false unless isbn =~ ISBN_13_REGEXP
|
81
|
-
|
82
|
-
result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
83
|
-
|
84
|
-
(result % 10).zero?
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.valid_isbn_10?(isbn)
|
88
|
-
return false unless isbn =~ ISBN_10_REGEXP
|
89
|
-
|
90
|
-
result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
|
91
|
-
|
92
|
-
(result % 11).zero?
|
93
|
-
end
|
94
|
-
|
95
|
-
def self.digits_of(isbn)
|
96
|
-
isbn.to_s.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
1
|
+
module Identifiers
|
2
|
+
class ISBN
|
3
|
+
ISBN_13_REGEXP = /
|
4
|
+
\b
|
5
|
+
97[89] # ISBN (GS1) Bookland prefix
|
6
|
+
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
7
|
+
(?:
|
8
|
+
\d # Digit
|
9
|
+
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
10
|
+
){9}
|
11
|
+
\d # Check digit
|
12
|
+
\b
|
13
|
+
/x
|
14
|
+
ISBN_10_REGEXP = /
|
15
|
+
(?<! # Don't match a hyphenated or spaced ISBN-13
|
16
|
+
97[89]
|
17
|
+
[\p{Pd}\p{Zs}]
|
18
|
+
)
|
19
|
+
\b
|
20
|
+
(?:
|
21
|
+
\d # Digit
|
22
|
+
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
23
|
+
){9}
|
24
|
+
[\dX] # Check digit
|
25
|
+
\b
|
26
|
+
/x
|
27
|
+
ISBN_A_REGEXP = %r{
|
28
|
+
\b
|
29
|
+
(?<=10\.) # Directory indicator (always 10)
|
30
|
+
97[89]\. # ISBN (GS1) Bookland prefix
|
31
|
+
\d{2,8} # ISBN registration group element and publisher prefix
|
32
|
+
/ # Prefix/suffix divider
|
33
|
+
\d{1,7} # ISBN title enumerator and check digit
|
34
|
+
\b
|
35
|
+
}x
|
36
|
+
|
37
|
+
def self.extract(str)
|
38
|
+
extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.extract_isbn_as(str)
|
42
|
+
extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.extract_thirteen_digit_isbns(str)
|
46
|
+
str
|
47
|
+
.to_s
|
48
|
+
.scan(ISBN_13_REGEXP)
|
49
|
+
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
50
|
+
.select { |isbn| valid_isbn_13?(isbn) }
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.extract_ten_digit_isbns(str)
|
54
|
+
str
|
55
|
+
.to_s
|
56
|
+
.scan(ISBN_10_REGEXP)
|
57
|
+
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
58
|
+
.select { |isbn| valid_isbn_10?(isbn) }
|
59
|
+
.map { |isbn|
|
60
|
+
isbn.chop!
|
61
|
+
isbn.prepend('978')
|
62
|
+
isbn << isbn_13_check_digit(isbn).to_s
|
63
|
+
|
64
|
+
isbn
|
65
|
+
}
|
66
|
+
end
|
67
|
+
|
68
|
+
def self.isbn_13_check_digit(isbn)
|
69
|
+
sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
70
|
+
check_digit = 10 - (sum % 10)
|
71
|
+
|
72
|
+
if check_digit == 10
|
73
|
+
0
|
74
|
+
else
|
75
|
+
check_digit
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def self.valid_isbn_13?(isbn)
|
80
|
+
return false unless isbn =~ ISBN_13_REGEXP
|
81
|
+
|
82
|
+
result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
83
|
+
|
84
|
+
(result % 10).zero?
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.valid_isbn_10?(isbn)
|
88
|
+
return false unless isbn =~ ISBN_10_REGEXP
|
89
|
+
|
90
|
+
result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
|
91
|
+
|
92
|
+
(result % 11).zero?
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.digits_of(isbn)
|
96
|
+
isbn.to_s.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -1,7 +1,11 @@
|
|
1
|
-
module Identifiers
|
2
|
-
class PubmedId
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
module Identifiers
|
2
|
+
class PubmedId
|
3
|
+
ZERO_PADDED_NUMBER = %r{(?<=^|[[:space:]])0*(?!0)(\d+)(?=$|[[:space:]])}
|
4
|
+
PUBMED_URL = %r{(?:https?://(?:www\.)?ncbi\.nlm\.nih\.gov/(?:m/)?pubmed/|pmid:|info:pmid/)0*(\d+)}i
|
5
|
+
|
6
|
+
def self.extract(str)
|
7
|
+
str = str.to_s
|
8
|
+
str.scan(ZERO_PADDED_NUMBER).flatten | str.scan(PUBMED_URL).flatten
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
@@ -1,97 +1,97 @@
|
|
1
|
-
require 'identifiers/isbn'
|
2
|
-
|
3
|
-
RSpec.describe Identifiers::ISBN do
|
4
|
-
it 'extracts a ISBN' do
|
5
|
-
expect(described_class.extract('ISBN: 9780805069099')).to contain_exactly('9780805069099')
|
6
|
-
end
|
7
|
-
|
8
|
-
it 'extracts ISBNs when given as a number' do
|
9
|
-
isbn = 9780805069099
|
10
|
-
|
11
|
-
expect(described_class.extract(isbn)).to contain_exactly('9780805069099')
|
12
|
-
end
|
13
|
-
|
14
|
-
it 'normalizes 13-digit ISBNs' do
|
15
|
-
str = "978-0-80-506909-9\n978-0-67-187919-8"
|
16
|
-
|
17
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
18
|
-
end
|
19
|
-
|
20
|
-
it 'extracts multiple ISBN-13s separated by a space' do
|
21
|
-
str = '978-0-80-506909-9 978-0-67-187919-8'
|
22
|
-
|
23
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'extracts ISBNs with hyphens' do
|
27
|
-
expect(described_class.extract('ISBN: 978-0-80-506909-9')).to contain_exactly('9780805069099')
|
28
|
-
end
|
29
|
-
|
30
|
-
it 'extracts ISBNs with Unicode dashes' do
|
31
|
-
expect(described_class.extract('ISBN: 978–0–80–506909–9')).to contain_exactly('9780805069099')
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'extracts ISBNs with spaces' do
|
35
|
-
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
36
|
-
end
|
37
|
-
|
38
|
-
it 'extracts ISBNs with Unicode spaces' do
|
39
|
-
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
40
|
-
end
|
41
|
-
|
42
|
-
it 'extracts ISBN-13s from ISBN-As' do
|
43
|
-
expect(described_class.extract('10.978.8898392/315')).to contain_exactly('9788898392315')
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'does not extract invalid ISBNs from ISBN-As' do
|
47
|
-
expect(described_class.extract('10.978.8898392/316')).to be_empty
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'normalizes 10-digit ISBNs' do
|
51
|
-
str = "0-8050-6909-7 \n 2-7594-0269-X"
|
52
|
-
|
53
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
54
|
-
end
|
55
|
-
|
56
|
-
it 'extracts multiple 10-digit ISBNs separated by a space' do
|
57
|
-
str = '0-8050-6909-7 2-7594-0269-X'
|
58
|
-
|
59
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
60
|
-
end
|
61
|
-
|
62
|
-
it 'normalizes 10-digit ISBNs with Unicode dashes' do
|
63
|
-
expect(described_class.extract('0–8050–6909–7')).to contain_exactly('9780805069099')
|
64
|
-
end
|
65
|
-
|
66
|
-
it 'normalizes 10-digit ISBNs with a check digit of 10' do
|
67
|
-
expect(described_class.extract('4423272350')).to contain_exactly('9784423272350')
|
68
|
-
end
|
69
|
-
|
70
|
-
it 'normalizes 10-digit ISBNs with spaces' do
|
71
|
-
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
72
|
-
end
|
73
|
-
|
74
|
-
it 'normalizes 10-digit ISBNs with Unicode spaces' do
|
75
|
-
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
76
|
-
end
|
77
|
-
|
78
|
-
it 'normalizes 10-digit ISBNs with spaces and a check digit of X' do
|
79
|
-
expect(described_class.extract('2 7594 0269 X')).to contain_exactly('9782759402694')
|
80
|
-
end
|
81
|
-
|
82
|
-
it 'does not extract invalid 13-digit ISBNs' do
|
83
|
-
expect(described_class.extract('9783319217280')).to be_empty
|
84
|
-
end
|
85
|
-
|
86
|
-
it 'does not extract invalid 10-digit ISBNs' do
|
87
|
-
expect(described_class.extract('3319217280')).to be_empty
|
88
|
-
end
|
89
|
-
|
90
|
-
it 'does not extract ISBN-10s from hyphenated ISBN-13s' do
|
91
|
-
expect(described_class.extract('978-0-309-57079-4')).to contain_exactly('9780309570794')
|
92
|
-
end
|
93
|
-
|
94
|
-
it 'does not extract ISBN-10s from space-separated ISBN-13s' do
|
95
|
-
expect(described_class.extract('978 0 309 57079 4')).to contain_exactly('9780309570794')
|
96
|
-
end
|
97
|
-
end
|
1
|
+
require 'identifiers/isbn'
|
2
|
+
|
3
|
+
RSpec.describe Identifiers::ISBN do
|
4
|
+
it 'extracts a ISBN' do
|
5
|
+
expect(described_class.extract('ISBN: 9780805069099')).to contain_exactly('9780805069099')
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'extracts ISBNs when given as a number' do
|
9
|
+
isbn = 9780805069099
|
10
|
+
|
11
|
+
expect(described_class.extract(isbn)).to contain_exactly('9780805069099')
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'normalizes 13-digit ISBNs' do
|
15
|
+
str = "978-0-80-506909-9\n978-0-67-187919-8"
|
16
|
+
|
17
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'extracts multiple ISBN-13s separated by a space' do
|
21
|
+
str = '978-0-80-506909-9 978-0-67-187919-8'
|
22
|
+
|
23
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'extracts ISBNs with hyphens' do
|
27
|
+
expect(described_class.extract('ISBN: 978-0-80-506909-9')).to contain_exactly('9780805069099')
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'extracts ISBNs with Unicode dashes' do
|
31
|
+
expect(described_class.extract('ISBN: 978–0–80–506909–9')).to contain_exactly('9780805069099')
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'extracts ISBNs with spaces' do
|
35
|
+
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'extracts ISBNs with Unicode spaces' do
|
39
|
+
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'extracts ISBN-13s from ISBN-As' do
|
43
|
+
expect(described_class.extract('10.978.8898392/315')).to contain_exactly('9788898392315')
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'does not extract invalid ISBNs from ISBN-As' do
|
47
|
+
expect(described_class.extract('10.978.8898392/316')).to be_empty
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'normalizes 10-digit ISBNs' do
|
51
|
+
str = "0-8050-6909-7 \n 2-7594-0269-X"
|
52
|
+
|
53
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'extracts multiple 10-digit ISBNs separated by a space' do
|
57
|
+
str = '0-8050-6909-7 2-7594-0269-X'
|
58
|
+
|
59
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'normalizes 10-digit ISBNs with Unicode dashes' do
|
63
|
+
expect(described_class.extract('0–8050–6909–7')).to contain_exactly('9780805069099')
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'normalizes 10-digit ISBNs with a check digit of 10' do
|
67
|
+
expect(described_class.extract('4423272350')).to contain_exactly('9784423272350')
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'normalizes 10-digit ISBNs with spaces' do
|
71
|
+
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'normalizes 10-digit ISBNs with Unicode spaces' do
|
75
|
+
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'normalizes 10-digit ISBNs with spaces and a check digit of X' do
|
79
|
+
expect(described_class.extract('2 7594 0269 X')).to contain_exactly('9782759402694')
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'does not extract invalid 13-digit ISBNs' do
|
83
|
+
expect(described_class.extract('9783319217280')).to be_empty
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'does not extract invalid 10-digit ISBNs' do
|
87
|
+
expect(described_class.extract('3319217280')).to be_empty
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'does not extract ISBN-10s from hyphenated ISBN-13s' do
|
91
|
+
expect(described_class.extract('978-0-309-57079-4')).to contain_exactly('9780309570794')
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'does not extract ISBN-10s from space-separated ISBN-13s' do
|
95
|
+
expect(described_class.extract('978 0 309 57079 4')).to contain_exactly('9780309570794')
|
96
|
+
end
|
97
|
+
end
|
@@ -1,29 +1,99 @@
|
|
1
|
-
require 'identifiers/pubmed_id'
|
2
|
-
|
3
|
-
RSpec.describe Identifiers::PubmedId do
|
4
|
-
it 'extracts PubMed IDs' do
|
5
|
-
expect(described_class.extract("123\n456")).to contain_exactly('123', '456')
|
6
|
-
end
|
7
|
-
|
8
|
-
it '
|
9
|
-
|
10
|
-
|
11
|
-
expect(described_class.extract(
|
12
|
-
end
|
13
|
-
|
14
|
-
it '
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
expect(described_class.extract(
|
24
|
-
end
|
25
|
-
|
26
|
-
it '
|
27
|
-
|
28
|
-
|
29
|
-
|
1
|
+
require 'identifiers/pubmed_id'
|
2
|
+
|
3
|
+
RSpec.describe Identifiers::PubmedId do
|
4
|
+
it 'extracts PubMed IDs' do
|
5
|
+
expect(described_class.extract("123\n456")).to contain_exactly('123', '456')
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'extracts PubMed IDs from a PubMed URL with www' do
|
9
|
+
url = 'http://www.ncbi.nlm.nih.gov/pubmed/123456'
|
10
|
+
|
11
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'extracts PubMed IDs from a PubMed URL with www and https' do
|
15
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/123456'
|
16
|
+
|
17
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'extracts PubMed IDs from a PubMed URL without www' do
|
21
|
+
url = 'http://ncbi.nlm.nih.gov/pubmed/123456'
|
22
|
+
|
23
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'extracts PubMed IDs from a PubMed URL without www but with https' do
|
27
|
+
url = 'https://ncbi.nlm.nih.gov/pubmed/123456'
|
28
|
+
|
29
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'extracts PubMed IDs from a PubMed mobile URL' do
|
33
|
+
url = 'https://www.ncbi.nlm.nih.gov/m/pubmed/123456'
|
34
|
+
|
35
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'extracts PubMed IDs from a PubMed URL with hash parameters' do
|
39
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/123456#cm6191871_69589'
|
40
|
+
|
41
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'extracts PubMed IDs from a PubMed URL with query parameters' do
|
45
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/123456?hi=hello&goodbye=bye'
|
46
|
+
|
47
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'extracts zero leading PubMed IDs from a PubMed URL with query parameters' do
|
51
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/00123456?hi=hello&goodbye=bye'
|
52
|
+
|
53
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'extracts both number and URLs PubMed IDs' do
|
57
|
+
url = 'PubMed ID: 112233 another: https://www.ncbi.nlm.nih.gov/pubmed/123456'
|
58
|
+
|
59
|
+
expect(described_class.extract(url)).to contain_exactly('112233', '123456')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'does not return outputs with PubMed IDs in DOIs' do
|
63
|
+
str = "10.1038/nplants.2015.3\n10.1126/science.286.5445.1679e"
|
64
|
+
|
65
|
+
expect(described_class.extract(str)).to be_empty
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'strips leading 0s' do
|
69
|
+
expect(described_class.extract("0000010203\n000456000")).to contain_exactly('10203', '456000')
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'does not consider 0 as a valid PubMed ID' do
|
73
|
+
expect(described_class.extract('00000000')).to be_empty
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'extracts PubMed IDs separated by Unicode whitespace' do
|
77
|
+
expect(described_class.extract('123 456')).to contain_exactly('123', '456')
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'considers Fixnum as potential PubMed IDs too' do
|
81
|
+
expect(described_class.extract(123)).to contain_exactly('123')
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'extracts PubMed IDs with pmid scheme' do
|
85
|
+
expect(described_class.extract('pmid:123')).to contain_exactly('123')
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'strips leading zeroes from pmid scheme' do
|
89
|
+
expect(described_class.extract('pmid:000123')).to contain_exactly('123')
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'extracts PubMed IDs with info pmid scheme' do
|
93
|
+
expect(described_class.extract('info:pmid/123')).to contain_exactly('123')
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'strips leading zeroes from info pmid scheme' do
|
97
|
+
expect(described_class.extract('info:pmid/000123')).to contain_exactly('123')
|
98
|
+
end
|
99
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
|
-
RSpec.configure do |config|
|
2
|
-
config.filter_run :focus
|
3
|
-
config.run_all_when_everything_filtered = true
|
4
|
-
config.example_status_persistence_file_path = "spec/examples.txt"
|
5
|
-
config.disable_monkey_patching!
|
6
|
-
config.warnings = true
|
7
|
-
config.order = :random
|
8
|
-
config.default_formatter = 'doc' if config.files_to_run.one?
|
9
|
-
Kernel.srand config.seed
|
10
|
-
|
11
|
-
config.expect_with :rspec do |expectations|
|
12
|
-
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
13
|
-
end
|
14
|
-
|
15
|
-
config.mock_with :rspec do |mocks|
|
16
|
-
mocks.verify_partial_doubles = true
|
17
|
-
end
|
18
|
-
end
|
1
|
+
RSpec.configure do |config|
|
2
|
+
config.filter_run :focus
|
3
|
+
config.run_all_when_everything_filtered = true
|
4
|
+
config.example_status_persistence_file_path = "spec/examples.txt"
|
5
|
+
config.disable_monkey_patching!
|
6
|
+
config.warnings = true
|
7
|
+
config.order = :random
|
8
|
+
config.default_formatter = 'doc' if config.files_to_run.one?
|
9
|
+
Kernel.srand config.seed
|
10
|
+
|
11
|
+
config.expect_with :rspec do |expectations|
|
12
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
13
|
+
end
|
14
|
+
|
15
|
+
config.mock_with :rspec do |mocks|
|
16
|
+
mocks.verify_partial_doubles = true
|
17
|
+
end
|
18
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: identifiers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hernandez
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-12-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: urn
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
version: '0'
|
120
120
|
requirements: []
|
121
121
|
rubyforge_project:
|
122
|
-
rubygems_version: 2.
|
122
|
+
rubygems_version: 2.6.13
|
123
123
|
signing_key:
|
124
124
|
specification_version: 4
|
125
125
|
summary: Utilities library for various scholarly identifiers used by Altmetric
|