identifiers 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/identifiers/isbn.rb +99 -99
- data/lib/identifiers/pubmed_id.rb +11 -7
- data/spec/identifiers/isbn_spec.rb +97 -97
- data/spec/identifiers/pubmed_id_spec.rb +99 -29
- data/spec/spec_helper.rb +18 -18
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 612736c08372d3108c5b62dbab508417c3b67a73
|
4
|
+
data.tar.gz: a693d991efe3c913cce2fe6fc4496999c0883103
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 19ca1c46ff464e700b3158794edbcdb677d25fa2e3b7f557595d897e430a84e725d0d2a0ce0e8904af713c9adead71d26ff0dd7c72b788a033c451fec31ba73b
|
7
|
+
data.tar.gz: cfcfc156b3ca04b2becf36f810a0a6b3ad4e402a3eb6d1b2ad4642091b91c20cd64e5e739d30f2e4fb35b849c583e08cab5d4a8e06996ced17db3a593b6c61a8
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.10.0] - 2017-12-20
|
6
|
+
### Added
|
7
|
+
- Extract PubMed IDs from URLs (e.g. https://www.ncbi.nlm.nih.gov/pubmed/123456) and URIs with schemes `pmid:` and `info:pmid`
|
8
|
+
|
5
9
|
## [0.9.1] - 2017-08-01
|
6
10
|
### Fixed
|
7
11
|
- Don't extract duplicate ISBN-10s from within ISBN-13s
|
@@ -63,3 +67,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
|
|
63
67
|
[0.8.1]: https://github.com/altmetric/identifiers/releases/tag/v0.8.1
|
64
68
|
[0.9.0]: https://github.com/altmetric/identifiers/releases/tag/v0.9.0
|
65
69
|
[0.9.1]: https://github.com/altmetric/identifiers/releases/tag/v0.9.1
|
70
|
+
[0.10.0]: https://github.com/altmetric/identifiers/releases/tag/v0.10.0
|
data/lib/identifiers/isbn.rb
CHANGED
@@ -1,99 +1,99 @@
|
|
1
|
-
module Identifiers
|
2
|
-
class ISBN
|
3
|
-
ISBN_13_REGEXP = /
|
4
|
-
\b
|
5
|
-
97[89] # ISBN (GS1) Bookland prefix
|
6
|
-
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
7
|
-
(?:
|
8
|
-
\d # Digit
|
9
|
-
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
10
|
-
){9}
|
11
|
-
\d # Check digit
|
12
|
-
\b
|
13
|
-
/x
|
14
|
-
ISBN_10_REGEXP = /
|
15
|
-
(?<! # Don't match a hyphenated or spaced ISBN-13
|
16
|
-
97[89]
|
17
|
-
[\p{Pd}\p{Zs}]
|
18
|
-
)
|
19
|
-
\b
|
20
|
-
(?:
|
21
|
-
\d # Digit
|
22
|
-
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
23
|
-
){9}
|
24
|
-
[\dX] # Check digit
|
25
|
-
\b
|
26
|
-
/x
|
27
|
-
ISBN_A_REGEXP = %r{
|
28
|
-
\b
|
29
|
-
(?<=10\.) # Directory indicator (always 10)
|
30
|
-
97[89]\. # ISBN (GS1) Bookland prefix
|
31
|
-
\d{2,8} # ISBN registration group element and publisher prefix
|
32
|
-
/ # Prefix/suffix divider
|
33
|
-
\d{1,7} # ISBN title enumerator and check digit
|
34
|
-
\b
|
35
|
-
}x
|
36
|
-
|
37
|
-
def self.extract(str)
|
38
|
-
extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
|
39
|
-
end
|
40
|
-
|
41
|
-
def self.extract_isbn_as(str)
|
42
|
-
extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
|
43
|
-
end
|
44
|
-
|
45
|
-
def self.extract_thirteen_digit_isbns(str)
|
46
|
-
str
|
47
|
-
.to_s
|
48
|
-
.scan(ISBN_13_REGEXP)
|
49
|
-
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
50
|
-
.select { |isbn| valid_isbn_13?(isbn) }
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.extract_ten_digit_isbns(str)
|
54
|
-
str
|
55
|
-
.to_s
|
56
|
-
.scan(ISBN_10_REGEXP)
|
57
|
-
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
58
|
-
.select { |isbn| valid_isbn_10?(isbn) }
|
59
|
-
.map { |isbn|
|
60
|
-
isbn.chop!
|
61
|
-
isbn.prepend('978')
|
62
|
-
isbn << isbn_13_check_digit(isbn).to_s
|
63
|
-
|
64
|
-
isbn
|
65
|
-
}
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.isbn_13_check_digit(isbn)
|
69
|
-
sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
70
|
-
check_digit = 10 - (sum % 10)
|
71
|
-
|
72
|
-
if check_digit == 10
|
73
|
-
0
|
74
|
-
else
|
75
|
-
check_digit
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def self.valid_isbn_13?(isbn)
|
80
|
-
return false unless isbn =~ ISBN_13_REGEXP
|
81
|
-
|
82
|
-
result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
83
|
-
|
84
|
-
(result % 10).zero?
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.valid_isbn_10?(isbn)
|
88
|
-
return false unless isbn =~ ISBN_10_REGEXP
|
89
|
-
|
90
|
-
result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
|
91
|
-
|
92
|
-
(result % 11).zero?
|
93
|
-
end
|
94
|
-
|
95
|
-
def self.digits_of(isbn)
|
96
|
-
isbn.to_s.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
1
|
+
module Identifiers
|
2
|
+
class ISBN
|
3
|
+
ISBN_13_REGEXP = /
|
4
|
+
\b
|
5
|
+
97[89] # ISBN (GS1) Bookland prefix
|
6
|
+
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
7
|
+
(?:
|
8
|
+
\d # Digit
|
9
|
+
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
10
|
+
){9}
|
11
|
+
\d # Check digit
|
12
|
+
\b
|
13
|
+
/x
|
14
|
+
ISBN_10_REGEXP = /
|
15
|
+
(?<! # Don't match a hyphenated or spaced ISBN-13
|
16
|
+
97[89]
|
17
|
+
[\p{Pd}\p{Zs}]
|
18
|
+
)
|
19
|
+
\b
|
20
|
+
(?:
|
21
|
+
\d # Digit
|
22
|
+
[\p{Pd}\p{Zs}]? # Optional hyphenation
|
23
|
+
){9}
|
24
|
+
[\dX] # Check digit
|
25
|
+
\b
|
26
|
+
/x
|
27
|
+
ISBN_A_REGEXP = %r{
|
28
|
+
\b
|
29
|
+
(?<=10\.) # Directory indicator (always 10)
|
30
|
+
97[89]\. # ISBN (GS1) Bookland prefix
|
31
|
+
\d{2,8} # ISBN registration group element and publisher prefix
|
32
|
+
/ # Prefix/suffix divider
|
33
|
+
\d{1,7} # ISBN title enumerator and check digit
|
34
|
+
\b
|
35
|
+
}x
|
36
|
+
|
37
|
+
def self.extract(str)
|
38
|
+
extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.extract_isbn_as(str)
|
42
|
+
extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.extract_thirteen_digit_isbns(str)
|
46
|
+
str
|
47
|
+
.to_s
|
48
|
+
.scan(ISBN_13_REGEXP)
|
49
|
+
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
50
|
+
.select { |isbn| valid_isbn_13?(isbn) }
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.extract_ten_digit_isbns(str)
|
54
|
+
str
|
55
|
+
.to_s
|
56
|
+
.scan(ISBN_10_REGEXP)
|
57
|
+
.map { |isbn| isbn.gsub(/[\p{Pd}\p{Zs}]/, '') }
|
58
|
+
.select { |isbn| valid_isbn_10?(isbn) }
|
59
|
+
.map { |isbn|
|
60
|
+
isbn.chop!
|
61
|
+
isbn.prepend('978')
|
62
|
+
isbn << isbn_13_check_digit(isbn).to_s
|
63
|
+
|
64
|
+
isbn
|
65
|
+
}
|
66
|
+
end
|
67
|
+
|
68
|
+
def self.isbn_13_check_digit(isbn)
|
69
|
+
sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
70
|
+
check_digit = 10 - (sum % 10)
|
71
|
+
|
72
|
+
if check_digit == 10
|
73
|
+
0
|
74
|
+
else
|
75
|
+
check_digit
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def self.valid_isbn_13?(isbn)
|
80
|
+
return false unless isbn =~ ISBN_13_REGEXP
|
81
|
+
|
82
|
+
result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
83
|
+
|
84
|
+
(result % 10).zero?
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.valid_isbn_10?(isbn)
|
88
|
+
return false unless isbn =~ ISBN_10_REGEXP
|
89
|
+
|
90
|
+
result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
|
91
|
+
|
92
|
+
(result % 11).zero?
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.digits_of(isbn)
|
96
|
+
isbn.to_s.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
data/lib/identifiers/pubmed_id.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
-
module Identifiers
|
2
|
-
class PubmedId
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
module Identifiers
|
2
|
+
class PubmedId
|
3
|
+
ZERO_PADDED_NUMBER = %r{(?<=^|[[:space:]])0*(?!0)(\d+)(?=$|[[:space:]])}
|
4
|
+
PUBMED_URL = %r{(?:https?://(?:www\.)?ncbi\.nlm\.nih\.gov/(?:m/)?pubmed/|pmid:|info:pmid/)0*(\d+)}i
|
5
|
+
|
6
|
+
def self.extract(str)
|
7
|
+
str = str.to_s
|
8
|
+
str.scan(ZERO_PADDED_NUMBER).flatten | str.scan(PUBMED_URL).flatten
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/spec/identifiers/isbn_spec.rb
CHANGED
@@ -1,97 +1,97 @@
|
|
1
|
-
require 'identifiers/isbn'
|
2
|
-
|
3
|
-
RSpec.describe Identifiers::ISBN do
|
4
|
-
it 'extracts a ISBN' do
|
5
|
-
expect(described_class.extract('ISBN: 9780805069099')).to contain_exactly('9780805069099')
|
6
|
-
end
|
7
|
-
|
8
|
-
it 'extracts ISBNs when given as a number' do
|
9
|
-
isbn = 9780805069099
|
10
|
-
|
11
|
-
expect(described_class.extract(isbn)).to contain_exactly('9780805069099')
|
12
|
-
end
|
13
|
-
|
14
|
-
it 'normalizes 13-digit ISBNs' do
|
15
|
-
str = "978-0-80-506909-9\n978-0-67-187919-8"
|
16
|
-
|
17
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
18
|
-
end
|
19
|
-
|
20
|
-
it 'extracts multiple ISBN-13s separated by a space' do
|
21
|
-
str = '978-0-80-506909-9 978-0-67-187919-8'
|
22
|
-
|
23
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'extracts ISBNs with hyphens' do
|
27
|
-
expect(described_class.extract('ISBN: 978-0-80-506909-9')).to contain_exactly('9780805069099')
|
28
|
-
end
|
29
|
-
|
30
|
-
it 'extracts ISBNs with Unicode dashes' do
|
31
|
-
expect(described_class.extract('ISBN: 978–0–80–506909–9')).to contain_exactly('9780805069099')
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'extracts ISBNs with spaces' do
|
35
|
-
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
36
|
-
end
|
37
|
-
|
38
|
-
it 'extracts ISBNs with Unicode spaces' do
|
39
|
-
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
40
|
-
end
|
41
|
-
|
42
|
-
it 'extracts ISBN-13s from ISBN-As' do
|
43
|
-
expect(described_class.extract('10.978.8898392/315')).to contain_exactly('9788898392315')
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'does not extract invalid ISBNs from ISBN-As' do
|
47
|
-
expect(described_class.extract('10.978.8898392/316')).to be_empty
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'normalizes 10-digit ISBNs' do
|
51
|
-
str = "0-8050-6909-7 \n 2-7594-0269-X"
|
52
|
-
|
53
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
54
|
-
end
|
55
|
-
|
56
|
-
it 'extracts multiple 10-digit ISBNs separated by a space' do
|
57
|
-
str = '0-8050-6909-7 2-7594-0269-X'
|
58
|
-
|
59
|
-
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
60
|
-
end
|
61
|
-
|
62
|
-
it 'normalizes 10-digit ISBNs with Unicode dashes' do
|
63
|
-
expect(described_class.extract('0–8050–6909–7')).to contain_exactly('9780805069099')
|
64
|
-
end
|
65
|
-
|
66
|
-
it 'normalizes 10-digit ISBNs with a check digit of 10' do
|
67
|
-
expect(described_class.extract('4423272350')).to contain_exactly('9784423272350')
|
68
|
-
end
|
69
|
-
|
70
|
-
it 'normalizes 10-digit ISBNs with spaces' do
|
71
|
-
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
72
|
-
end
|
73
|
-
|
74
|
-
it 'normalizes 10-digit ISBNs with Unicode spaces' do
|
75
|
-
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
76
|
-
end
|
77
|
-
|
78
|
-
it 'normalizes 10-digit ISBNs with spaces and a check digit of X' do
|
79
|
-
expect(described_class.extract('2 7594 0269 X')).to contain_exactly('9782759402694')
|
80
|
-
end
|
81
|
-
|
82
|
-
it 'does not extract invalid 13-digit ISBNs' do
|
83
|
-
expect(described_class.extract('9783319217280')).to be_empty
|
84
|
-
end
|
85
|
-
|
86
|
-
it 'does not extract invalid 10-digit ISBNs' do
|
87
|
-
expect(described_class.extract('3319217280')).to be_empty
|
88
|
-
end
|
89
|
-
|
90
|
-
it 'does not extract ISBN-10s from hyphenated ISBN-13s' do
|
91
|
-
expect(described_class.extract('978-0-309-57079-4')).to contain_exactly('9780309570794')
|
92
|
-
end
|
93
|
-
|
94
|
-
it 'does not extract ISBN-10s from space-separated ISBN-13s' do
|
95
|
-
expect(described_class.extract('978 0 309 57079 4')).to contain_exactly('9780309570794')
|
96
|
-
end
|
97
|
-
end
|
1
|
+
require 'identifiers/isbn'
|
2
|
+
|
3
|
+
RSpec.describe Identifiers::ISBN do
|
4
|
+
it 'extracts a ISBN' do
|
5
|
+
expect(described_class.extract('ISBN: 9780805069099')).to contain_exactly('9780805069099')
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'extracts ISBNs when given as a number' do
|
9
|
+
isbn = 9780805069099
|
10
|
+
|
11
|
+
expect(described_class.extract(isbn)).to contain_exactly('9780805069099')
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'normalizes 13-digit ISBNs' do
|
15
|
+
str = "978-0-80-506909-9\n978-0-67-187919-8"
|
16
|
+
|
17
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'extracts multiple ISBN-13s separated by a space' do
|
21
|
+
str = '978-0-80-506909-9 978-0-67-187919-8'
|
22
|
+
|
23
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9780671879198')
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'extracts ISBNs with hyphens' do
|
27
|
+
expect(described_class.extract('ISBN: 978-0-80-506909-9')).to contain_exactly('9780805069099')
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'extracts ISBNs with Unicode dashes' do
|
31
|
+
expect(described_class.extract('ISBN: 978–0–80–506909–9')).to contain_exactly('9780805069099')
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'extracts ISBNs with spaces' do
|
35
|
+
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'extracts ISBNs with Unicode spaces' do
|
39
|
+
expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'extracts ISBN-13s from ISBN-As' do
|
43
|
+
expect(described_class.extract('10.978.8898392/315')).to contain_exactly('9788898392315')
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'does not extract invalid ISBNs from ISBN-As' do
|
47
|
+
expect(described_class.extract('10.978.8898392/316')).to be_empty
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'normalizes 10-digit ISBNs' do
|
51
|
+
str = "0-8050-6909-7 \n 2-7594-0269-X"
|
52
|
+
|
53
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'extracts multiple 10-digit ISBNs separated by a space' do
|
57
|
+
str = '0-8050-6909-7 2-7594-0269-X'
|
58
|
+
|
59
|
+
expect(described_class.extract(str)).to contain_exactly('9780805069099', '9782759402694')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'normalizes 10-digit ISBNs with Unicode dashes' do
|
63
|
+
expect(described_class.extract('0–8050–6909–7')).to contain_exactly('9780805069099')
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'normalizes 10-digit ISBNs with a check digit of 10' do
|
67
|
+
expect(described_class.extract('4423272350')).to contain_exactly('9784423272350')
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'normalizes 10-digit ISBNs with spaces' do
|
71
|
+
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'normalizes 10-digit ISBNs with Unicode spaces' do
|
75
|
+
expect(described_class.extract('0 8050 6909 7')).to contain_exactly('9780805069099')
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'normalizes 10-digit ISBNs with spaces and a check digit of X' do
|
79
|
+
expect(described_class.extract('2 7594 0269 X')).to contain_exactly('9782759402694')
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'does not extract invalid 13-digit ISBNs' do
|
83
|
+
expect(described_class.extract('9783319217280')).to be_empty
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'does not extract invalid 10-digit ISBNs' do
|
87
|
+
expect(described_class.extract('3319217280')).to be_empty
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'does not extract ISBN-10s from hyphenated ISBN-13s' do
|
91
|
+
expect(described_class.extract('978-0-309-57079-4')).to contain_exactly('9780309570794')
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'does not extract ISBN-10s from space-separated ISBN-13s' do
|
95
|
+
expect(described_class.extract('978 0 309 57079 4')).to contain_exactly('9780309570794')
|
96
|
+
end
|
97
|
+
end
|
data/spec/identifiers/pubmed_id_spec.rb
CHANGED
@@ -1,29 +1,99 @@
|
|
1
|
-
require 'identifiers/pubmed_id'
|
2
|
-
|
3
|
-
RSpec.describe Identifiers::PubmedId do
|
4
|
-
it 'extracts PubMed IDs' do
|
5
|
-
expect(described_class.extract("123\n456")).to contain_exactly('123', '456')
|
6
|
-
end
|
7
|
-
|
8
|
-
it '
|
9
|
-
|
10
|
-
|
11
|
-
expect(described_class.extract(
|
12
|
-
end
|
13
|
-
|
14
|
-
it '
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
expect(described_class.extract(
|
24
|
-
end
|
25
|
-
|
26
|
-
it '
|
27
|
-
|
28
|
-
|
29
|
-
|
1
|
+
require 'identifiers/pubmed_id'
|
2
|
+
|
3
|
+
RSpec.describe Identifiers::PubmedId do
|
4
|
+
it 'extracts PubMed IDs' do
|
5
|
+
expect(described_class.extract("123\n456")).to contain_exactly('123', '456')
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'extracts PubMed IDs from a PubMed URL with www' do
|
9
|
+
url = 'http://www.ncbi.nlm.nih.gov/pubmed/123456'
|
10
|
+
|
11
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'extracts PubMed IDs from a PubMed URL with www and https' do
|
15
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/123456'
|
16
|
+
|
17
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'extracts PubMed IDs from a PubMed URL without www' do
|
21
|
+
url = 'http://ncbi.nlm.nih.gov/pubmed/123456'
|
22
|
+
|
23
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'extracts PubMed IDs from a PubMed URL without www but with https' do
|
27
|
+
url = 'https://ncbi.nlm.nih.gov/pubmed/123456'
|
28
|
+
|
29
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'extracts PubMed IDs from a PubMed mobile URL' do
|
33
|
+
url = 'https://www.ncbi.nlm.nih.gov/m/pubmed/123456'
|
34
|
+
|
35
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'extracts PubMed IDs from a PubMed URL with hash parameters' do
|
39
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/123456#cm6191871_69589'
|
40
|
+
|
41
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'extracts PubMed IDs from a PubMed URL with query parameters' do
|
45
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/123456?hi=hello&goodbye=bye'
|
46
|
+
|
47
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'extracts zero leading PubMed IDs from a PubMed URL with query parameters' do
|
51
|
+
url = 'https://www.ncbi.nlm.nih.gov/pubmed/00123456?hi=hello&goodbye=bye'
|
52
|
+
|
53
|
+
expect(described_class.extract(url)).to contain_exactly('123456')
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'extracts both number and URLs PubMed IDs' do
|
57
|
+
url = 'PubMed ID: 112233 another: https://www.ncbi.nlm.nih.gov/pubmed/123456'
|
58
|
+
|
59
|
+
expect(described_class.extract(url)).to contain_exactly('112233', '123456')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'does not return outputs with PubMed IDs in DOIs' do
|
63
|
+
str = "10.1038/nplants.2015.3\n10.1126/science.286.5445.1679e"
|
64
|
+
|
65
|
+
expect(described_class.extract(str)).to be_empty
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'strips leading 0s' do
|
69
|
+
expect(described_class.extract("0000010203\n000456000")).to contain_exactly('10203', '456000')
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'does not consider 0 as a valid PubMed ID' do
|
73
|
+
expect(described_class.extract('00000000')).to be_empty
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'extracts PubMed IDs separated by Unicode whitespace' do
|
77
|
+
expect(described_class.extract('123 456')).to contain_exactly('123', '456')
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'considers Fixnum as potential PubMed IDs too' do
|
81
|
+
expect(described_class.extract(123)).to contain_exactly('123')
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'extracts PubMed IDs with pmid scheme' do
|
85
|
+
expect(described_class.extract('pmid:123')).to contain_exactly('123')
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'strips leading zeroes from pmid scheme' do
|
89
|
+
expect(described_class.extract('pmid:000123')).to contain_exactly('123')
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'extracts PubMed IDs with info pmid scheme' do
|
93
|
+
expect(described_class.extract('info:pmid/123')).to contain_exactly('123')
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'strips leading zeroes from info pmid scheme' do
|
97
|
+
expect(described_class.extract('info:pmid/000123')).to contain_exactly('123')
|
98
|
+
end
|
99
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
|
-
RSpec.configure do |config|
|
2
|
-
config.filter_run :focus
|
3
|
-
config.run_all_when_everything_filtered = true
|
4
|
-
config.example_status_persistence_file_path = "spec/examples.txt"
|
5
|
-
config.disable_monkey_patching!
|
6
|
-
config.warnings = true
|
7
|
-
config.order = :random
|
8
|
-
config.default_formatter = 'doc' if config.files_to_run.one?
|
9
|
-
Kernel.srand config.seed
|
10
|
-
|
11
|
-
config.expect_with :rspec do |expectations|
|
12
|
-
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
13
|
-
end
|
14
|
-
|
15
|
-
config.mock_with :rspec do |mocks|
|
16
|
-
mocks.verify_partial_doubles = true
|
17
|
-
end
|
18
|
-
end
|
1
|
+
RSpec.configure do |config|
|
2
|
+
config.filter_run :focus
|
3
|
+
config.run_all_when_everything_filtered = true
|
4
|
+
config.example_status_persistence_file_path = "spec/examples.txt"
|
5
|
+
config.disable_monkey_patching!
|
6
|
+
config.warnings = true
|
7
|
+
config.order = :random
|
8
|
+
config.default_formatter = 'doc' if config.files_to_run.one?
|
9
|
+
Kernel.srand config.seed
|
10
|
+
|
11
|
+
config.expect_with :rspec do |expectations|
|
12
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
13
|
+
end
|
14
|
+
|
15
|
+
config.mock_with :rspec do |mocks|
|
16
|
+
mocks.verify_partial_doubles = true
|
17
|
+
end
|
18
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: identifiers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hernandez
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-12-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: urn
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
version: '0'
|
120
120
|
requirements: []
|
121
121
|
rubyforge_project:
|
122
|
-
rubygems_version: 2.
|
122
|
+
rubygems_version: 2.6.13
|
123
123
|
signing_key:
|
124
124
|
specification_version: 4
|
125
125
|
summary: Utilities library for various scholarly identifiers used by Altmetric
|