identifiers 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +5 -1
- data/lib/identifiers/doi.rb +37 -1
- data/lib/identifiers/isbn.rb +7 -5
- data/spec/identifiers/doi_spec.rb +49 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20bb1b38cae83fa5173c1c5a955aa799ccb1dee1
|
4
|
+
data.tar.gz: 5885f3af6e8c0c5fef3b0418105602bdf721a0ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ccfe0444e88dcc61b51c17d8ce431318c7854936604843d50fd56a96da88536ca15da90572af3352b853e9506e1c40638418d187fe359b2c8bc5ec6ab2eaa34
|
7
|
+
data.tar.gz: be40a3196e1c7e8f17786b53df685e7b6c4f70a182ad8363d4a1efb5e6d5471d939c2e44e89e7181a6781fab35a928ad874f086585bfa9317491cfbd7518fce8
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.6.0] - 2017-04-08
|
6
|
+
### Added
|
7
|
+
- Added support for valid DOIs ending in punctuation
|
8
|
+
|
5
9
|
## [0.5.0] - 2017-01-27
|
6
10
|
### Added
|
7
11
|
- Added support for ISBN-As when extracting DOIs and ISBNs
|
@@ -32,3 +36,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
|
|
32
36
|
[0.3.1]: https://github.com/altmetric/identifiers/releases/tag/v0.3.1
|
33
37
|
[0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
|
34
38
|
[0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
|
39
|
+
[0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
|
|
18
18
|
Add this line to your application's `Gemfile`:
|
19
19
|
|
20
20
|
```ruby
|
21
|
-
gem 'identifiers', '~> 0.5'
|
21
|
+
gem 'identifiers', '~> 0.6'
|
22
22
|
```
|
23
23
|
|
24
24
|
And then execute:
|
@@ -69,6 +69,10 @@ For `URN`s, please check the [URN gem documentation](https://github.com/altmetri
|
|
69
69
|
|
70
70
|
Bug reports and pull requests are welcome on GitHub at https://github.com/altmetric/identifiers.
|
71
71
|
|
72
|
+
## Contributions
|
73
|
+
|
74
|
+
* Thanks to [Tom Stuart](https://github.com/tomstuart) for [cleaning up the ISBN check digit code](https://github.com/altmetric/identifiers/pull/10).
|
75
|
+
|
72
76
|
## PHP version
|
73
77
|
|
74
78
|
We also maintain [a version of this library for PHP](https://github.com/altmetric/php-identifiers).
|
data/lib/identifiers/doi.rb
CHANGED
@@ -1,7 +1,43 @@
|
|
1
1
|
module Identifiers
|
2
2
|
class DOI
|
3
|
+
PATTERN = %r{
|
4
|
+
\b
|
5
|
+
10 # Directory indicator (always 10)
|
6
|
+
\.
|
7
|
+
(?:
|
8
|
+
# ISBN-A
|
9
|
+
97[89]\. # ISBN (GS1) Bookland prefix
|
10
|
+
\d{2,8} # ISBN registration group element and publisher prefix
|
11
|
+
/ # Prefix/suffix divider
|
12
|
+
\d{1,7} # ISBN title enumerator and check digit
|
13
|
+
|
|
14
|
+
# DOI
|
15
|
+
\d{4,9} # Registrant code
|
16
|
+
/ # Prefix/suffix divider
|
17
|
+
\S+ # DOI suffix
|
18
|
+
)
|
19
|
+
}x
|
20
|
+
VALID_ENDING = /
|
21
|
+
(?:
|
22
|
+
\p{^Punct} # Non-punctuation character
|
23
|
+
|
|
24
|
+
\(.+\) # Balanced parentheses
|
25
|
+
|
|
26
|
+
2-\# # Early Wiley DOI suffix
|
27
|
+
)
|
28
|
+
\z
|
29
|
+
/x
|
30
|
+
|
3
31
|
def self.extract(str)
|
4
|
-
str
|
32
|
+
str
|
33
|
+
.to_s
|
34
|
+
.downcase
|
35
|
+
.scan(PATTERN)
|
36
|
+
.map { |doi|
|
37
|
+
next doi if doi =~ VALID_ENDING
|
38
|
+
|
39
|
+
doi.sub(/\p{Punct}+\z/, '')
|
40
|
+
}
|
5
41
|
end
|
6
42
|
end
|
7
43
|
end
|
data/lib/identifiers/isbn.rb
CHANGED
@@ -27,7 +27,7 @@ module Identifiers
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.isbn_13_check_digit(isbn)
|
30
|
-
sum = isbn.
|
30
|
+
sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
31
31
|
check_digit = 10 - (sum % 10)
|
32
32
|
|
33
33
|
if check_digit == 10
|
@@ -40,7 +40,7 @@ module Identifiers
|
|
40
40
|
def self.valid_isbn_13?(isbn)
|
41
41
|
return false unless isbn =~ REGEX_13
|
42
42
|
|
43
|
-
result = isbn.
|
43
|
+
result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
44
44
|
|
45
45
|
(result % 10).zero?
|
46
46
|
end
|
@@ -48,11 +48,13 @@ module Identifiers
|
|
48
48
|
def self.valid_isbn_10?(isbn)
|
49
49
|
return false unless isbn =~ REGEX_10
|
50
50
|
|
51
|
-
result = isbn.
|
52
|
-
sum + (Integer(values[0].sub('X', '10')) * values[1].succ)
|
53
|
-
}
|
51
|
+
result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
|
54
52
|
|
55
53
|
(result % 11).zero?
|
56
54
|
end
|
55
|
+
|
56
|
+
def self.digits_of(isbn)
|
57
|
+
isbn.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
|
58
|
+
end
|
57
59
|
end
|
58
60
|
end
|
data/spec/identifiers/doi_spec.rb
CHANGED
@@ -2,13 +2,19 @@ require 'identifiers/doi'
|
|
2
2
|
|
3
3
|
RSpec.describe Identifiers::DOI do
|
4
4
|
it 'extracts DOIs from a string' do
|
5
|
-
str = 'This is an example of DOI: 10.1049/el.2013.3006'
|
5
|
+
str = 'This is an example of a DOI: 10.1049/el.2013.3006'
|
6
6
|
|
7
7
|
expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
|
8
8
|
end
|
9
9
|
|
10
|
-
it '
|
11
|
-
str = 'This is an example of DOI
|
10
|
+
it 'extracts DOIs from anywhere in a string' do
|
11
|
+
str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
|
12
|
+
|
13
|
+
expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'downcases the DOIs extracted' do
|
17
|
+
str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
|
12
18
|
|
13
19
|
expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
|
14
20
|
end
|
@@ -19,6 +25,10 @@ RSpec.describe Identifiers::DOI do
|
|
19
25
|
expect(described_class.extract(str)).to be_empty
|
20
26
|
end
|
21
27
|
|
28
|
+
it 'returns no DOIs if given nothing' do
|
29
|
+
expect(described_class.extract(nil)).to be_empty
|
30
|
+
end
|
31
|
+
|
22
32
|
it 'extracts ISBN-As' do
|
23
33
|
str = 'This is an ISBN-A: 10.978.8898392/315'
|
24
34
|
|
@@ -30,4 +40,40 @@ RSpec.describe Identifiers::DOI do
|
|
30
40
|
|
31
41
|
expect(described_class.extract(str)).to be_empty
|
32
42
|
end
|
43
|
+
|
44
|
+
it 'retains closing parentheses that are part of the DOI' do
|
45
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
|
46
|
+
|
47
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'discards trailing punctuation' do
|
51
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502.'
|
52
|
+
|
53
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'discards multiple contiguous trailing punctuation' do
|
57
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502...",'
|
58
|
+
|
59
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'discards trailing Unicode punctuation' do
|
63
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502…'
|
64
|
+
|
65
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'extracts particularly exotic DOIs' do
|
69
|
+
str = 'This is an example of an exotic DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
|
70
|
+
|
71
|
+
expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'does not extract a closing parenthesis if not part of the DOI' do
|
75
|
+
str = '(This is an example of a DOI: 10.1130/2013.2502)'
|
76
|
+
|
77
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
78
|
+
end
|
33
79
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: identifiers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hernandez
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: urn
|