identifiers 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +5 -1
- data/lib/identifiers/doi.rb +37 -1
- data/lib/identifiers/isbn.rb +7 -5
- data/spec/identifiers/doi_spec.rb +49 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20bb1b38cae83fa5173c1c5a955aa799ccb1dee1
|
4
|
+
data.tar.gz: 5885f3af6e8c0c5fef3b0418105602bdf721a0ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ccfe0444e88dcc61b51c17d8ce431318c7854936604843d50fd56a96da88536ca15da90572af3352b853e9506e1c40638418d187fe359b2c8bc5ec6ab2eaa34
|
7
|
+
data.tar.gz: be40a3196e1c7e8f17786b53df685e7b6c4f70a182ad8363d4a1efb5e6d5471d939c2e44e89e7181a6781fab35a928ad874f086585bfa9317491cfbd7518fce8
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.6.0] - 2017-04-08
|
6
|
+
### Added
|
7
|
+
- Added support for valid DOIs ending in punctuation
|
8
|
+
|
5
9
|
## [0.5.0] - 2017-01-27
|
6
10
|
### Added
|
7
11
|
- Added support for ISBN-As when extracting DOIs and ISBNs
|
@@ -32,3 +36,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
|
|
32
36
|
[0.3.1]: https://github.com/altmetric/identifiers/releases/tag/v0.3.1
|
33
37
|
[0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
|
34
38
|
[0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
|
39
|
+
[0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
|
|
18
18
|
Add this line to your application's `Gemfile`:
|
19
19
|
|
20
20
|
```ruby
|
21
|
-
gem 'identifiers', '~> 0.5'
|
21
|
+
gem 'identifiers', '~> 0.6'
|
22
22
|
```
|
23
23
|
|
24
24
|
And then execute:
|
@@ -69,6 +69,10 @@ For `URN`s, please check the [URN gem documentation](https://github.com/altmetri
|
|
69
69
|
|
70
70
|
Bug reports and pull requests are welcome on GitHub at https://github.com/altmetric/identifiers.
|
71
71
|
|
72
|
+
## Contributions
|
73
|
+
|
74
|
+
* Thanks to [Tom Stuart](https://github.com/tomstuart) for [cleaning up the ISBN check digit code](https://github.com/altmetric/identifiers/pull/10).
|
75
|
+
|
72
76
|
## PHP version
|
73
77
|
|
74
78
|
We also maintain [a version of this library for PHP](https://github.com/altmetric/php-identifiers).
|
data/lib/identifiers/doi.rb
CHANGED
@@ -1,7 +1,43 @@
|
|
1
1
|
module Identifiers
|
2
2
|
class DOI
|
3
|
+
PATTERN = %r{
|
4
|
+
\b
|
5
|
+
10 # Directory indicator (always 10)
|
6
|
+
\.
|
7
|
+
(?:
|
8
|
+
# ISBN-A
|
9
|
+
97[89]\. # ISBN (GS1) Bookland prefix
|
10
|
+
\d{2,8} # ISBN registration group element and publisher prefix
|
11
|
+
/ # Prefix/suffix divider
|
12
|
+
\d{1,7} # ISBN title enumerator and check digit
|
13
|
+
|
|
14
|
+
# DOI
|
15
|
+
\d{4,9} # Registrant code
|
16
|
+
/ # Prefix/suffix divider
|
17
|
+
\S+ # DOI suffix
|
18
|
+
)
|
19
|
+
}x
|
20
|
+
VALID_ENDING = /
|
21
|
+
(?:
|
22
|
+
\p{^Punct} # Non-punctuation character
|
23
|
+
|
|
24
|
+
\(.+\) # Balanced parentheses
|
25
|
+
|
|
26
|
+
2-\# # Early Wiley DOI suffix
|
27
|
+
)
|
28
|
+
\z
|
29
|
+
/x
|
30
|
+
|
3
31
|
def self.extract(str)
|
4
|
-
str
|
32
|
+
str
|
33
|
+
.to_s
|
34
|
+
.downcase
|
35
|
+
.scan(PATTERN)
|
36
|
+
.map { |doi|
|
37
|
+
next doi if doi =~ VALID_ENDING
|
38
|
+
|
39
|
+
doi.sub(/\p{Punct}+\z/, '')
|
40
|
+
}
|
5
41
|
end
|
6
42
|
end
|
7
43
|
end
|
data/lib/identifiers/isbn.rb
CHANGED
@@ -27,7 +27,7 @@ module Identifiers
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.isbn_13_check_digit(isbn)
|
30
|
-
sum = isbn.
|
30
|
+
sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
31
31
|
check_digit = 10 - (sum % 10)
|
32
32
|
|
33
33
|
if check_digit == 10
|
@@ -40,7 +40,7 @@ module Identifiers
|
|
40
40
|
def self.valid_isbn_13?(isbn)
|
41
41
|
return false unless isbn =~ REGEX_13
|
42
42
|
|
43
|
-
result = isbn.
|
43
|
+
result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
|
44
44
|
|
45
45
|
(result % 10).zero?
|
46
46
|
end
|
@@ -48,11 +48,13 @@ module Identifiers
|
|
48
48
|
def self.valid_isbn_10?(isbn)
|
49
49
|
return false unless isbn =~ REGEX_10
|
50
50
|
|
51
|
-
result = isbn.
|
52
|
-
sum + (Integer(values[0].sub('X', '10')) * values[1].succ)
|
53
|
-
}
|
51
|
+
result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
|
54
52
|
|
55
53
|
(result % 11).zero?
|
56
54
|
end
|
55
|
+
|
56
|
+
def self.digits_of(isbn)
|
57
|
+
isbn.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
|
58
|
+
end
|
57
59
|
end
|
58
60
|
end
|
data/spec/identifiers/doi_spec.rb
CHANGED
@@ -2,13 +2,19 @@ require 'identifiers/doi'
|
|
2
2
|
|
3
3
|
RSpec.describe Identifiers::DOI do
|
4
4
|
it 'extracts DOIs from a string' do
|
5
|
-
str = 'This is an example of DOI: 10.1049/el.2013.3006'
|
5
|
+
str = 'This is an example of a DOI: 10.1049/el.2013.3006'
|
6
6
|
|
7
7
|
expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
|
8
8
|
end
|
9
9
|
|
10
|
-
it '
|
11
|
-
str = 'This is an example of DOI
|
10
|
+
it 'extracts DOIs from anywhere in a string' do
|
11
|
+
str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
|
12
|
+
|
13
|
+
expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'downcases the DOIs extracted' do
|
17
|
+
str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
|
12
18
|
|
13
19
|
expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
|
14
20
|
end
|
@@ -19,6 +25,10 @@ RSpec.describe Identifiers::DOI do
|
|
19
25
|
expect(described_class.extract(str)).to be_empty
|
20
26
|
end
|
21
27
|
|
28
|
+
it 'returns no DOIs if given nothing' do
|
29
|
+
expect(described_class.extract(nil)).to be_empty
|
30
|
+
end
|
31
|
+
|
22
32
|
it 'extracts ISBN-As' do
|
23
33
|
str = 'This is an ISBN-A: 10.978.8898392/315'
|
24
34
|
|
@@ -30,4 +40,40 @@ RSpec.describe Identifiers::DOI do
|
|
30
40
|
|
31
41
|
expect(described_class.extract(str)).to be_empty
|
32
42
|
end
|
43
|
+
|
44
|
+
it 'retains closing parentheses that are part of the DOI' do
|
45
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
|
46
|
+
|
47
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'discards trailing punctuation' do
|
51
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502.'
|
52
|
+
|
53
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'discards multiple contiguous trailing punctuation' do
|
57
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502...",'
|
58
|
+
|
59
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'discards trailing Unicode punctuation' do
|
63
|
+
str = 'This is an example of a DOI: 10.1130/2013.2502…'
|
64
|
+
|
65
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'extracts particularly exotic DOIs' do
|
69
|
+
str = 'This is an example of an exotic DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
|
70
|
+
|
71
|
+
expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'does not extract a closing parenthesis if not part of the DOI' do
|
75
|
+
str = '(This is an example of a DOI: 10.1130/2013.2502)'
|
76
|
+
|
77
|
+
expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
|
78
|
+
end
|
33
79
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: identifiers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hernandez
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: urn
|