identifiers 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 036d129df184be8b4e577950bbe37c10455a2d6d
4
- data.tar.gz: b35262b8180a444476fd45124ddffd5086ee3af3
3
+ metadata.gz: 20bb1b38cae83fa5173c1c5a955aa799ccb1dee1
4
+ data.tar.gz: 5885f3af6e8c0c5fef3b0418105602bdf721a0ee
5
5
  SHA512:
6
- metadata.gz: 5e1ed385ed71f64287445b99452d8905d597f097be0e235e5057d443de98a0f2762f6e624633814a7fa1d2eb7973750d7439ed7a57fa94f2bc4a9d3eb0d4f09a
7
- data.tar.gz: f2f32ba1157949782ed5db1d3cdbe28004d76b0743ecf1737719790e872dea024b63fc008b3c6d65873ea407f4ff8be3407856cb4b702f27ad5d57ecfa0ca41b
6
+ metadata.gz: 7ccfe0444e88dcc61b51c17d8ce431318c7854936604843d50fd56a96da88536ca15da90572af3352b853e9506e1c40638418d187fe359b2c8bc5ec6ab2eaa34
7
+ data.tar.gz: be40a3196e1c7e8f17786b53df685e7b6c4f70a182ad8363d4a1efb5e6d5471d939c2e44e89e7181a6781fab35a928ad874f086585bfa9317491cfbd7518fce8
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.6.0] - 2017-04-08
6
+ ### Added
7
+ - Added support for valid DOIs ending in punctuation
8
+
5
9
  ## [0.5.0] - 2017-01-27
6
10
  ### Added
7
11
  - Added support for ISBN-As when extracting DOIs and ISBNs
@@ -32,3 +36,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
32
36
  [0.3.1]: https://github.com/altmetric/identifiers/releases/tag/v0.3.1
33
37
  [0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
34
38
  [0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
39
+ [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
data/README.md CHANGED
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
18
18
  Add this line to your application's `Gemfile`:
19
19
 
20
20
  ```ruby
21
- gem 'identifiers', '~> 0.5'
21
+ gem 'identifiers', '~> 0.6'
22
22
  ```
23
23
 
24
24
  And then execute:
@@ -69,6 +69,10 @@ For `URN`s, please check the [URN gem documentation](https://github.com/altmetri
69
69
 
70
70
  Bug reports and pull requests are welcome on GitHub at https://github.com/altmetric/identifiers.
71
71
 
72
+ ## Contributions
73
+
74
+ * Thanks to [Tom Stuart](https://github.com/tomstuart) for [cleaning up the ISBN check digit code](https://github.com/altmetric/identifiers/pull/10).
75
+
72
76
  ## PHP version
73
77
 
74
78
  We also maintain [a version of this library for PHP](https://github.com/altmetric/php-identifiers).
@@ -1,7 +1,43 @@
1
1
  module Identifiers
2
2
  class DOI
3
+ PATTERN = %r{
4
+ \b
5
+ 10 # Directory indicator (always 10)
6
+ \.
7
+ (?:
8
+ # ISBN-A
9
+ 97[89]\. # ISBN (GS1) Bookland prefix
10
+ \d{2,8} # ISBN registration group element and publisher prefix
11
+ / # Prefix/suffix divider
12
+ \d{1,7} # ISBN title enumerator and check digit
13
+ |
14
+ # DOI
15
+ \d{4,9} # Registrant code
16
+ / # Prefix/suffix divider
17
+ \S+ # DOI suffix
18
+ )
19
+ }x
20
+ VALID_ENDING = /
21
+ (?:
22
+ \p{^Punct} # Non-punctuation character
23
+ |
24
+ \(.+\) # Balanced parentheses
25
+ |
26
+ 2-\# # Early Wiley DOI suffix
27
+ )
28
+ \z
29
+ /x
30
+
3
31
  def self.extract(str)
4
- str.scan(%r{\b10\.(?:97[89]\.\d{2,8}/\d{1,7}|\d{4,9}/\S+)\b}).map(&:downcase)
32
+ str
33
+ .to_s
34
+ .downcase
35
+ .scan(PATTERN)
36
+ .map { |doi|
37
+ next doi if doi =~ VALID_ENDING
38
+
39
+ doi.sub(/\p{Punct}+\z/, '')
40
+ }
5
41
  end
6
42
  end
7
43
  end
@@ -27,7 +27,7 @@ module Identifiers
27
27
  end
28
28
 
29
29
  def self.isbn_13_check_digit(isbn)
30
- sum = isbn.each_char.zip([1, 3].cycle).reduce(0) { |sum, values| sum + (Integer(values[0]) * values[1]) }
30
+ sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
31
31
  check_digit = 10 - (sum % 10)
32
32
 
33
33
  if check_digit == 10
@@ -40,7 +40,7 @@ module Identifiers
40
40
  def self.valid_isbn_13?(isbn)
41
41
  return false unless isbn =~ REGEX_13
42
42
 
43
- result = isbn.each_char.zip([1, 3].cycle).reduce(0) { |sum, values| sum + (Integer(values[0]) * values[1]) }
43
+ result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
44
44
 
45
45
  (result % 10).zero?
46
46
  end
@@ -48,11 +48,13 @@ module Identifiers
48
48
  def self.valid_isbn_10?(isbn)
49
49
  return false unless isbn =~ REGEX_10
50
50
 
51
- result = isbn.each_char.with_index.reduce(0) { |sum, values|
52
- sum + (Integer(values[0].sub('X', '10')) * values[1].succ)
53
- }
51
+ result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
54
52
 
55
53
  (result % 11).zero?
56
54
  end
55
+
56
+ def self.digits_of(isbn)
57
+ isbn.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
58
+ end
57
59
  end
58
60
  end
@@ -2,13 +2,19 @@ require 'identifiers/doi'
2
2
 
3
3
  RSpec.describe Identifiers::DOI do
4
4
  it 'extracts DOIs from a string' do
5
- str = 'This is an example of DOI: 10.1049/el.2013.3006'
5
+ str = 'This is an example of a DOI: 10.1049/el.2013.3006'
6
6
 
7
7
  expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
8
8
  end
9
9
 
10
- it 'downcase the DOIs extracted' do
11
- str = 'This is an example of DOI: 10.1097/01.ASW.0000443266.17665.19'
10
+ it 'extracts DOIs from anywhere in a string' do
11
+ str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
12
+
13
+ expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
14
+ end
15
+
16
+ it 'downcases the DOIs extracted' do
17
+ str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
12
18
 
13
19
  expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
14
20
  end
@@ -19,6 +25,10 @@ RSpec.describe Identifiers::DOI do
19
25
  expect(described_class.extract(str)).to be_empty
20
26
  end
21
27
 
28
+ it 'returns no DOIs if given nothing' do
29
+ expect(described_class.extract(nil)).to be_empty
30
+ end
31
+
22
32
  it 'extracts ISBN-As' do
23
33
  str = 'This is an ISBN-A: 10.978.8898392/315'
24
34
 
@@ -30,4 +40,40 @@ RSpec.describe Identifiers::DOI do
30
40
 
31
41
  expect(described_class.extract(str)).to be_empty
32
42
  end
43
+
44
+ it 'retains closing parentheses that are part of the DOI' do
45
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
46
+
47
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
48
+ end
49
+
50
+ it 'discards trailing punctuation' do
51
+ str = 'This is an example of a DOI: 10.1130/2013.2502.'
52
+
53
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
54
+ end
55
+
56
+ it 'discards multiple contiguous trailing punctuation' do
57
+ str = 'This is an example of a DOI: 10.1130/2013.2502...",'
58
+
59
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
60
+ end
61
+
62
+ it 'discards trailing Unicode punctuation' do
63
+ str = 'This is an example of a DOI: 10.1130/2013.2502…'
64
+
65
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
66
+ end
67
+
68
+ it 'extracts particularly exotic DOIs' do
69
+ str = 'This is an example of an exotic DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
70
+
71
+ expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
72
+ end
73
+
74
+ it 'does not extract a closing parenthesis if not part of the DOI' do
75
+ str = '(This is an example of a DOI: 10.1130/2013.2502)'
76
+
77
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
78
+ end
33
79
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-01-27 00:00:00.000000000 Z
12
+ date: 2017-04-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: urn