identifiers 0.5.0 → 0.6.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 036d129df184be8b4e577950bbe37c10455a2d6d
4
- data.tar.gz: b35262b8180a444476fd45124ddffd5086ee3af3
3
+ metadata.gz: 20bb1b38cae83fa5173c1c5a955aa799ccb1dee1
4
+ data.tar.gz: 5885f3af6e8c0c5fef3b0418105602bdf721a0ee
5
5
  SHA512:
6
- metadata.gz: 5e1ed385ed71f64287445b99452d8905d597f097be0e235e5057d443de98a0f2762f6e624633814a7fa1d2eb7973750d7439ed7a57fa94f2bc4a9d3eb0d4f09a
7
- data.tar.gz: f2f32ba1157949782ed5db1d3cdbe28004d76b0743ecf1737719790e872dea024b63fc008b3c6d65873ea407f4ff8be3407856cb4b702f27ad5d57ecfa0ca41b
6
+ metadata.gz: 7ccfe0444e88dcc61b51c17d8ce431318c7854936604843d50fd56a96da88536ca15da90572af3352b853e9506e1c40638418d187fe359b2c8bc5ec6ab2eaa34
7
+ data.tar.gz: be40a3196e1c7e8f17786b53df685e7b6c4f70a182ad8363d4a1efb5e6d5471d939c2e44e89e7181a6781fab35a928ad874f086585bfa9317491cfbd7518fce8
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.6.0] - 2017-04-08
6
+ ### Added
7
+ - Added support for valid DOIs ending in punctuation
8
+
5
9
  ## [0.5.0] - 2017-01-27
6
10
  ### Added
7
11
  - Added support for ISBN-As when extracting DOIs and ISBNs
@@ -32,3 +36,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
32
36
  [0.3.1]: https://github.com/altmetric/identifiers/releases/tag/v0.3.1
33
37
  [0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
34
38
  [0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
39
+ [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
data/README.md CHANGED
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
18
18
  Add this line to your application's `Gemfile`:
19
19
 
20
20
  ```ruby
21
- gem 'identifiers', '~> 0.5'
21
+ gem 'identifiers', '~> 0.6'
22
22
  ```
23
23
 
24
24
  And then execute:
@@ -69,6 +69,10 @@ For `URN`s, please check the [URN gem documentation](https://github.com/altmetri
69
69
 
70
70
  Bug reports and pull requests are welcome on GitHub at https://github.com/altmetric/identifiers.
71
71
 
72
+ ## Contributions
73
+
74
+ * Thanks to [Tom Stuart](https://github.com/tomstuart) for [cleaning up the ISBN check digit code](https://github.com/altmetric/identifiers/pull/10).
75
+
72
76
  ## PHP version
73
77
 
74
78
  We also maintain [a version of this library for PHP](https://github.com/altmetric/php-identifiers).
@@ -1,7 +1,43 @@
1
1
  module Identifiers
2
2
  class DOI
3
+ PATTERN = %r{
4
+ \b
5
+ 10 # Directory indicator (always 10)
6
+ \.
7
+ (?:
8
+ # ISBN-A
9
+ 97[89]\. # ISBN (GS1) Bookland prefix
10
+ \d{2,8} # ISBN registration group element and publisher prefix
11
+ / # Prefix/suffix divider
12
+ \d{1,7} # ISBN title enumerator and check digit
13
+ |
14
+ # DOI
15
+ \d{4,9} # Registrant code
16
+ / # Prefix/suffix divider
17
+ \S+ # DOI suffix
18
+ )
19
+ }x
20
+ VALID_ENDING = /
21
+ (?:
22
+ \p{^Punct} # Non-punctuation character
23
+ |
24
+ \(.+\) # Balanced parentheses
25
+ |
26
+ 2-\# # Early Wiley DOI suffix
27
+ )
28
+ \z
29
+ /x
30
+
3
31
  def self.extract(str)
4
- str.scan(%r{\b10\.(?:97[89]\.\d{2,8}/\d{1,7}|\d{4,9}/\S+)\b}).map(&:downcase)
32
+ str
33
+ .to_s
34
+ .downcase
35
+ .scan(PATTERN)
36
+ .map { |doi|
37
+ next doi if doi =~ VALID_ENDING
38
+
39
+ doi.sub(/\p{Punct}+\z/, '')
40
+ }
5
41
  end
6
42
  end
7
43
  end
@@ -27,7 +27,7 @@ module Identifiers
27
27
  end
28
28
 
29
29
  def self.isbn_13_check_digit(isbn)
30
- sum = isbn.each_char.zip([1, 3].cycle).reduce(0) { |sum, values| sum + (Integer(values[0]) * values[1]) }
30
+ sum = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
31
31
  check_digit = 10 - (sum % 10)
32
32
 
33
33
  if check_digit == 10
@@ -40,7 +40,7 @@ module Identifiers
40
40
  def self.valid_isbn_13?(isbn)
41
41
  return false unless isbn =~ REGEX_13
42
42
 
43
- result = isbn.each_char.zip([1, 3].cycle).reduce(0) { |sum, values| sum + (Integer(values[0]) * values[1]) }
43
+ result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)
44
44
 
45
45
  (result % 10).zero?
46
46
  end
@@ -48,11 +48,13 @@ module Identifiers
48
48
  def self.valid_isbn_10?(isbn)
49
49
  return false unless isbn =~ REGEX_10
50
50
 
51
- result = isbn.each_char.with_index.reduce(0) { |sum, values|
52
- sum + (Integer(values[0].sub('X', '10')) * values[1].succ)
53
- }
51
+ result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)
54
52
 
55
53
  (result % 11).zero?
56
54
  end
55
+
56
+ def self.digits_of(isbn)
57
+ isbn.each_char.map { |char| char == 'X' ? 10 : Integer(char) }.to_enum
58
+ end
57
59
  end
58
60
  end
@@ -2,13 +2,19 @@ require 'identifiers/doi'
2
2
 
3
3
  RSpec.describe Identifiers::DOI do
4
4
  it 'extracts DOIs from a string' do
5
- str = 'This is an example of DOI: 10.1049/el.2013.3006'
5
+ str = 'This is an example of a DOI: 10.1049/el.2013.3006'
6
6
 
7
7
  expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
8
8
  end
9
9
 
10
- it 'downcase the DOIs extracted' do
11
- str = 'This is an example of DOI: 10.1097/01.ASW.0000443266.17665.19'
10
+ it 'extracts DOIs from anywhere in a string' do
11
+ str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
12
+
13
+ expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
14
+ end
15
+
16
+ it 'downcases the DOIs extracted' do
17
+ str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
12
18
 
13
19
  expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
14
20
  end
@@ -19,6 +25,10 @@ RSpec.describe Identifiers::DOI do
19
25
  expect(described_class.extract(str)).to be_empty
20
26
  end
21
27
 
28
+ it 'returns no DOIs if given nothing' do
29
+ expect(described_class.extract(nil)).to be_empty
30
+ end
31
+
22
32
  it 'extracts ISBN-As' do
23
33
  str = 'This is an ISBN-A: 10.978.8898392/315'
24
34
 
@@ -30,4 +40,40 @@ RSpec.describe Identifiers::DOI do
30
40
 
31
41
  expect(described_class.extract(str)).to be_empty
32
42
  end
43
+
44
+ it 'retains closing parentheses that are part of the DOI' do
45
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
46
+
47
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
48
+ end
49
+
50
+ it 'discards trailing punctuation' do
51
+ str = 'This is an example of a DOI: 10.1130/2013.2502.'
52
+
53
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
54
+ end
55
+
56
+ it 'discards multiple contiguous trailing punctuation' do
57
+ str = 'This is an example of a DOI: 10.1130/2013.2502...",'
58
+
59
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
60
+ end
61
+
62
+ it 'discards trailing Unicode punctuation' do
63
+ str = 'This is an example of a DOI: 10.1130/2013.2502…'
64
+
65
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
66
+ end
67
+
68
+ it 'extracts particularly exotic DOIs' do
69
+ str = 'This is an example of an exotic DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
70
+
71
+ expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
72
+ end
73
+
74
+ it 'does not extract a closing parenthesis if not part of the DOI' do
75
+ str = '(This is an example of a DOI: 10.1130/2013.2502)'
76
+
77
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
78
+ end
33
79
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-01-27 00:00:00.000000000 Z
12
+ date: 2017-04-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: urn