identifiers 0.6.0 → 0.7.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 20bb1b38cae83fa5173c1c5a955aa799ccb1dee1
4
- data.tar.gz: 5885f3af6e8c0c5fef3b0418105602bdf721a0ee
3
+ metadata.gz: d5c6a569245a2c9931eb0fd3a4f4e5e234254c6d
4
+ data.tar.gz: 41d5ea41e4fbf1bc967d27f671e3777991907d3a
5
5
  SHA512:
6
- metadata.gz: 7ccfe0444e88dcc61b51c17d8ce431318c7854936604843d50fd56a96da88536ca15da90572af3352b853e9506e1c40638418d187fe359b2c8bc5ec6ab2eaa34
7
- data.tar.gz: be40a3196e1c7e8f17786b53df685e7b6c4f70a182ad8363d4a1efb5e6d5471d939c2e44e89e7181a6781fab35a928ad874f086585bfa9317491cfbd7518fce8
6
+ metadata.gz: 08707a69b6d6143e13db5f8b3a4d8e58f54089fd37a7382296898ce4f5ac6b6dcb8ccd7a99535009074ce679d47f85fe0c51737cfb7f3b019b39b3855e000716
7
+ data.tar.gz: 03d64f650ebe7e4c411ad77c98d83c1a58be7eb3a034183d158cd615584a4d7fd5beb2a7197cdf39ee64450cc9e09c06162f526bef7238b92c811e6704e2dffd
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.7.0] - 2017-04-10
6
+ ### Added
7
+ - Added support for cleaning trailing punctuation from DOIs that also end in punctuation
8
+
5
9
  ## [0.6.0] - 2017-04-08
6
10
  ### Added
7
11
  - Added support for valid DOIs ending in punctuation
@@ -37,3 +41,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
37
41
  [0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
38
42
  [0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
39
43
  [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
44
+ [0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2016 Altmetric LLP
3
+ Copyright (c) 2016-2017 Altmetric LLP
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
18
18
  Add this line to your application's `Gemfile`:
19
19
 
20
20
  ```ruby
21
- gem 'identifiers', '~> 0.6'
21
+ gem 'identifiers', '~> 0.7'
22
22
  ```
23
23
 
24
24
  And then execute:
@@ -79,6 +79,6 @@ We also maintain [a version of this library for PHP](https://github.com/altmetri
79
79
 
80
80
  ## License
81
81
 
82
- Copyright © 2016 Altmetric LLP
82
+ Copyright © 2016-2017 Altmetric LLP
83
83
 
84
84
  Distributed under the [MIT License](http://opensource.org/licenses/MIT).
@@ -33,11 +33,21 @@ module Identifiers
33
33
  .to_s
34
34
  .downcase
35
35
  .scan(PATTERN)
36
- .map { |doi|
37
- next doi if doi =~ VALID_ENDING
36
+ .map { |doi| strip_punctuation(doi) }
37
+ .compact
38
+ end
39
+
40
+ def self.extract_one(str)
41
+ match = str.to_s.downcase[PATTERN]
42
+ return unless match
43
+
44
+ strip_punctuation(match)
45
+ end
46
+
47
+ def self.strip_punctuation(doi)
48
+ return doi if doi =~ VALID_ENDING
38
49
 
39
- doi.sub(/\p{Punct}+\z/, '')
40
- }
50
+ extract_one(doi.sub(/\p{Punct}\z/, ''))
41
51
  end
42
52
  end
43
53
  end
@@ -65,8 +65,8 @@ RSpec.describe Identifiers::DOI do
65
65
  expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
66
66
  end
67
67
 
68
- it 'extracts particularly exotic DOIs' do
69
- str = 'This is an example of an exotic DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
68
+ it 'extracts old Wiley DOIs' do
69
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
70
70
 
71
71
  expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
72
72
  end
@@ -76,4 +76,38 @@ RSpec.describe Identifiers::DOI do
76
76
 
77
77
  expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
78
78
  end
79
+
80
+ it 'discards trailing punctuation from old Wiley DOIs' do
81
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#",'
82
+
83
+ expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
84
+ end
85
+
86
+ it 'discards trailing punctuation after balanced parentheses' do
87
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
88
+
89
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
90
+ end
91
+
92
+ it 'discards contiguous trailing punctuation after balanced parentheses' do
93
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
94
+
95
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
96
+ end
97
+
98
+ it 'discards trailing Unicode punctuation after balanced parentheses' do
99
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)…",'
100
+
101
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
102
+ end
103
+
104
+ it 'discards contiguous trailing punctuation after unbalanced parentheses' do
105
+ str = '(This is an example of a DOI: 10.1130/2013.2502).",'
106
+
107
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
108
+ end
109
+
110
+ it 'does not extract DOIs with purely punctuation suffixes' do
111
+ expect(described_class.extract('10.1130/!).",')).to be_empty
112
+ end
79
113
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-04-08 00:00:00.000000000 Z
12
+ date: 2017-04-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: urn