identifiers 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 20bb1b38cae83fa5173c1c5a955aa799ccb1dee1
4
- data.tar.gz: 5885f3af6e8c0c5fef3b0418105602bdf721a0ee
3
+ metadata.gz: d5c6a569245a2c9931eb0fd3a4f4e5e234254c6d
4
+ data.tar.gz: 41d5ea41e4fbf1bc967d27f671e3777991907d3a
5
5
  SHA512:
6
- metadata.gz: 7ccfe0444e88dcc61b51c17d8ce431318c7854936604843d50fd56a96da88536ca15da90572af3352b853e9506e1c40638418d187fe359b2c8bc5ec6ab2eaa34
7
- data.tar.gz: be40a3196e1c7e8f17786b53df685e7b6c4f70a182ad8363d4a1efb5e6d5471d939c2e44e89e7181a6781fab35a928ad874f086585bfa9317491cfbd7518fce8
6
+ metadata.gz: 08707a69b6d6143e13db5f8b3a4d8e58f54089fd37a7382296898ce4f5ac6b6dcb8ccd7a99535009074ce679d47f85fe0c51737cfb7f3b019b39b3855e000716
7
+ data.tar.gz: 03d64f650ebe7e4c411ad77c98d83c1a58be7eb3a034183d158cd615584a4d7fd5beb2a7197cdf39ee64450cc9e09c06162f526bef7238b92c811e6704e2dffd
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.7.0] - 2017-04-10
6
+ ### Added
7
+ - Added support for cleaning trailing punctuation from DOIs that also end in punctuation
8
+
5
9
  ## [0.6.0] - 2017-04-08
6
10
  ### Added
7
11
  - Added support for valid DOIs ending in punctuation
@@ -37,3 +41,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
37
41
  [0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
38
42
  [0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
39
43
  [0.6.0]: https://github.com/altmetric/identifiers/releases/tag/v0.6.0
44
+ [0.7.0]: https://github.com/altmetric/identifiers/releases/tag/v0.7.0
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2016 Altmetric LLP
3
+ Copyright (c) 2016-2017 Altmetric LLP
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
18
18
  Add this line to your application's `Gemfile`:
19
19
 
20
20
  ```ruby
21
- gem 'identifiers', '~> 0.6'
21
+ gem 'identifiers', '~> 0.7'
22
22
  ```
23
23
 
24
24
  And then execute:
@@ -79,6 +79,6 @@ We also maintain [a version of this library for PHP](https://github.com/altmetri
79
79
 
80
80
  ## License
81
81
 
82
- Copyright © 2016 Altmetric LLP
82
+ Copyright © 2016-2017 Altmetric LLP
83
83
 
84
84
  Distributed under the [MIT License](http://opensource.org/licenses/MIT).
@@ -33,11 +33,21 @@ module Identifiers
33
33
  .to_s
34
34
  .downcase
35
35
  .scan(PATTERN)
36
- .map { |doi|
37
- next doi if doi =~ VALID_ENDING
36
+ .map { |doi| strip_punctuation(doi) }
37
+ .compact
38
+ end
39
+
40
+ def self.extract_one(str)
41
+ match = str.to_s.downcase[PATTERN]
42
+ return unless match
43
+
44
+ strip_punctuation(match)
45
+ end
46
+
47
+ def self.strip_punctuation(doi)
48
+ return doi if doi =~ VALID_ENDING
38
49
 
39
- doi.sub(/\p{Punct}+\z/, '')
40
- }
50
+ extract_one(doi.sub(/\p{Punct}\z/, ''))
41
51
  end
42
52
  end
43
53
  end
@@ -65,8 +65,8 @@ RSpec.describe Identifiers::DOI do
65
65
  expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
66
66
  end
67
67
 
68
- it 'extracts particularly exotic DOIs' do
69
- str = 'This is an example of an exotic DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
68
+ it 'extracts old Wiley DOIs' do
69
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#'
70
70
 
71
71
  expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
72
72
  end
@@ -76,4 +76,38 @@ RSpec.describe Identifiers::DOI do
76
76
 
77
77
  expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
78
78
  end
79
+
80
+ it 'discards trailing punctuation from old Wiley DOIs' do
81
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#",'
82
+
83
+ expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
84
+ end
85
+
86
+ it 'discards trailing punctuation after balanced parentheses' do
87
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
88
+
89
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
90
+ end
91
+
92
+ it 'discards contiguous trailing punctuation after balanced parentheses' do
93
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
94
+
95
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
96
+ end
97
+
98
+ it 'discards trailing Unicode punctuation after balanced parentheses' do
99
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)…",'
100
+
101
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
102
+ end
103
+
104
+ it 'discards contiguous trailing punctuation after unbalanced parentheses' do
105
+ str = '(This is an example of a DOI: 10.1130/2013.2502).",'
106
+
107
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
108
+ end
109
+
110
+ it 'does not extract DOIs with purely punctuation suffixes' do
111
+ expect(described_class.extract('10.1130/!).",')).to be_empty
112
+ end
79
113
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-04-08 00:00:00.000000000 Z
12
+ date: 2017-04-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: urn