identifiers 0.12.1 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 423438daad2350706eced26425401a5d97ae715095b54853fe55ba4f8dbbaf2d
4
- data.tar.gz: 2776accbdccfc17965dd69507a0b326b2a57e275811828b05e76b47c0f54bb28
3
+ metadata.gz: 911c52af8788814413165b25ec5dfdace4cd213906dd193216c720c31cb6d3de
4
+ data.tar.gz: fb75b4b356a1e87711f7b0d9a820d938d4dbb2d28730c5c8738d105f947cae00
5
5
  SHA512:
6
- metadata.gz: e319511db960df762b3a646239edf45ff683bef6227736c673072de6ef5d1649dd300232f93c0af0be1d7744308de4f07ebfd53582add6069c6b3f425249722b
7
- data.tar.gz: 1ebd31499facbc5a51ec0dcf30fadb920e86ad6e53c20f4bb5dd66fa11c9f9cc7842a8b703348f13aac2675c0415c2c0f9464c42baafd055669c881342c33217
6
+ metadata.gz: 72881f5981cec05c2273e8c76912a8853edaa5b6a0d0cc69a298daedde4037ee98080903fcf9c80f8290a9fe79d7e0ced24ec680dd9f92c7756c9e76c70e3bbe
7
+ data.tar.gz: 1255cb693b80e63ccd19e2ed0c2c5962319467c6635e8713cf7185e5e6c91dc7f952e6b614f5c1131cef45586c06fff12c4eb38e7803b8bd0cb1dfec8b0134e0
data/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.14.0] - 2024-07-30
6
+ ### Added
7
+ - Added optional prefixes argument to ISBNs extraction.
8
+ If passed, `.extract` will only match series of numbers that are preceded by any of the passed prefixes.
9
+
10
+ ## [0.13.0] - 2019-09-04
11
+ ### Added
12
+ - Added new mode to the DOI extraction, so that it doesn't strip trailing
13
+ periods when in `strict` mode
14
+
5
15
  ## [0.12.1] - 2018-04-09
6
16
  ### Fixed
7
17
  - Restored support for extracting hyphenated ISBN-10s with registration group
@@ -88,3 +98,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
88
98
  [0.11.0]: https://github.com/altmetric/identifiers/releases/tag/v0.11.0
89
99
  [0.12.0]: https://github.com/altmetric/identifiers/releases/tag/v0.12.0
90
100
  [0.12.1]: https://github.com/altmetric/identifiers/releases/tag/v0.12.1
101
+ [0.13.0]: https://github.com/altmetric/identifiers/releases/tag/v0.13.0
data/README.md CHANGED
@@ -32,18 +32,34 @@ Or install it yourself as:
32
32
  ## Usage
33
33
 
34
34
  ```ruby
35
- Identifiers::DOI.extract('example: 10.123/abcd.efghi')
36
- # => ["10.123/abcd.efghi"]
35
+ Identifiers::DOI.extract('example: 10.1234/5678.ABC')
36
+ # => ["10.1234/5678.abc"]
37
37
 
38
38
  Identifiers::DOI.extract('no DOIs here')
39
39
  # => []
40
40
 
41
41
  Identifiers::URN.new('urn:abc:123')
42
42
  # => #<URN:0x007ff11c13d930 @urn="urn:abc:123", @nid="abc", @nss="123">
43
+
43
44
  Identifiers::URN('urn:abc:123')
44
45
  # => #<URN:0x007ff11c0ff568 @urn="urn:abc:123", @nid="abc", @nss="123">
45
46
  ```
46
47
 
48
+ A small percentage of DOIs end in a trailing `.`. However, returning trailing
49
+ periods from the default extraction method would likely produce quite a few
50
+ false positives.
51
+ `DOI.extract` accepts a `strict` option, which can be set to true if we prefer to
52
+ return DOIs ending in `.`. By default, this option is set to `false`, which strips
53
+ any trailing `.`:
54
+
55
+ ```ruby
56
+ Identifiers::DOI.extract('example: 10.1234/5678.abc.', strict: true)
57
+ # => ["10.1234/5678.abc."]
58
+
59
+ Identifiers::DOI.extract('example: 10.1234/5678.abc.')
60
+ # => ["10.1234/5678.abc"]
61
+ ```
62
+
47
63
  ## By identifier
48
64
 
49
65
  `.extract` is a common method that works across all the supported identifiers.
@@ -61,6 +77,17 @@ Identifiers::RepecId.extract('')
61
77
  Identifiers::URN.extract('')
62
78
  ```
63
79
 
80
+ For `ISBN.extract`, you can pass an array of prefixes as an optional second argument when you want to exclude matches that are not preceded by one of those prefixes (prefix matching is case-insensitive and ignores an optional `:` and any whitespace after the prefix):
81
+
82
+ ```ruby
83
+ Identifiers::ISBN.extract(
84
+ "IsBN:9789992158104 \n isbn-10 9789971502102 \n ISBN-13: 9789604250592 \n 9788090273412",
85
+ ["ISBN", "ISBN-10"]
86
+ )
87
+ # => ["9789992158104", "9789971502102"]
88
+ ```
89
+
90
+
64
91
  But some identifiers might have more. Check [their implementation](https://github.com/altmetric/identifiers/tree/master/lib/identifiers) to see all the methods available.
65
92
 
66
93
  For `URN`s, please check the [URN gem documentation](https://github.com/altmetric/urn) to see all the available options.
@@ -24,11 +24,17 @@ module Identifiers
24
24
  |
25
25
  [^[:space:]]+(?![[:space:]])\p{^P} # Suffix ending in non-punctuation
26
26
  )
27
+ \.{0,3} # Allow a DOI to end with up to 3 .
27
28
  )
28
29
  }x
29
30
 
30
- def self.extract(str)
31
- str.to_s.downcase.scan(REGEXP)
31
+ def self.extract(str, options = {})
32
+ strict = options.fetch(:strict, false)
33
+
34
+ dois = str.to_s.downcase.scan(REGEXP)
35
+ dois = dois.map { |doi| doi.gsub(/\.+$/, '') } unless strict
36
+
37
+ dois
32
38
  end
33
39
  end
34
40
  end
@@ -39,11 +39,25 @@ module Identifiers
39
39
  \d{1,7} # ISBN title enumerator and check digit
40
40
  \b
41
41
  }x
42
+ TEXT_AFTER_PREFIX_REGEXP = ':?\s*(\d.*)$'.freeze
43
+
44
+ def self.extract(str , prefixes = [])
45
+ str = match_strings_with_prefix(str , prefixes) if prefixes.any?
42
46
 
43
- def self.extract(str)
44
47
  extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
45
48
  end
46
49
 
50
+ def self.match_strings_with_prefix(str, prefixes)
51
+ prefix_regexp = prefixes.join('|')
52
+
53
+ str
54
+ .to_s
55
+ .scan(/(#{prefix_regexp})#{TEXT_AFTER_PREFIX_REGEXP}/i)
56
+ .inject('') do |acum, (_prefix, match)|
57
+ acum + "#{match} \n "
58
+ end
59
+ end
60
+
47
61
  def self.extract_isbn_as(str)
48
62
  extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
49
63
  end
@@ -1,151 +1,195 @@
1
1
  require 'identifiers/doi'
2
2
 
3
3
  RSpec.describe Identifiers::DOI do
4
- it 'extracts DOIs from a string' do
5
- str = 'This is an example of a DOI: 10.1049/el.2013.3006'
4
+ OPTIONS = [{ strict: false }, { strict: true }].freeze
6
5
 
7
- expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
6
+ def each_doi(file)
7
+ Pathname.new(__FILE__).join('..', '..', 'fixtures', file).each_line do |doi|
8
+ yield(doi.chomp!)
9
+ end
8
10
  end
9
11
 
10
- it 'extracts DOIs from anywhere in a string' do
11
- str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
12
+ OPTIONS.each do |options|
13
+ context "when extracting with options set to #{options.inspect}" do
14
+ it 'extracts DOIs from a string' do
15
+ str = 'This is an example of a DOI: 10.1049/el.2013.3006'
12
16
 
13
- expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
14
- end
17
+ expect(described_class.extract(str, options)).to contain_exactly('10.1049/el.2013.3006')
18
+ end
15
19
 
16
- it 'downcases the DOIs extracted' do
17
- str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
20
+ it 'extracts DOIs from anywhere in a string' do
21
+ str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
18
22
 
19
- expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
20
- end
23
+ expect(described_class.extract(str, options)).to contain_exactly('10.1049/el.2013.3006')
24
+ end
21
25
 
22
- it 'does not extract a PubMed ID' do
23
- str = 'This is NOT a DOI: 123456'
26
+ it 'downcases the DOIs extracted' do
27
+ str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
24
28
 
25
- expect(described_class.extract(str)).to be_empty
26
- end
29
+ expect(described_class.extract(str, options)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
30
+ end
27
31
 
28
- it 'returns no DOIs if given nothing' do
29
- expect(described_class.extract(nil)).to be_empty
30
- end
32
+ it 'does not extract a PubMed ID' do
33
+ str = 'This is NOT a DOI: 123456'
31
34
 
32
- it 'extracts ISBN-As' do
33
- str = 'This is an ISBN-A: 10.978.8898392/315'
35
+ expect(described_class.extract(str, options)).to be_empty
36
+ end
34
37
 
35
- expect(described_class.extract(str)).to contain_exactly('10.978.8898392/315')
36
- end
38
+ it 'returns no DOIs if given nothing' do
39
+ expect(described_class.extract(nil)).to be_empty
40
+ end
37
41
 
38
- it 'does not extract invalid ISBN-As' do
39
- str = 'This is not an ISBN-A: 10.978.8898392/NotARealIsbnA'
42
+ it 'extracts ISBN-As' do
43
+ str = 'This is an ISBN-A: 10.978.8898392/315'
40
44
 
41
- expect(described_class.extract(str)).to be_empty
42
- end
45
+ expect(described_class.extract(str, options)).to contain_exactly('10.978.8898392/315')
46
+ end
43
47
 
44
- it 'retains closing parentheses that are part of the DOI' do
45
- str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
48
+ it 'does not extract invalid ISBN-As' do
49
+ str = 'This is not an ISBN-A: 10.978.8898392/NotARealIsbnA'
46
50
 
47
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
48
- end
51
+ expect(described_class.extract(str, options)).to be_empty
52
+ end
49
53
 
50
- it 'discards trailing punctuation' do
51
- str = 'This is an example of a DOI: 10.1130/2013.2502.'
54
+ it 'retains closing parentheses that are part of the DOI' do
55
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
52
56
 
53
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
54
- end
57
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502(04)')
58
+ end
55
59
 
56
- it 'discards multiple contiguous trailing punctuation' do
57
- str = 'This is an example of a DOI: 10.1130/2013.2502...",'
60
+ it 'discards ellipses' do
61
+ str = 'This is an example of a DOI: 10.1130/2013.2502'
58
62
 
59
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
60
- end
63
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502')
64
+ end
61
65
 
62
- it 'discards trailing Unicode punctuation' do
63
- str = 'This is an example of a DOI: 10.1130/2013.2502…'
66
+ it 'extracts old Wiley DOIs' do
67
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-# 10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5'
64
68
 
65
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
66
- end
69
+ expect(described_class.extract(str, options)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#', '10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5')
70
+ end
67
71
 
68
- it 'extracts old Wiley DOIs' do
69
- str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-# 10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5'
72
+ it 'does not extract a closing parenthesis if not part of the DOI' do
73
+ str = '(This is an example of a DOI: 10.1130/2013.2502)'
70
74
 
71
- expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#', '10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5')
72
- end
75
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502')
76
+ end
73
77
 
74
- it 'does not extract a closing parenthesis if not part of the DOI' do
75
- str = '(This is an example of a DOI: 10.1130/2013.2502)'
78
+ it 'discards trailing punctuation from old Wiley DOIs' do
79
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#",'
76
80
 
77
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
78
- end
81
+ expect(described_class.extract(str, options)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
82
+ end
79
83
 
80
- it 'discards trailing punctuation from old Wiley DOIs' do
81
- str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#",'
84
+ it 'discards trailing Unicode punctuation after balanced parentheses' do
85
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)",'
82
86
 
83
- expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
84
- end
87
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502(04)')
88
+ end
85
89
 
86
- it 'discards trailing punctuation after balanced parentheses' do
87
- str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
90
+ it 'discards contiguous trailing punctuation after unbalanced parentheses' do
91
+ str = '(This is an example of a DOI: 10.1130/2013.2502).",'
88
92
 
89
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
90
- end
93
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502')
94
+ end
91
95
 
92
- it 'discards contiguous trailing punctuation after balanced parentheses' do
93
- str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
96
+ it 'does not extract DOIs with purely punctuation suffixes' do
97
+ expect(described_class.extract('10.1130/!).",', options)).to be_empty
98
+ end
94
99
 
95
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
96
- end
100
+ it 'extracts DOIs with emoji in them' do
101
+ expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123', options)).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
102
+ end
97
103
 
98
- it 'discards trailing Unicode punctuation after balanced parentheses' do
99
- str = 'This is an example of a DOI: 10.1130/2013.2502(04)…",'
104
+ it 'extracts DOIs separated by Unicode whitespace' do
105
+ expect(described_class.extract('10.1234/foo  10.1234/bar', options)).to contain_exactly('10.1234/foo', '10.1234/bar')
106
+ end
100
107
 
101
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
102
- end
108
+ it 'does not extract DOIs with extra digits prefixed' do
109
+ expect(described_class.extract('110.1234/foo', options)).to be_empty
110
+ end
103
111
 
104
- it 'discards contiguous trailing punctuation after unbalanced parentheses' do
105
- str = '(This is an example of a DOI: 10.1130/2013.2502).",'
112
+ it 'extracts DOIs from a string with trailing closing parentheses' do
113
+ expect(described_class.extract('(10.1130/2013.2502(04))', options)).to contain_exactly('10.1130/2013.2502(04)')
114
+ end
106
115
 
107
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
108
- end
116
+ it 'extracts DOIs from a string with multiple trailing closing parentheses' do
117
+ expect(described_class.extract('10.1130/2013.2502(04))))', options)).to contain_exactly('10.1130/2013.2502(04)')
118
+ end
109
119
 
110
- it 'does not overflow when given lots of trailing punctuation' do
111
- str = '10.1130/2013.2502' + ('.' * 10000)
120
+ it 'extracts DOIs with parentheses within the suffix' do
121
+ expect(described_class.extract('10.1016/0005-2744(70)90072-0', options)).to contain_exactly('10.1016/0005-2744(70)90072-0')
122
+ end
112
123
 
113
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
124
+ it 'extracts all DOIs from a Crossref sample' do
125
+ each_doi('dois.txt') { |doi|
126
+ expect(described_class.extract(doi, options)).to contain_exactly(doi)
127
+ }
128
+ end
129
+ end
114
130
  end
115
131
 
116
- it 'does not extract DOIs with purely punctuation suffixes' do
117
- expect(described_class.extract('10.1130/!).",')).to be_empty
118
- end
132
+ context 'when no options are provided' do
133
+ it 'discards trailing punctuation' do
134
+ str = 'This is an example of a DOI: 10.1130/2013.2502.'
119
135
 
120
- it 'extracts DOIs with emoji in them' do
121
- expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
122
- end
136
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
137
+ end
123
138
 
124
- it 'extracts DOIs separated by Unicode whitespace' do
125
- expect(described_class.extract('10.1234/foo  10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
126
- end
139
+ it 'discards multiple contiguous trailing punctuation' do
140
+ str = 'This is an example of a DOI: 10.1130/2013.2502...",'
127
141
 
128
- it 'does not extract DOIs with extra digits prefixed' do
129
- expect(described_class.extract('110.1234/foo')).to be_empty
130
- end
142
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
143
+ end
131
144
 
132
- it 'extracts DOIs from a string with trailing closing parentheses' do
133
- expect(described_class.extract('10.1130/2013.2502(04))')).to contain_exactly('10.1130/2013.2502(04)')
134
- end
145
+ it 'discards trailing punctuation after balanced parentheses' do
146
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
135
147
 
136
- it 'extracts DOIs from a string with multiple trailing closing parentheses' do
137
- expect(described_class.extract('10.1130/2013.2502(04))))')).to contain_exactly('10.1130/2013.2502(04)')
138
- end
148
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
149
+ end
139
150
 
140
- it 'extracts DOIs with parentheses within the suffix' do
141
- expect(described_class.extract('10.1016/0005-2744(70)90072-0')).to contain_exactly('10.1016/0005-2744(70)90072-0')
151
+ it 'discards contiguous trailing punctuation after balanced parentheses' do
152
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
153
+
154
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
155
+ end
156
+
157
+ it 'does not overflow when given lots of trailing punctuation' do
158
+ str = '10.1130/2013.2502' + ('.' * 10000)
159
+
160
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
161
+ end
142
162
  end
143
163
 
144
- it 'extracts all DOIs from a Crossref sample' do
145
- Pathname.new(__FILE__).join('..', '..', 'fixtures', 'dois.txt').each_line do |doi|
146
- doi.chomp!
164
+ context 'with strict mode on' do
165
+ it 'extracts DOIs ending with trailing periods' do
166
+ str = 'This is an example of a DOI: 10.1130/2013.2502...",'
167
+
168
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502...')
169
+ end
170
+
171
+ it 'keeps trailing punctuation after balanced parentheses' do
172
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
173
+
174
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502(04).')
175
+ end
176
+
177
+ it 'discards contiguous trailing punctuation after balanced parentheses' do
178
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
179
+
180
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502(04).')
181
+ end
182
+
183
+ it 'limits the trailing periods to 3' do
184
+ str = 'This is an example of a DOI: 10.1130/2013.2502.......'
185
+
186
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502...')
187
+ end
147
188
 
148
- expect(described_class.extract(doi)).to contain_exactly(doi)
189
+ it 'extracts all DOIs from a Crossref sample, keeping the trailing periods' do
190
+ each_doi('strict_mode_dois.txt') { |doi|
191
+ expect(described_class.extract(doi, strict: true)).to contain_exactly(doi)
192
+ }
149
193
  end
150
194
  end
151
195
  end
@@ -123,4 +123,29 @@ RSpec.describe Identifiers::ISBN do
123
123
  expect(described_class.extract('99921-58-10-7 9971-5-0210-0 960-425-059-0 80-902734-1-6'))
124
124
  .to contain_exactly('9789992158104', '9789971502102', '9789604250592', '9788090273412')
125
125
  end
126
+
127
+ context 'when passing prefixes' do
128
+ it 'extracts only prefixed ISBNs' do
129
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
130
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
131
+
132
+ expect(described_class.extract(text, prefixes))
133
+ .to contain_exactly('9789992158104', '9789971502102', '9789604250592')
134
+ end
135
+
136
+ it 'does not extract ISBNs with different prefixes' do
137
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
138
+ prefixes = ['IsBn', 'ISBN-10']
139
+
140
+ expect(described_class.extract(text, prefixes))
141
+ .to contain_exactly('9789992158104', '9789971502102')
142
+ end
143
+
144
+ it 'does not extract ISBNs without prefixes' do
145
+ text = "9789992158104 9789971502102 9789604250592 \n 9788090273412"
146
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
147
+
148
+ expect(described_class.extract(text, prefixes)).to be_empty
149
+ end
150
+ end
126
151
  end
metadata CHANGED
@@ -1,15 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
8
8
  - Paul Mucur
9
- autorequire:
9
+ - PatoSoft
10
+ autorequire:
10
11
  bindir: exe
11
12
  cert_chain: []
12
- date: 2018-04-09 00:00:00.000000000 Z
13
+ date: 2024-08-01 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: urn
@@ -25,20 +26,6 @@ dependencies:
25
26
  - - "~>"
26
27
  - !ruby/object:Gem::Version
27
28
  version: '2.0'
28
- - !ruby/object:Gem::Dependency
29
- name: bundler
30
- requirement: !ruby/object:Gem::Requirement
31
- requirements:
32
- - - "~>"
33
- - !ruby/object:Gem::Version
34
- version: '1.10'
35
- type: :development
36
- prerelease: false
37
- version_requirements: !ruby/object:Gem::Requirement
38
- requirements:
39
- - - "~>"
40
- - !ruby/object:Gem::Version
41
- version: '1.10'
42
29
  - !ruby/object:Gem::Dependency
43
30
  name: rake
44
31
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +54,7 @@ dependencies:
67
54
  - - "~>"
68
55
  - !ruby/object:Gem::Version
69
56
  version: '3.4'
70
- description:
57
+ description:
71
58
  email:
72
59
  - support@altmetric.com
73
60
  executables: []
@@ -103,7 +90,7 @@ homepage: https://github.com/altmetric/identifiers
103
90
  licenses:
104
91
  - MIT
105
92
  metadata: {}
106
- post_install_message:
93
+ post_install_message:
107
94
  rdoc_options: []
108
95
  require_paths:
109
96
  - lib
@@ -118,20 +105,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
105
  - !ruby/object:Gem::Version
119
106
  version: '0'
120
107
  requirements: []
121
- rubyforge_project:
122
- rubygems_version: 2.7.3
123
- signing_key:
108
+ rubygems_version: 3.3.27
109
+ signing_key:
124
110
  specification_version: 4
125
111
  summary: Utilities library for various scholarly identifiers used by Altmetric
126
112
  test_files:
127
- - spec/spec_helper.rb
128
- - spec/identifiers/repec_id_spec.rb
129
- - spec/identifiers/pubmed_id_spec.rb
113
+ - spec/identifiers/ads_bibcode_spec.rb
130
114
  - spec/identifiers/arxiv_id_spec.rb
131
- - spec/identifiers/urn_spec.rb
115
+ - spec/identifiers/doi_spec.rb
132
116
  - spec/identifiers/handle_spec.rb
133
- - spec/identifiers/ads_bibcode_spec.rb
134
117
  - spec/identifiers/isbn_spec.rb
135
- - spec/identifiers/doi_spec.rb
136
118
  - spec/identifiers/national_clinical_trial_id_spec.rb
137
119
  - spec/identifiers/orcid_spec.rb
120
+ - spec/identifiers/pubmed_id_spec.rb
121
+ - spec/identifiers/repec_id_spec.rb
122
+ - spec/identifiers/urn_spec.rb
123
+ - spec/spec_helper.rb