identifiers 0.12.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 423438daad2350706eced26425401a5d97ae715095b54853fe55ba4f8dbbaf2d
4
- data.tar.gz: 2776accbdccfc17965dd69507a0b326b2a57e275811828b05e76b47c0f54bb28
3
+ metadata.gz: 911c52af8788814413165b25ec5dfdace4cd213906dd193216c720c31cb6d3de
4
+ data.tar.gz: fb75b4b356a1e87711f7b0d9a820d938d4dbb2d28730c5c8738d105f947cae00
5
5
  SHA512:
6
- metadata.gz: e319511db960df762b3a646239edf45ff683bef6227736c673072de6ef5d1649dd300232f93c0af0be1d7744308de4f07ebfd53582add6069c6b3f425249722b
7
- data.tar.gz: 1ebd31499facbc5a51ec0dcf30fadb920e86ad6e53c20f4bb5dd66fa11c9f9cc7842a8b703348f13aac2675c0415c2c0f9464c42baafd055669c881342c33217
6
+ metadata.gz: 72881f5981cec05c2273e8c76912a8853edaa5b6a0d0cc69a298daedde4037ee98080903fcf9c80f8290a9fe79d7e0ced24ec680dd9f92c7756c9e76c70e3bbe
7
+ data.tar.gz: 1255cb693b80e63ccd19e2ed0c2c5962319467c6635e8713cf7185e5e6c91dc7f952e6b614f5c1131cef45586c06fff12c4eb38e7803b8bd0cb1dfec8b0134e0
data/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.14.0] - 2024-07-30
6
+ ### Added
7
+ - Added optional prefixes argument to ISBNs extraction.
8
+ If passed, `.extract` will only match series of numbers that are preceded by any of the passed prefixes.
9
+
10
+ ## [0.13.0] - 2019-09-04
11
+ ### Added
12
+ - Added new mode to the DOI extraction, so that it doesn't strip trailing
13
+ periods when in `strict` mode
14
+
5
15
  ## [0.12.1] - 2018-04-09
6
16
  ### Fixed
7
17
  - Restored support for extracting hyphenated ISBN-10s with registration group
@@ -88,3 +98,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
88
98
  [0.11.0]: https://github.com/altmetric/identifiers/releases/tag/v0.11.0
89
99
  [0.12.0]: https://github.com/altmetric/identifiers/releases/tag/v0.12.0
90
100
  [0.12.1]: https://github.com/altmetric/identifiers/releases/tag/v0.12.1
101
+ [0.13.0]: https://github.com/altmetric/identifiers/releases/tag/v0.13.0
data/README.md CHANGED
@@ -32,18 +32,34 @@ Or install it yourself as:
32
32
  ## Usage
33
33
 
34
34
  ```ruby
35
- Identifiers::DOI.extract('example: 10.123/abcd.efghi')
36
- # => ["10.123/abcd.efghi"]
35
+ Identifiers::DOI.extract('example: 10.1234/5678.ABC')
36
+ # => ["10.1234/5678.abc"]
37
37
 
38
38
  Identifiers::DOI.extract('no DOIs here')
39
39
  # => []
40
40
 
41
41
  Identifiers::URN.new('urn:abc:123')
42
42
  # => #<URN:0x007ff11c13d930 @urn="urn:abc:123", @nid="abc", @nss="123">
43
+
43
44
  Identifiers::URN('urn:abc:123')
44
45
  # => #<URN:0x007ff11c0ff568 @urn="urn:abc:123", @nid="abc", @nss="123">
45
46
  ```
46
47
 
48
+ A small percentage of DOIs end in a trailing `.`. However, having trailing periods
49
+ returned by the default extraction method would likely produce quite a few
50
+ false positives.
51
+ `DOI.extract` accepts a `strict` option, which can be set to `true` if we prefer to
52
+ return DOIs ending in `.`. By default, this option is set to `false`, which strips
53
+ any trailing `.`:
54
+
55
+ ```ruby
56
+ Identifiers::DOI.extract('example: 10.1234/5678.abc.', strict: true)
57
+ # => ["10.1234/5678.abc."]
58
+
59
+ Identifiers::DOI.extract('example: 10.1234/5678.abc.')
60
+ # => ["10.1234/5678.abc"]
61
+ ```
62
+
47
63
  ## By identifier
48
64
 
49
65
  `.extract` is a common method that works across all the supported identifiers.
@@ -61,6 +77,17 @@ Identifiers::RepecId.extract('')
61
77
  Identifiers::URN.extract('')
62
78
  ```
63
79
 
80
+ For `ISBN`'s `.extract`, you can pass an array of prefixes as an optional second argument when you only want matches that are preceded by one of those prefixes (matching is case-insensitive and ignores a `:` and any extra whitespace after the prefix):
81
+
82
+ ```ruby
83
+ Identifiers::ISBN.extract(
84
+ "IsBN:9789992158104 \n isbn-10 9789971502102 \n ISBN-13: 9789604250592 \n 9788090273412",
85
+ ["ISBN", "ISBN-10"]
86
+ )
87
+ # => ["9789992158104", "9789971502102"]
88
+ ```
89
+
90
+
64
91
  But some identifiers might have more. Check [their implementation](https://github.com/altmetric/identifiers/tree/master/lib/identifiers) to see all the methods available.
65
92
 
66
93
  For `URN`s, please check the [URN gem documentation](https://github.com/altmetric/urn) to see all the available options.
@@ -24,11 +24,17 @@ module Identifiers
24
24
  |
25
25
  [^[:space:]]+(?![[:space:]])\p{^P} # Suffix ending in non-punctuation
26
26
  )
27
+ \.{0,3} # Allow a DOI to end with up to 3 .
27
28
  )
28
29
  }x
29
30
 
30
- def self.extract(str)
31
- str.to_s.downcase.scan(REGEXP)
31
+ def self.extract(str, options = {})
32
+ strict = options.fetch(:strict, false)
33
+
34
+ dois = str.to_s.downcase.scan(REGEXP)
35
+ dois = dois.map { |doi| doi.gsub(/\.+$/, '') } unless strict
36
+
37
+ dois
32
38
  end
33
39
  end
34
40
  end
@@ -39,11 +39,25 @@ module Identifiers
39
39
  \d{1,7} # ISBN title enumerator and check digit
40
40
  \b
41
41
  }x
42
+ TEXT_AFTER_PREFIX_REGEXP = ':?\s*(\d.*)$'.freeze
43
+
44
+ def self.extract(str , prefixes = [])
45
+ str = match_strings_with_prefix(str , prefixes) if prefixes.any?
42
46
 
43
- def self.extract(str)
44
47
  extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
45
48
  end
46
49
 
50
+ def self.match_strings_with_prefix(str, prefixes)
51
+ prefix_regexp = prefixes.join('|')
52
+
53
+ str
54
+ .to_s
55
+ .scan(/(#{prefix_regexp})#{TEXT_AFTER_PREFIX_REGEXP}/i)
56
+ .inject('') do |acum, (_prefix, match)|
57
+ acum + "#{match} \n "
58
+ end
59
+ end
60
+
47
61
  def self.extract_isbn_as(str)
48
62
  extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
49
63
  end
@@ -1,151 +1,195 @@
1
1
  require 'identifiers/doi'
2
2
 
3
3
  RSpec.describe Identifiers::DOI do
4
- it 'extracts DOIs from a string' do
5
- str = 'This is an example of a DOI: 10.1049/el.2013.3006'
4
+ OPTIONS = [{ strict: false }, { strict: true }].freeze
6
5
 
7
- expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
6
+ def each_doi(file)
7
+ Pathname.new(__FILE__).join('..', '..', 'fixtures', file).each_line do |doi|
8
+ yield(doi.chomp!)
9
+ end
8
10
  end
9
11
 
10
- it 'extracts DOIs from anywhere in a string' do
11
- str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
12
+ OPTIONS.each do |options|
13
+ context "when extracting with options set to #{options.inspect}" do
14
+ it 'extracts DOIs from a string' do
15
+ str = 'This is an example of a DOI: 10.1049/el.2013.3006'
12
16
 
13
- expect(described_class.extract(str)).to contain_exactly('10.1049/el.2013.3006')
14
- end
17
+ expect(described_class.extract(str, options)).to contain_exactly('10.1049/el.2013.3006')
18
+ end
15
19
 
16
- it 'downcases the DOIs extracted' do
17
- str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
20
+ it 'extracts DOIs from anywhere in a string' do
21
+ str = 'This is an example of a DOI - 10.1049/el.2013.3006 - which is entirely valid'
18
22
 
19
- expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
20
- end
23
+ expect(described_class.extract(str, options)).to contain_exactly('10.1049/el.2013.3006')
24
+ end
21
25
 
22
- it 'does not extract a PubMed ID' do
23
- str = 'This is NOT a DOI: 123456'
26
+ it 'downcases the DOIs extracted' do
27
+ str = 'This is an example of a DOI: 10.1097/01.ASW.0000443266.17665.19'
24
28
 
25
- expect(described_class.extract(str)).to be_empty
26
- end
29
+ expect(described_class.extract(str, options)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
30
+ end
27
31
 
28
- it 'returns no DOIs if given nothing' do
29
- expect(described_class.extract(nil)).to be_empty
30
- end
32
+ it 'does not extract a PubMed ID' do
33
+ str = 'This is NOT a DOI: 123456'
31
34
 
32
- it 'extracts ISBN-As' do
33
- str = 'This is an ISBN-A: 10.978.8898392/315'
35
+ expect(described_class.extract(str, options)).to be_empty
36
+ end
34
37
 
35
- expect(described_class.extract(str)).to contain_exactly('10.978.8898392/315')
36
- end
38
+ it 'returns no DOIs if given nothing' do
39
+ expect(described_class.extract(nil)).to be_empty
40
+ end
37
41
 
38
- it 'does not extract invalid ISBN-As' do
39
- str = 'This is not an ISBN-A: 10.978.8898392/NotARealIsbnA'
42
+ it 'extracts ISBN-As' do
43
+ str = 'This is an ISBN-A: 10.978.8898392/315'
40
44
 
41
- expect(described_class.extract(str)).to be_empty
42
- end
45
+ expect(described_class.extract(str, options)).to contain_exactly('10.978.8898392/315')
46
+ end
43
47
 
44
- it 'retains closing parentheses that are part of the DOI' do
45
- str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
48
+ it 'does not extract invalid ISBN-As' do
49
+ str = 'This is not an ISBN-A: 10.978.8898392/NotARealIsbnA'
46
50
 
47
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
48
- end
51
+ expect(described_class.extract(str, options)).to be_empty
52
+ end
49
53
 
50
- it 'discards trailing punctuation' do
51
- str = 'This is an example of a DOI: 10.1130/2013.2502.'
54
+ it 'retains closing parentheses that are part of the DOI' do
55
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)'
52
56
 
53
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
54
- end
57
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502(04)')
58
+ end
55
59
 
56
- it 'discards multiple contiguous trailing punctuation' do
57
- str = 'This is an example of a DOI: 10.1130/2013.2502...",'
60
+ it 'discards ellipses' do
61
+ str = 'This is an example of a DOI: 10.1130/2013.2502'
58
62
 
59
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
60
- end
63
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502')
64
+ end
61
65
 
62
- it 'discards trailing Unicode punctuation' do
63
- str = 'This is an example of a DOI: 10.1130/2013.2502…'
66
+ it 'extracts old Wiley DOIs' do
67
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-# 10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5'
64
68
 
65
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
66
- end
69
+ expect(described_class.extract(str, options)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#', '10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5')
70
+ end
67
71
 
68
- it 'extracts old Wiley DOIs' do
69
- str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-# 10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5'
72
+ it 'does not extract a closing parenthesis if not part of the DOI' do
73
+ str = '(This is an example of a DOI: 10.1130/2013.2502)'
70
74
 
71
- expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#', '10.1002/(sici)1099-0690(199806)1998:6<1071::aid-ejoc1071>3.0.co;2-5')
72
- end
75
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502')
76
+ end
73
77
 
74
- it 'does not extract a closing parenthesis if not part of the DOI' do
75
- str = '(This is an example of a DOI: 10.1130/2013.2502)'
78
+ it 'discards trailing punctuation from old Wiley DOIs' do
79
+ str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#",'
76
80
 
77
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
78
- end
81
+ expect(described_class.extract(str, options)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
82
+ end
79
83
 
80
- it 'discards trailing punctuation from old Wiley DOIs' do
81
- str = 'This is an example of an old Wiley DOI: 10.1002/(SICI)1096-8644(199601)99:1<135::AID-AJPA8>3.0.CO;2-#",'
84
+ it 'discards trailing Unicode punctuation after balanced parentheses' do
85
+ str = 'This is an example of a DOI: 10.1130/2013.2502(04)",'
82
86
 
83
- expect(described_class.extract(str)).to contain_exactly('10.1002/(sici)1096-8644(199601)99:1<135::aid-ajpa8>3.0.co;2-#')
84
- end
87
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502(04)')
88
+ end
85
89
 
86
- it 'discards trailing punctuation after balanced parentheses' do
87
- str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
90
+ it 'discards contiguous trailing punctuation after unbalanced parentheses' do
91
+ str = '(This is an example of a DOI: 10.1130/2013.2502).",'
88
92
 
89
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
90
- end
93
+ expect(described_class.extract(str, options)).to contain_exactly('10.1130/2013.2502')
94
+ end
91
95
 
92
- it 'discards contiguous trailing punctuation after balanced parentheses' do
93
- str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
96
+ it 'does not extract DOIs with purely punctuation suffixes' do
97
+ expect(described_class.extract('10.1130/!).",', options)).to be_empty
98
+ end
94
99
 
95
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
96
- end
100
+ it 'extracts DOIs with emoji in them' do
101
+ expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123', options)).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
102
+ end
97
103
 
98
- it 'discards trailing Unicode punctuation after balanced parentheses' do
99
- str = 'This is an example of a DOI: 10.1130/2013.2502(04)…",'
104
+ it 'extracts DOIs separated by Unicode whitespace' do
105
+ expect(described_class.extract('10.1234/foo  10.1234/bar', options)).to contain_exactly('10.1234/foo', '10.1234/bar')
106
+ end
100
107
 
101
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
102
- end
108
+ it 'does not extract DOIs with extra digits prefixed' do
109
+ expect(described_class.extract('110.1234/foo', options)).to be_empty
110
+ end
103
111
 
104
- it 'discards contiguous trailing punctuation after unbalanced parentheses' do
105
- str = '(This is an example of a DOI: 10.1130/2013.2502).",'
112
+ it 'extracts DOIs from a string with trailing closing parentheses' do
113
+ expect(described_class.extract('(10.1130/2013.2502(04))', options)).to contain_exactly('10.1130/2013.2502(04)')
114
+ end
106
115
 
107
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
108
- end
116
+ it 'extracts DOIs from a string with multiple trailing closing parentheses' do
117
+ expect(described_class.extract('10.1130/2013.2502(04))))', options)).to contain_exactly('10.1130/2013.2502(04)')
118
+ end
109
119
 
110
- it 'does not overflow when given lots of trailing punctuation' do
111
- str = '10.1130/2013.2502' + ('.' * 10000)
120
+ it 'extracts DOIs with parentheses within the suffix' do
121
+ expect(described_class.extract('10.1016/0005-2744(70)90072-0', options)).to contain_exactly('10.1016/0005-2744(70)90072-0')
122
+ end
112
123
 
113
- expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
124
+ it 'extracts all DOIs from a Crossref sample' do
125
+ each_doi('dois.txt') { |doi|
126
+ expect(described_class.extract(doi, options)).to contain_exactly(doi)
127
+ }
128
+ end
129
+ end
114
130
  end
115
131
 
116
- it 'does not extract DOIs with purely punctuation suffixes' do
117
- expect(described_class.extract('10.1130/!).",')).to be_empty
118
- end
132
+ context 'when no options are provided' do
133
+ it 'discards trailing punctuation' do
134
+ str = 'This is an example of a DOI: 10.1130/2013.2502.'
119
135
 
120
- it 'extracts DOIs with emoji in them' do
121
- expect(described_class.extract('10.1234/🐔💩123🐔🐔🐔123')).to contain_exactly('10.1234/🐔💩123🐔🐔🐔123')
122
- end
136
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
137
+ end
123
138
 
124
- it 'extracts DOIs separated by Unicode whitespace' do
125
- expect(described_class.extract('10.1234/foo  10.1234/bar')).to contain_exactly('10.1234/foo', '10.1234/bar')
126
- end
139
+ it 'discards multiple contiguous trailing punctuation' do
140
+ str = 'This is an example of a DOI: 10.1130/2013.2502...",'
127
141
 
128
- it 'does not extract DOIs with extra digits prefixed' do
129
- expect(described_class.extract('110.1234/foo')).to be_empty
130
- end
142
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
143
+ end
131
144
 
132
- it 'extracts DOIs from a string with trailing closing parentheses' do
133
- expect(described_class.extract('10.1130/2013.2502(04))')).to contain_exactly('10.1130/2013.2502(04)')
134
- end
145
+ it 'discards trailing punctuation after balanced parentheses' do
146
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
135
147
 
136
- it 'extracts DOIs from a string with multiple trailing closing parentheses' do
137
- expect(described_class.extract('10.1130/2013.2502(04))))')).to contain_exactly('10.1130/2013.2502(04)')
138
- end
148
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
149
+ end
139
150
 
140
- it 'extracts DOIs with parentheses within the suffix' do
141
- expect(described_class.extract('10.1016/0005-2744(70)90072-0')).to contain_exactly('10.1016/0005-2744(70)90072-0')
151
+ it 'discards contiguous trailing punctuation after balanced parentheses' do
152
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
153
+
154
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502(04)')
155
+ end
156
+
157
+ it 'does not overflow when given lots of trailing punctuation' do
158
+ str = '10.1130/2013.2502' + ('.' * 10000)
159
+
160
+ expect(described_class.extract(str)).to contain_exactly('10.1130/2013.2502')
161
+ end
142
162
  end
143
163
 
144
- it 'extracts all DOIs from a Crossref sample' do
145
- Pathname.new(__FILE__).join('..', '..', 'fixtures', 'dois.txt').each_line do |doi|
146
- doi.chomp!
164
+ context 'with strict mode on' do
165
+ it 'extracts DOIs ending with trailing periods' do
166
+ str = 'This is an example of a DOI: 10.1130/2013.2502...",'
167
+
168
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502...')
169
+ end
170
+
171
+ it 'keeps trailing punctuation after balanced parentheses' do
172
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).'
173
+
174
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502(04).')
175
+ end
176
+
177
+ it 'discards contiguous trailing punctuation after balanced parentheses' do
178
+ str = 'This is an example of a DOI: This is an example of a DOI: 10.1130/2013.2502(04).",'
179
+
180
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502(04).')
181
+ end
182
+
183
+ it 'limits the trailing periods to 3' do
184
+ str = 'This is an example of a DOI: 10.1130/2013.2502.......'
185
+
186
+ expect(described_class.extract(str, strict: true)).to contain_exactly('10.1130/2013.2502...')
187
+ end
147
188
 
148
- expect(described_class.extract(doi)).to contain_exactly(doi)
189
+ it 'extracts all DOIs from a Crossref sample, keeping the trailing periods' do
190
+ each_doi('strict_mode_dois.txt') { |doi|
191
+ expect(described_class.extract(doi, strict: true)).to contain_exactly(doi)
192
+ }
149
193
  end
150
194
  end
151
195
  end
@@ -123,4 +123,29 @@ RSpec.describe Identifiers::ISBN do
123
123
  expect(described_class.extract('99921-58-10-7 9971-5-0210-0 960-425-059-0 80-902734-1-6'))
124
124
  .to contain_exactly('9789992158104', '9789971502102', '9789604250592', '9788090273412')
125
125
  end
126
+
127
+ context 'when passing prefixes' do
128
+ it 'extracts only prefixed ISBNs' do
129
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
130
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
131
+
132
+ expect(described_class.extract(text, prefixes))
133
+ .to contain_exactly('9789992158104', '9789971502102', '9789604250592')
134
+ end
135
+
136
+ it 'does not extract ISBNs with different prefixes' do
137
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
138
+ prefixes = ['IsBn', 'ISBN-10']
139
+
140
+ expect(described_class.extract(text, prefixes))
141
+ .to contain_exactly('9789992158104', '9789971502102')
142
+ end
143
+
144
+ it 'does not extract ISBNs without prefixes' do
145
+ text = "9789992158104 9789971502102 9789604250592 \n 9788090273412"
146
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
147
+
148
+ expect(described_class.extract(text, prefixes)).to be_empty
149
+ end
150
+ end
126
151
  end
metadata CHANGED
@@ -1,15 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
8
8
  - Paul Mucur
9
- autorequire:
9
+ - PatoSoft
10
+ autorequire:
10
11
  bindir: exe
11
12
  cert_chain: []
12
- date: 2018-04-09 00:00:00.000000000 Z
13
+ date: 2024-08-01 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: urn
@@ -25,20 +26,6 @@ dependencies:
25
26
  - - "~>"
26
27
  - !ruby/object:Gem::Version
27
28
  version: '2.0'
28
- - !ruby/object:Gem::Dependency
29
- name: bundler
30
- requirement: !ruby/object:Gem::Requirement
31
- requirements:
32
- - - "~>"
33
- - !ruby/object:Gem::Version
34
- version: '1.10'
35
- type: :development
36
- prerelease: false
37
- version_requirements: !ruby/object:Gem::Requirement
38
- requirements:
39
- - - "~>"
40
- - !ruby/object:Gem::Version
41
- version: '1.10'
42
29
  - !ruby/object:Gem::Dependency
43
30
  name: rake
44
31
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +54,7 @@ dependencies:
67
54
  - - "~>"
68
55
  - !ruby/object:Gem::Version
69
56
  version: '3.4'
70
- description:
57
+ description:
71
58
  email:
72
59
  - support@altmetric.com
73
60
  executables: []
@@ -103,7 +90,7 @@ homepage: https://github.com/altmetric/identifiers
103
90
  licenses:
104
91
  - MIT
105
92
  metadata: {}
106
- post_install_message:
93
+ post_install_message:
107
94
  rdoc_options: []
108
95
  require_paths:
109
96
  - lib
@@ -118,20 +105,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
105
  - !ruby/object:Gem::Version
119
106
  version: '0'
120
107
  requirements: []
121
- rubyforge_project:
122
- rubygems_version: 2.7.3
123
- signing_key:
108
+ rubygems_version: 3.3.27
109
+ signing_key:
124
110
  specification_version: 4
125
111
  summary: Utilities library for various scholarly identifiers used by Altmetric
126
112
  test_files:
127
- - spec/spec_helper.rb
128
- - spec/identifiers/repec_id_spec.rb
129
- - spec/identifiers/pubmed_id_spec.rb
113
+ - spec/identifiers/ads_bibcode_spec.rb
130
114
  - spec/identifiers/arxiv_id_spec.rb
131
- - spec/identifiers/urn_spec.rb
115
+ - spec/identifiers/doi_spec.rb
132
116
  - spec/identifiers/handle_spec.rb
133
- - spec/identifiers/ads_bibcode_spec.rb
134
117
  - spec/identifiers/isbn_spec.rb
135
- - spec/identifiers/doi_spec.rb
136
118
  - spec/identifiers/national_clinical_trial_id_spec.rb
137
119
  - spec/identifiers/orcid_spec.rb
120
+ - spec/identifiers/pubmed_id_spec.rb
121
+ - spec/identifiers/repec_id_spec.rb
122
+ - spec/identifiers/urn_spec.rb
123
+ - spec/spec_helper.rb