identifiers 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +11 -0
- data/lib/identifiers/isbn.rb +15 -1
- data/spec/identifiers/isbn_spec.rb +25 -0
- metadata +14 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 911c52af8788814413165b25ec5dfdace4cd213906dd193216c720c31cb6d3de
|
4
|
+
data.tar.gz: fb75b4b356a1e87711f7b0d9a820d938d4dbb2d28730c5c8738d105f947cae00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72881f5981cec05c2273e8c76912a8853edaa5b6a0d0cc69a298daedde4037ee98080903fcf9c80f8290a9fe79d7e0ced24ec680dd9f92c7756c9e76c70e3bbe
|
7
|
+
data.tar.gz: 1255cb693b80e63ccd19e2ed0c2c5962319467c6635e8713cf7185e5e6c91dc7f952e6b614f5c1131cef45586c06fff12c4eb38e7803b8bd0cb1dfec8b0134e0
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.14.0] - 2024-07-30
|
6
|
+
### Added
|
7
|
+
- Added an optional `prefixes` argument to ISBN extraction.
|
8
|
+
If passed, `.extract` will only match series of numbers that are preceded by any of the passed prefixes.
|
9
|
+
|
5
10
|
## [0.13.0] - 2019-09-04
|
6
11
|
### Added
|
7
12
|
- Added new mode to the DOI extraction, so that it doesn't strip trailing
|
data/README.md
CHANGED
@@ -77,6 +77,17 @@ Identifiers::RepecId.extract('')
|
|
77
77
|
Identifiers::URN.extract('')
|
78
78
|
```
|
79
79
|
|
80
|
+
For `ISBN`'s `.extract`, you can pass an array of prefixes as an optional second argument when you want to exclude matches that are not preceded by one of those prefixes (matching is case-insensitive and ignores an optional ':' and any extra whitespace after the prefix):
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
Identifiers::ISBN.extract(
|
84
|
+
"IsBN:9789992158104 \n isbn-10 9789971502102 \n ISBN-13: 9789604250592 \n 9788090273412",
|
85
|
+
["ISBN", "ISBN-10"]
|
86
|
+
)
|
87
|
+
# => ["9789992158104", "9789971502102"]
|
88
|
+
```
|
89
|
+
|
90
|
+
|
80
91
|
But some identifiers might have more. Check [their implementation](https://github.com/altmetric/identifiers/tree/master/lib/identifiers) to see all the methods available.
|
81
92
|
|
82
93
|
For `URN`s, please check the [URN gem documentation](https://github.com/altmetric/urn) to see all the available options.
|
data/lib/identifiers/isbn.rb
CHANGED
@@ -39,11 +39,25 @@ module Identifiers
|
|
39
39
|
\d{1,7} # ISBN title enumerator and check digit
|
40
40
|
\b
|
41
41
|
}x
|
42
|
+
TEXT_AFTER_PREFIX_REGEXP = ':?\s*(\d.*)$'.freeze
|
43
|
+
|
44
|
+
def self.extract(str , prefixes = [])
|
45
|
+
str = match_strings_with_prefix(str , prefixes) if prefixes.any?
|
42
46
|
|
43
|
-
def self.extract(str)
|
44
47
|
extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
|
45
48
|
end
|
46
49
|
|
50
|
+
def self.match_strings_with_prefix(str, prefixes)
|
51
|
+
prefix_regexp = prefixes.join('|')
|
52
|
+
|
53
|
+
str
|
54
|
+
.to_s
|
55
|
+
.scan(/(#{prefix_regexp})#{TEXT_AFTER_PREFIX_REGEXP}/i)
|
56
|
+
.inject('') do |acum, (_prefix, match)|
|
57
|
+
acum + "#{match} \n "
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
47
61
|
def self.extract_isbn_as(str)
|
48
62
|
extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
|
49
63
|
end
|
@@ -123,4 +123,29 @@ RSpec.describe Identifiers::ISBN do
|
|
123
123
|
expect(described_class.extract('99921-58-10-7 9971-5-0210-0 960-425-059-0 80-902734-1-6'))
|
124
124
|
.to contain_exactly('9789992158104', '9789971502102', '9789604250592', '9788090273412')
|
125
125
|
end
|
126
|
+
|
127
|
+
context 'when passing prefixes' do
|
128
|
+
it 'extracts only prefixed ISBNs' do
|
129
|
+
text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
|
130
|
+
prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
|
131
|
+
|
132
|
+
expect(described_class.extract(text, prefixes))
|
133
|
+
.to contain_exactly('9789992158104', '9789971502102', '9789604250592')
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'does not extract ISBNs with different prefixes' do
|
137
|
+
text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
|
138
|
+
prefixes = ['IsBn', 'ISBN-10']
|
139
|
+
|
140
|
+
expect(described_class.extract(text, prefixes))
|
141
|
+
.to contain_exactly('9789992158104', '9789971502102')
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'does not extract ISBNs without prefixes' do
|
145
|
+
text = "9789992158104 9789971502102 9789604250592 \n 9788090273412"
|
146
|
+
prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
|
147
|
+
|
148
|
+
expect(described_class.extract(text, prefixes)).to be_empty
|
149
|
+
end
|
150
|
+
end
|
126
151
|
end
|
metadata
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: identifiers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hernandez
|
8
8
|
- Paul Mucur
|
9
|
-
|
9
|
+
- PatoSoft
|
10
|
+
autorequire:
|
10
11
|
bindir: exe
|
11
12
|
cert_chain: []
|
12
|
-
date:
|
13
|
+
date: 2024-08-01 00:00:00.000000000 Z
|
13
14
|
dependencies:
|
14
15
|
- !ruby/object:Gem::Dependency
|
15
16
|
name: urn
|
@@ -53,7 +54,7 @@ dependencies:
|
|
53
54
|
- - "~>"
|
54
55
|
- !ruby/object:Gem::Version
|
55
56
|
version: '3.4'
|
56
|
-
description:
|
57
|
+
description:
|
57
58
|
email:
|
58
59
|
- support@altmetric.com
|
59
60
|
executables: []
|
@@ -89,7 +90,7 @@ homepage: https://github.com/altmetric/identifiers
|
|
89
90
|
licenses:
|
90
91
|
- MIT
|
91
92
|
metadata: {}
|
92
|
-
post_install_message:
|
93
|
+
post_install_message:
|
93
94
|
rdoc_options: []
|
94
95
|
require_paths:
|
95
96
|
- lib
|
@@ -104,19 +105,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
105
|
- !ruby/object:Gem::Version
|
105
106
|
version: '0'
|
106
107
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
108
|
-
signing_key:
|
108
|
+
rubygems_version: 3.3.27
|
109
|
+
signing_key:
|
109
110
|
specification_version: 4
|
110
111
|
summary: Utilities library for various scholarly identifiers used by Altmetric
|
111
112
|
test_files:
|
112
|
-
- spec/
|
113
|
-
- spec/identifiers/repec_id_spec.rb
|
114
|
-
- spec/identifiers/pubmed_id_spec.rb
|
113
|
+
- spec/identifiers/ads_bibcode_spec.rb
|
115
114
|
- spec/identifiers/arxiv_id_spec.rb
|
116
|
-
- spec/identifiers/
|
115
|
+
- spec/identifiers/doi_spec.rb
|
117
116
|
- spec/identifiers/handle_spec.rb
|
118
|
-
- spec/identifiers/ads_bibcode_spec.rb
|
119
117
|
- spec/identifiers/isbn_spec.rb
|
120
|
-
- spec/identifiers/doi_spec.rb
|
121
118
|
- spec/identifiers/national_clinical_trial_id_spec.rb
|
122
119
|
- spec/identifiers/orcid_spec.rb
|
120
|
+
- spec/identifiers/pubmed_id_spec.rb
|
121
|
+
- spec/identifiers/repec_id_spec.rb
|
122
|
+
- spec/identifiers/urn_spec.rb
|
123
|
+
- spec/spec_helper.rb
|