identifiers 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +11 -0
- data/lib/identifiers/isbn.rb +15 -1
- data/spec/identifiers/isbn_spec.rb +25 -0
- metadata +14 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 911c52af8788814413165b25ec5dfdace4cd213906dd193216c720c31cb6d3de
|
4
|
+
data.tar.gz: fb75b4b356a1e87711f7b0d9a820d938d4dbb2d28730c5c8738d105f947cae00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72881f5981cec05c2273e8c76912a8853edaa5b6a0d0cc69a298daedde4037ee98080903fcf9c80f8290a9fe79d7e0ced24ec680dd9f92c7756c9e76c70e3bbe
|
7
|
+
data.tar.gz: 1255cb693b80e63ccd19e2ed0c2c5962319467c6635e8713cf7185e5e6c91dc7f952e6b614f5c1131cef45586c06fff12c4eb38e7803b8bd0cb1dfec8b0134e0
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
All notable changes to this project will be documented in this file. This
|
3
3
|
project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## [0.14.0] - 2024-07-30
|
6
|
+
### Added
|
7
|
+
- Added an optional `prefixes` argument to ISBN extraction.
|
8
|
+
If passed, `.extract` will only match series of numbers that are preceded by any of the passed prefixes.
|
9
|
+
|
5
10
|
## [0.13.0] - 2019-09-04
|
6
11
|
### Added
|
7
12
|
- Added new mode to the DOI extraction, so that it doesn't strip trailing
|
data/README.md
CHANGED
@@ -77,6 +77,17 @@ Identifiers::RepecId.extract('')
|
|
77
77
|
Identifiers::URN.extract('')
|
78
78
|
```
|
79
79
|
|
80
|
+
For `Identifiers::ISBN.extract`, you can pass an array of prefixes as an optional second argument when you want to exclude matches that are not preceded by one of those prefixes (matching is case-insensitive and ignores ':' and extra whitespace):
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
Identifiers::ISBN.extract(
|
84
|
+
"IsBN:9789992158104 \n isbn-10 9789971502102 \n ISBN-13: 9789604250592 \n 9788090273412",
|
85
|
+
["ISBN", "ISBN-10"]
|
86
|
+
)
|
87
|
+
# => ["9789992158104", "9789971502102"]
|
88
|
+
```
|
89
|
+
|
90
|
+
|
80
91
|
But some identifiers might have more. Check [their implementation](https://github.com/altmetric/identifiers/tree/master/lib/identifiers) to see all the methods available.
|
81
92
|
|
82
93
|
For `URN`s, please check the [URN gem documentation](https://github.com/altmetric/urn) to see all the available options.
|
data/lib/identifiers/isbn.rb
CHANGED
@@ -39,11 +39,25 @@ module Identifiers
|
|
39
39
|
\d{1,7} # ISBN title enumerator and check digit
|
40
40
|
\b
|
41
41
|
}x
|
42
|
+
TEXT_AFTER_PREFIX_REGEXP = ':?\s*(\d.*)$'.freeze
|
43
|
+
|
44
|
+
def self.extract(str , prefixes = [])
|
45
|
+
str = match_strings_with_prefix(str , prefixes) if prefixes.any?
|
42
46
|
|
43
|
-
def self.extract(str)
|
44
47
|
extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
|
45
48
|
end
|
46
49
|
|
50
|
+
def self.match_strings_with_prefix(str, prefixes)
|
51
|
+
prefix_regexp = prefixes.join('|')
|
52
|
+
|
53
|
+
str
|
54
|
+
.to_s
|
55
|
+
.scan(/(#{prefix_regexp})#{TEXT_AFTER_PREFIX_REGEXP}/i)
|
56
|
+
.inject('') do |acum, (_prefix, match)|
|
57
|
+
acum + "#{match} \n "
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
47
61
|
def self.extract_isbn_as(str)
|
48
62
|
extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
|
49
63
|
end
|
@@ -123,4 +123,29 @@ RSpec.describe Identifiers::ISBN do
|
|
123
123
|
expect(described_class.extract('99921-58-10-7 9971-5-0210-0 960-425-059-0 80-902734-1-6'))
|
124
124
|
.to contain_exactly('9789992158104', '9789971502102', '9789604250592', '9788090273412')
|
125
125
|
end
|
126
|
+
|
127
|
+
context 'when passing prefixes' do
|
128
|
+
it 'extracts only prefixed ISBNs' do
|
129
|
+
text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
|
130
|
+
prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
|
131
|
+
|
132
|
+
expect(described_class.extract(text, prefixes))
|
133
|
+
.to contain_exactly('9789992158104', '9789971502102', '9789604250592')
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'does not extract ISBNs with different prefixes' do
|
137
|
+
text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
|
138
|
+
prefixes = ['IsBn', 'ISBN-10']
|
139
|
+
|
140
|
+
expect(described_class.extract(text, prefixes))
|
141
|
+
.to contain_exactly('9789992158104', '9789971502102')
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'does not extract ISBNs without prefixes' do
|
145
|
+
text = "9789992158104 9789971502102 9789604250592 \n 9788090273412"
|
146
|
+
prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
|
147
|
+
|
148
|
+
expect(described_class.extract(text, prefixes)).to be_empty
|
149
|
+
end
|
150
|
+
end
|
126
151
|
end
|
metadata
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: identifiers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hernandez
|
8
8
|
- Paul Mucur
|
9
|
-
|
9
|
+
- PatoSoft
|
10
|
+
autorequire:
|
10
11
|
bindir: exe
|
11
12
|
cert_chain: []
|
12
|
-
date:
|
13
|
+
date: 2024-08-01 00:00:00.000000000 Z
|
13
14
|
dependencies:
|
14
15
|
- !ruby/object:Gem::Dependency
|
15
16
|
name: urn
|
@@ -53,7 +54,7 @@ dependencies:
|
|
53
54
|
- - "~>"
|
54
55
|
- !ruby/object:Gem::Version
|
55
56
|
version: '3.4'
|
56
|
-
description:
|
57
|
+
description:
|
57
58
|
email:
|
58
59
|
- support@altmetric.com
|
59
60
|
executables: []
|
@@ -89,7 +90,7 @@ homepage: https://github.com/altmetric/identifiers
|
|
89
90
|
licenses:
|
90
91
|
- MIT
|
91
92
|
metadata: {}
|
92
|
-
post_install_message:
|
93
|
+
post_install_message:
|
93
94
|
rdoc_options: []
|
94
95
|
require_paths:
|
95
96
|
- lib
|
@@ -104,19 +105,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
105
|
- !ruby/object:Gem::Version
|
105
106
|
version: '0'
|
106
107
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
108
|
-
signing_key:
|
108
|
+
rubygems_version: 3.3.27
|
109
|
+
signing_key:
|
109
110
|
specification_version: 4
|
110
111
|
summary: Utilities library for various scholarly identifiers used by Altmetric
|
111
112
|
test_files:
|
112
|
-
- spec/
|
113
|
-
- spec/identifiers/repec_id_spec.rb
|
114
|
-
- spec/identifiers/pubmed_id_spec.rb
|
113
|
+
- spec/identifiers/ads_bibcode_spec.rb
|
115
114
|
- spec/identifiers/arxiv_id_spec.rb
|
116
|
-
- spec/identifiers/
|
115
|
+
- spec/identifiers/doi_spec.rb
|
117
116
|
- spec/identifiers/handle_spec.rb
|
118
|
-
- spec/identifiers/ads_bibcode_spec.rb
|
119
117
|
- spec/identifiers/isbn_spec.rb
|
120
|
-
- spec/identifiers/doi_spec.rb
|
121
118
|
- spec/identifiers/national_clinical_trial_id_spec.rb
|
122
119
|
- spec/identifiers/orcid_spec.rb
|
120
|
+
- spec/identifiers/pubmed_id_spec.rb
|
121
|
+
- spec/identifiers/repec_id_spec.rb
|
122
|
+
- spec/identifiers/urn_spec.rb
|
123
|
+
- spec/spec_helper.rb
|