identifiers 0.13.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a4546231ed2288fa0d807ac0fc71f268f92de75dbc38fb057410441b3b77ca4c
4
- data.tar.gz: b55eaf0b183185203c0957faf86335ac3b1996ed7312692d92daf013162798ff
3
+ metadata.gz: 911c52af8788814413165b25ec5dfdace4cd213906dd193216c720c31cb6d3de
4
+ data.tar.gz: fb75b4b356a1e87711f7b0d9a820d938d4dbb2d28730c5c8738d105f947cae00
5
5
  SHA512:
6
- metadata.gz: 81cfdbbd15e12d7394deeaf45463837ecb01a995fe9bc713736578153f1725d0a6cb71913453d918ad2a1d2cd5970caa6d18656df70f3b175628c4f753f07c37
7
- data.tar.gz: 5472c1c3f7b4c04d6a51f4d6fddfd476d3c8823d6c98a74de295cc730fe72bfb637b88e9c9f606d0144418a3d2b7828faf5ca145c1b53b19d8691c937fcc600f
6
+ metadata.gz: 72881f5981cec05c2273e8c76912a8853edaa5b6a0d0cc69a298daedde4037ee98080903fcf9c80f8290a9fe79d7e0ced24ec680dd9f92c7756c9e76c70e3bbe
7
+ data.tar.gz: 1255cb693b80e63ccd19e2ed0c2c5962319467c6635e8713cf7185e5e6c91dc7f952e6b614f5c1131cef45586c06fff12c4eb38e7803b8bd0cb1dfec8b0134e0
data/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.14.0] - 2024-07-30
6
+ ### Added
7
+ - Added optional `prefixes` argument to ISBN extraction.
8
+ If passed, `.extract` will only match series of numbers that are preceded by any of the passed prefixes.
9
+
5
10
  ## [0.13.0] - 2019-09-04
6
11
  ### Added
7
12
  - Added new mode to the DOI extraction, so that it doesn't strip trailing
data/README.md CHANGED
@@ -77,6 +77,17 @@ Identifiers::RepecId.extract('')
77
77
  Identifiers::URN.extract('')
78
78
  ```
79
79
 
80
+ For `ISBN`s `.extract`, you can pass an array of prefixes as an optional second argument when you want to exclude matches that are not preceded by one of those prefixes (matching is case-insensitive and ignores an optional ':' and any extra whitespace between the prefix and the number):
81
+
82
+ ```ruby
83
+ Identifiers::ISBN.extract(
84
+ "IsBN:9789992158104 \n isbn-10 9789971502102 \n ISBN-13: 9789604250592 \n 9788090273412",
85
+ ["ISBN", "ISBN-10"]
86
+ )
87
+ # => ["9789992158104", "9789971502102"]
88
+ ```
89
+
90
+
80
91
  But some identifiers might have more. Check [their implementation](https://github.com/altmetric/identifiers/tree/master/lib/identifiers) to see all the methods available.
81
92
 
82
93
  For `URN`s, please check the [URN gem documentation](https://github.com/altmetric/urn) to see all the available options.
@@ -39,11 +39,25 @@ module Identifiers
39
39
  \d{1,7} # ISBN title enumerator and check digit
40
40
  \b
41
41
  }x
42
+ TEXT_AFTER_PREFIX_REGEXP = ':?\s*(\d.*)$'.freeze
43
+
44
+ def self.extract(str , prefixes = [])
45
+ str = match_strings_with_prefix(str , prefixes) if prefixes.any?
42
46
 
43
- def self.extract(str)
44
47
  extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
45
48
  end
46
49
 
50
+ def self.match_strings_with_prefix(str, prefixes)
51
+ prefix_regexp = prefixes.join('|')
52
+
53
+ str
54
+ .to_s
55
+ .scan(/(#{prefix_regexp})#{TEXT_AFTER_PREFIX_REGEXP}/i)
56
+ .inject('') do |acum, (_prefix, match)|
57
+ acum + "#{match} \n "
58
+ end
59
+ end
60
+
47
61
  def self.extract_isbn_as(str)
48
62
  extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
49
63
  end
@@ -123,4 +123,29 @@ RSpec.describe Identifiers::ISBN do
123
123
  expect(described_class.extract('99921-58-10-7 9971-5-0210-0 960-425-059-0 80-902734-1-6'))
124
124
  .to contain_exactly('9789992158104', '9789971502102', '9789604250592', '9788090273412')
125
125
  end
126
+
127
+ context 'when passing prefixes' do
128
+ it 'extracts only prefixed ISBNs' do
129
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
130
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
131
+
132
+ expect(described_class.extract(text, prefixes))
133
+ .to contain_exactly('9789992158104', '9789971502102', '9789604250592')
134
+ end
135
+
136
+ it 'does not extract ISBNs with different prefixes' do
137
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
138
+ prefixes = ['IsBn', 'ISBN-10']
139
+
140
+ expect(described_class.extract(text, prefixes))
141
+ .to contain_exactly('9789992158104', '9789971502102')
142
+ end
143
+
144
+ it 'does not extract ISBNs without prefixes' do
145
+ text = "9789992158104 9789971502102 9789604250592 \n 9788090273412"
146
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
147
+
148
+ expect(described_class.extract(text, prefixes)).to be_empty
149
+ end
150
+ end
126
151
  end
metadata CHANGED
@@ -1,15 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
8
8
  - Paul Mucur
9
- autorequire:
9
+ - PatoSoft
10
+ autorequire:
10
11
  bindir: exe
11
12
  cert_chain: []
12
- date: 2019-09-04 00:00:00.000000000 Z
13
+ date: 2024-08-01 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: urn
@@ -53,7 +54,7 @@ dependencies:
53
54
  - - "~>"
54
55
  - !ruby/object:Gem::Version
55
56
  version: '3.4'
56
- description:
57
+ description:
57
58
  email:
58
59
  - support@altmetric.com
59
60
  executables: []
@@ -89,7 +90,7 @@ homepage: https://github.com/altmetric/identifiers
89
90
  licenses:
90
91
  - MIT
91
92
  metadata: {}
92
- post_install_message:
93
+ post_install_message:
93
94
  rdoc_options: []
94
95
  require_paths:
95
96
  - lib
@@ -104,19 +105,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
105
  - !ruby/object:Gem::Version
105
106
  version: '0'
106
107
  requirements: []
107
- rubygems_version: 3.0.3
108
- signing_key:
108
+ rubygems_version: 3.3.27
109
+ signing_key:
109
110
  specification_version: 4
110
111
  summary: Utilities library for various scholarly identifiers used by Altmetric
111
112
  test_files:
112
- - spec/spec_helper.rb
113
- - spec/identifiers/repec_id_spec.rb
114
- - spec/identifiers/pubmed_id_spec.rb
113
+ - spec/identifiers/ads_bibcode_spec.rb
115
114
  - spec/identifiers/arxiv_id_spec.rb
116
- - spec/identifiers/urn_spec.rb
115
+ - spec/identifiers/doi_spec.rb
117
116
  - spec/identifiers/handle_spec.rb
118
- - spec/identifiers/ads_bibcode_spec.rb
119
117
  - spec/identifiers/isbn_spec.rb
120
- - spec/identifiers/doi_spec.rb
121
118
  - spec/identifiers/national_clinical_trial_id_spec.rb
122
119
  - spec/identifiers/orcid_spec.rb
120
+ - spec/identifiers/pubmed_id_spec.rb
121
+ - spec/identifiers/repec_id_spec.rb
122
+ - spec/identifiers/urn_spec.rb
123
+ - spec/spec_helper.rb