identifiers 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a4546231ed2288fa0d807ac0fc71f268f92de75dbc38fb057410441b3b77ca4c
4
- data.tar.gz: b55eaf0b183185203c0957faf86335ac3b1996ed7312692d92daf013162798ff
3
+ metadata.gz: 911c52af8788814413165b25ec5dfdace4cd213906dd193216c720c31cb6d3de
4
+ data.tar.gz: fb75b4b356a1e87711f7b0d9a820d938d4dbb2d28730c5c8738d105f947cae00
5
5
  SHA512:
6
- metadata.gz: 81cfdbbd15e12d7394deeaf45463837ecb01a995fe9bc713736578153f1725d0a6cb71913453d918ad2a1d2cd5970caa6d18656df70f3b175628c4f753f07c37
7
- data.tar.gz: 5472c1c3f7b4c04d6a51f4d6fddfd476d3c8823d6c98a74de295cc730fe72bfb637b88e9c9f606d0144418a3d2b7828faf5ca145c1b53b19d8691c937fcc600f
6
+ metadata.gz: 72881f5981cec05c2273e8c76912a8853edaa5b6a0d0cc69a298daedde4037ee98080903fcf9c80f8290a9fe79d7e0ced24ec680dd9f92c7756c9e76c70e3bbe
7
+ data.tar.gz: 1255cb693b80e63ccd19e2ed0c2c5962319467c6635e8713cf7185e5e6c91dc7f952e6b614f5c1131cef45586c06fff12c4eb38e7803b8bd0cb1dfec8b0134e0
data/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.14.0] - 2024-07-30
6
+ ### Added
7
+ - Added optional prefixes argument to ISBNs extraction.
8
+ If passed, `.extract` will only match series of numbers that are preceded by one of the given prefixes.
9
+
5
10
  ## [0.13.0] - 2019-09-04
6
11
  ### Added
7
12
  - Added new mode to the DOI extraction, so that it doesn't strip trailing
data/README.md CHANGED
@@ -77,6 +77,17 @@ Identifiers::RepecId.extract('')
77
77
  Identifiers::URN.extract('')
78
78
  ```
79
79
 
80
+ For `ISBN`'s `.extract`, you can pass an array of prefixes as an optional second argument to keep only the matches that are preceded by one of those prefixes (prefix matching is case-insensitive and ignores an optional ':' and any whitespace between the prefix and the number):
81
+
82
+ ```ruby
83
+ Identifiers::ISBN.extract(
84
+ "IsBN:9789992158104 \n isbn-10 9789971502102 \n ISBN-13: 9789604250592 \n 9788090273412",
85
+ ["ISBN", "ISBN-10"]
86
+ )
87
+ # => ["9789992158104", "9789971502102"]
88
+ ```
89
+
90
+
80
91
  But some identifiers might have more methods. Check [their implementation](https://github.com/altmetric/identifiers/tree/master/lib/identifiers) to see all the methods available.
81
92
 
82
93
  For `URN`s, please check the [URN gem documentation](https://github.com/altmetric/urn) to see all the available options.
@@ -39,11 +39,25 @@ module Identifiers
39
39
  \d{1,7} # ISBN title enumerator and check digit
40
40
  \b
41
41
  }x
42
+ TEXT_AFTER_PREFIX_REGEXP = ':?\s*(\d.*)$'.freeze
43
+
44
+ def self.extract(str , prefixes = [])
45
+ str = match_strings_with_prefix(str , prefixes) if prefixes.any?
42
46
 
43
- def self.extract(str)
44
47
  extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
45
48
  end
46
49
 
50
+ def self.match_strings_with_prefix(str, prefixes)
51
+ prefix_regexp = prefixes.join('|')
52
+
53
+ str
54
+ .to_s
55
+ .scan(/(#{prefix_regexp})#{TEXT_AFTER_PREFIX_REGEXP}/i)
56
+ .inject('') do |acum, (_prefix, match)|
57
+ acum + "#{match} \n "
58
+ end
59
+ end
60
+
47
61
  def self.extract_isbn_as(str)
48
62
  extract_thirteen_digit_isbns(str.to_s.scan(ISBN_A_REGEXP).join("\n").tr('/.', ''))
49
63
  end
@@ -123,4 +123,29 @@ RSpec.describe Identifiers::ISBN do
123
123
  expect(described_class.extract('99921-58-10-7 9971-5-0210-0 960-425-059-0 80-902734-1-6'))
124
124
  .to contain_exactly('9789992158104', '9789971502102', '9789604250592', '9788090273412')
125
125
  end
126
+
127
+ context 'when passing prefixes' do
128
+ it 'extracts only prefixed ISBNs' do
129
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
130
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
131
+
132
+ expect(described_class.extract(text, prefixes))
133
+ .to contain_exactly('9789992158104', '9789971502102', '9789604250592')
134
+ end
135
+
136
+ it 'does not extract ISBNs with different prefixes' do
137
+ text = "ISBN:9789992158104 \n ISBN-10 9789971502102 \n IsbN-13: 9789604250592 \n 9788090273412"
138
+ prefixes = ['IsBn', 'ISBN-10']
139
+
140
+ expect(described_class.extract(text, prefixes))
141
+ .to contain_exactly('9789992158104', '9789971502102')
142
+ end
143
+
144
+ it 'does not extract ISBNs without prefixes' do
145
+ text = "9789992158104 9789971502102 9789604250592 \n 9788090273412"
146
+ prefixes = ['IsBn', 'ISBN-10', 'ISBN-13']
147
+
148
+ expect(described_class.extract(text, prefixes)).to be_empty
149
+ end
150
+ end
126
151
  end
metadata CHANGED
@@ -1,15 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
8
8
  - Paul Mucur
9
- autorequire:
9
+ - PatoSoft
10
+ autorequire:
10
11
  bindir: exe
11
12
  cert_chain: []
12
- date: 2019-09-04 00:00:00.000000000 Z
13
+ date: 2024-08-01 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: urn
@@ -53,7 +54,7 @@ dependencies:
53
54
  - - "~>"
54
55
  - !ruby/object:Gem::Version
55
56
  version: '3.4'
56
- description:
57
+ description:
57
58
  email:
58
59
  - support@altmetric.com
59
60
  executables: []
@@ -89,7 +90,7 @@ homepage: https://github.com/altmetric/identifiers
89
90
  licenses:
90
91
  - MIT
91
92
  metadata: {}
92
- post_install_message:
93
+ post_install_message:
93
94
  rdoc_options: []
94
95
  require_paths:
95
96
  - lib
@@ -104,19 +105,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
105
  - !ruby/object:Gem::Version
105
106
  version: '0'
106
107
  requirements: []
107
- rubygems_version: 3.0.3
108
- signing_key:
108
+ rubygems_version: 3.3.27
109
+ signing_key:
109
110
  specification_version: 4
110
111
  summary: Utilities library for various scholarly identifiers used by Altmetric
111
112
  test_files:
112
- - spec/spec_helper.rb
113
- - spec/identifiers/repec_id_spec.rb
114
- - spec/identifiers/pubmed_id_spec.rb
113
+ - spec/identifiers/ads_bibcode_spec.rb
115
114
  - spec/identifiers/arxiv_id_spec.rb
116
- - spec/identifiers/urn_spec.rb
115
+ - spec/identifiers/doi_spec.rb
117
116
  - spec/identifiers/handle_spec.rb
118
- - spec/identifiers/ads_bibcode_spec.rb
119
117
  - spec/identifiers/isbn_spec.rb
120
- - spec/identifiers/doi_spec.rb
121
118
  - spec/identifiers/national_clinical_trial_id_spec.rb
122
119
  - spec/identifiers/orcid_spec.rb
120
+ - spec/identifiers/pubmed_id_spec.rb
121
+ - spec/identifiers/repec_id_spec.rb
122
+ - spec/identifiers/urn_spec.rb
123
+ - spec/spec_helper.rb