identifiers 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ab0f2da0f90f2dda268319d5be99dc314bd9b9fe
4
- data.tar.gz: 443f4aead25198aa6d267fa9c67a3800739f837c
3
+ metadata.gz: 036d129df184be8b4e577950bbe37c10455a2d6d
4
+ data.tar.gz: b35262b8180a444476fd45124ddffd5086ee3af3
5
5
  SHA512:
6
- metadata.gz: 51dc9476b30151bda26a037ae4dba6e25d59feef23af27bbc7d9ca7ab497221ef4117194a9524a4ba552cf708061bcc80b80b4404735abc08f4bbd38b615f3ad
7
- data.tar.gz: 088d481f41064ec32560dea76c25a0ddfdb3f731937bda7c0a0d32b2c2dd3e1d44e96f5e6eea2b4410a7fcb57c47779ca0adb68b5b41806256ef21504bc840d7
6
+ metadata.gz: 5e1ed385ed71f64287445b99452d8905d597f097be0e235e5057d443de98a0f2762f6e624633814a7fa1d2eb7973750d7439ed7a57fa94f2bc4a9d3eb0d4f09a
7
+ data.tar.gz: f2f32ba1157949782ed5db1d3cdbe28004d76b0743ecf1737719790e872dea024b63fc008b3c6d65873ea407f4ff8be3407856cb4b702f27ad5d57ecfa0ca41b
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.5.0] - 2017-01-27
6
+ ### Added
7
+ - Added support for ISBN-As when extracting DOIs and ISBNs
8
+
5
9
  ## [0.4.0] - 2017-01-23
6
10
  ### Changed
7
11
  - Extract ISBNs separated by spaces as well as hyphens
@@ -27,3 +31,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
27
31
  [0.3.0]: https://github.com/altmetric/identifiers/releases/tag/v0.3.0
28
32
  [0.3.1]: https://github.com/altmetric/identifiers/releases/tag/v0.3.1
29
33
  [0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
34
+ [0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
data/README.md CHANGED
@@ -4,7 +4,7 @@ Collection of utilities related to the extraction, validation and normalization
4
4
 
5
5
  - [ADS Bibcodes](http://adsdoc.harvard.edu/abs_doc/help_pages/bibcodes.html)
6
6
  - [arXiv IDs](https://arxiv.org/help/arxiv_identifier)
7
- - [DOIs](https://www.doi.org/)
7
+ - [DOIs](https://www.doi.org/) (including [ISBN-As](https://www.doi.org/factsheets/ISBN-A.html))
8
8
  - [Handles](https://en.wikipedia.org/wiki/Handle_System)
9
9
  - [ISBNs](https://en.wikipedia.org/wiki/International_Standard_Book_Number)
10
10
  - [National Clinical Trial IDs](https://clinicaltrials.gov/)
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
18
18
  Add this line to your application's `Gemfile`:
19
19
 
20
20
  ```ruby
21
- gem 'identifiers', '~> 0.4'
21
+ gem 'identifiers', '~> 0.5'
22
22
  ```
23
23
 
24
24
  And then execute:
@@ -1,7 +1,7 @@
1
1
  module Identifiers
2
2
  class DOI
3
3
  def self.extract(str)
4
- str.scan(%r{\b10\.\d{3,}/\S+\b}).map(&:downcase)
4
+ str.scan(%r{\b10\.(?:97[89]\.\d{2,8}/\d{1,7}|\d{4,9}/\S+)\b}).map(&:downcase)
5
5
  end
6
6
  end
7
7
  end
@@ -2,9 +2,14 @@ module Identifiers
2
2
  class ISBN
3
3
  REGEX_13 = /\b97[89]\d{10}\b/
4
4
  REGEX_10 = /\b\d{9}(?:\d|X)\b/
5
+ REGEX_A = %r{\b(?<=10\.)97[89]\.\d{2,8}/\d{1,7}\b}
5
6
 
6
7
  def self.extract(str)
7
- extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
8
+ extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
9
+ end
10
+
11
+ def self.extract_isbn_as(str)
12
+ extract_thirteen_digit_isbns(str.scan(REGEX_A).join("\n").tr('/.', ''))
8
13
  end
9
14
 
10
15
  def self.extract_thirteen_digit_isbns(str)
@@ -13,9 +13,21 @@ RSpec.describe Identifiers::DOI do
13
13
  expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
14
14
  end
15
15
 
16
- it 'does not extract a PUBMED ID' do
16
+ it 'does not extract a PubMed ID' do
17
17
  str = 'This is NOT a DOI: 123456'
18
18
 
19
19
  expect(described_class.extract(str)).to be_empty
20
20
  end
21
+
22
+ it 'extracts ISBN-As' do
23
+ str = 'This is an ISBN-A: 10.978.8898392/315'
24
+
25
+ expect(described_class.extract(str)).to contain_exactly('10.978.8898392/315')
26
+ end
27
+
28
+ it 'does not extract invalid ISBN-As' do
29
+ str = 'This is not an ISBN-A: 10.978.8898392/NotARealIsbnA'
30
+
31
+ expect(described_class.extract(str)).to be_empty
32
+ end
21
33
  end
@@ -19,6 +19,14 @@ RSpec.describe Identifiers::ISBN do
19
19
  expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
20
20
  end
21
21
 
22
+ it 'extracts ISBN-13s from ISBN-As' do
23
+ expect(described_class.extract('10.978.8898392/315')).to contain_exactly('9788898392315')
24
+ end
25
+
26
+ it 'does not extract invalid ISBNs from ISBN-As' do
27
+ expect(described_class.extract('10.978.8898392/316')).to be_empty
28
+ end
29
+
22
30
  it 'normalizes 10-digit ISBNs' do
23
31
  str = "0-8050-6909-7 \n 2-7594-0269-X"
24
32
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-01-23 00:00:00.000000000 Z
12
+ date: 2017-01-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: urn