identifiers 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ab0f2da0f90f2dda268319d5be99dc314bd9b9fe
4
- data.tar.gz: 443f4aead25198aa6d267fa9c67a3800739f837c
3
+ metadata.gz: 036d129df184be8b4e577950bbe37c10455a2d6d
4
+ data.tar.gz: b35262b8180a444476fd45124ddffd5086ee3af3
5
5
  SHA512:
6
- metadata.gz: 51dc9476b30151bda26a037ae4dba6e25d59feef23af27bbc7d9ca7ab497221ef4117194a9524a4ba552cf708061bcc80b80b4404735abc08f4bbd38b615f3ad
7
- data.tar.gz: 088d481f41064ec32560dea76c25a0ddfdb3f731937bda7c0a0d32b2c2dd3e1d44e96f5e6eea2b4410a7fcb57c47779ca0adb68b5b41806256ef21504bc840d7
6
+ metadata.gz: 5e1ed385ed71f64287445b99452d8905d597f097be0e235e5057d443de98a0f2762f6e624633814a7fa1d2eb7973750d7439ed7a57fa94f2bc4a9d3eb0d4f09a
7
+ data.tar.gz: f2f32ba1157949782ed5db1d3cdbe28004d76b0743ecf1737719790e872dea024b63fc008b3c6d65873ea407f4ff8be3407856cb4b702f27ad5d57ecfa0ca41b
@@ -2,6 +2,10 @@
2
2
  All notable changes to this project will be documented in this file. This
3
3
  project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [0.5.0] - 2017-01-27
6
+ ### Added
7
+ - Added support for ISBN-As when extracting DOIs and ISBNs
8
+
5
9
  ## [0.4.0] - 2017-01-23
6
10
  ### Changed
7
11
  - Extract ISBNs separated by spaces as well as hyphens
@@ -27,3 +31,4 @@ project adheres to [Semantic Versioning](http://semver.org/).
27
31
  [0.3.0]: https://github.com/altmetric/identifiers/releases/tag/v0.3.0
28
32
  [0.3.1]: https://github.com/altmetric/identifiers/releases/tag/v0.3.1
29
33
  [0.4.0]: https://github.com/altmetric/identifiers/releases/tag/v0.4.0
34
+ [0.5.0]: https://github.com/altmetric/identifiers/releases/tag/v0.5.0
data/README.md CHANGED
@@ -4,7 +4,7 @@ Collection of utilities related to the extraction, validation and normalization
4
4
 
5
5
  - [ADS Bibcodes](http://adsdoc.harvard.edu/abs_doc/help_pages/bibcodes.html)
6
6
  - [arXiv IDs](https://arxiv.org/help/arxiv_identifier)
7
- - [DOIs](https://www.doi.org/)
7
+ - [DOIs](https://www.doi.org/) (including [ISBN-As](https://www.doi.org/factsheets/ISBN-A.html))
8
8
  - [Handles](https://en.wikipedia.org/wiki/Handle_System)
9
9
  - [ISBNs](https://en.wikipedia.org/wiki/International_Standard_Book_Number)
10
10
  - [National Clinical Trial IDs](https://clinicaltrials.gov/)
@@ -18,7 +18,7 @@ Collection of utilities related to the extraction, validation and normalization
18
18
  Add this line to your application's `Gemfile`:
19
19
 
20
20
  ```ruby
21
- gem 'identifiers', '~> 0.4'
21
+ gem 'identifiers', '~> 0.5'
22
22
  ```
23
23
 
24
24
  And then execute:
@@ -1,7 +1,7 @@
1
1
  module Identifiers
2
2
  class DOI
3
3
  def self.extract(str)
4
- str.scan(%r{\b10\.\d{3,}/\S+\b}).map(&:downcase)
4
+ str.scan(%r{\b10\.(?:97[89]\.\d{2,8}/\d{1,7}|\d{4,9}/\S+)\b}).map(&:downcase)
5
5
  end
6
6
  end
7
7
  end
@@ -2,9 +2,14 @@ module Identifiers
2
2
  class ISBN
3
3
  REGEX_13 = /\b97[89]\d{10}\b/
4
4
  REGEX_10 = /\b\d{9}(?:\d|X)\b/
5
+ REGEX_A = %r{\b(?<=10\.)97[89]\.\d{2,8}/\d{1,7}\b}
5
6
 
6
7
  def self.extract(str)
7
- extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
8
+ extract_isbn_as(str) + extract_thirteen_digit_isbns(str) + extract_ten_digit_isbns(str)
9
+ end
10
+
11
+ def self.extract_isbn_as(str)
12
+ extract_thirteen_digit_isbns(str.scan(REGEX_A).join("\n").tr('/.', ''))
8
13
  end
9
14
 
10
15
  def self.extract_thirteen_digit_isbns(str)
@@ -13,9 +13,21 @@ RSpec.describe Identifiers::DOI do
13
13
  expect(described_class.extract(str)).to contain_exactly('10.1097/01.asw.0000443266.17665.19')
14
14
  end
15
15
 
16
- it 'does not extract a PUBMED ID' do
16
+ it 'does not extract a PubMed ID' do
17
17
  str = 'This is NOT a DOI: 123456'
18
18
 
19
19
  expect(described_class.extract(str)).to be_empty
20
20
  end
21
+
22
+ it 'extracts ISBN-As' do
23
+ str = 'This is an ISBN-A: 10.978.8898392/315'
24
+
25
+ expect(described_class.extract(str)).to contain_exactly('10.978.8898392/315')
26
+ end
27
+
28
+ it 'does not extract invalid ISBN-As' do
29
+ str = 'This is not an ISBN-A: 10.978.8898392/NotARealIsbnA'
30
+
31
+ expect(described_class.extract(str)).to be_empty
32
+ end
21
33
  end
@@ -19,6 +19,14 @@ RSpec.describe Identifiers::ISBN do
19
19
  expect(described_class.extract('ISBN: 978 0 80 506909 9')).to contain_exactly('9780805069099')
20
20
  end
21
21
 
22
+ it 'extracts ISBN-13s from ISBN-As' do
23
+ expect(described_class.extract('10.978.8898392/315')).to contain_exactly('9788898392315')
24
+ end
25
+
26
+ it 'does not extract invalid ISBNs from ISBN-As' do
27
+ expect(described_class.extract('10.978.8898392/316')).to be_empty
28
+ end
29
+
22
30
  it 'normalizes 10-digit ISBNs' do
23
31
  str = "0-8050-6909-7 \n 2-7594-0269-X"
24
32
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: identifiers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Hernandez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-01-23 00:00:00.000000000 Z
12
+ date: 2017-01-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: urn