anystyle-parser 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f5e9afa705ff5d976d3b2ea79ad875779c102d62
4
- data.tar.gz: e20422c80e4be4c2effe81adba4f11675b70ebe0
3
+ metadata.gz: c5471f4465579c6aec53ffee37f8e337075934fd
4
+ data.tar.gz: e32a68bbbe093abd3c44df90df5524325afc9f7e
5
5
  SHA512:
6
- metadata.gz: c0af42d7c7d3a4f9216d20d3b71db2d481c454e105d03994e4e8d3ecc038fb9c533e81b650ab35e94f1415bd031d1f67c4b2634a3307658f2c001349b7742cd9
7
- data.tar.gz: 7270eebab80be4539e0655eaefd0cf390dd7ff58c74c67ffb7ca2634ed626057cbb5fdc415e47e6f0c53038d9607f7aa6080cedfea5869be4dead98021c05e26
6
+ metadata.gz: cf520db7b3e31bfe1bd02b03b3d8a951a6288e6d10055ceeccf0b515d3f45e1a83abce911cb698839169c75cf9cac8d2e4169ea54d5db900b25b134b7caffd39
7
+ data.tar.gz: ebc4173949674553f396dbba6631fae165493f47f748288edad5ccf4fdee33b4e0a2fe7b6a54c5fc0c95e79f8bbbf991a03f62dff202fb6ec949a1098ec1ab24
data/HISTORY.md CHANGED
@@ -1,6 +1,7 @@
1
- 0.5.0 / 2014-03-13
1
+ 0.5.2 / 2014-03-13
2
2
  ==================
3
3
  * Add XML output
4
+ * Improve URL normalizer
4
5
 
5
6
  0.4.4 & 0.4.5 / 2014-03-10
6
7
  ==========================
@@ -312,8 +312,7 @@ module Anystyle
312
312
  url, *dangling = hash[:url]
313
313
  unmatched(:url, hash, dangling) unless dangling.empty?
314
314
 
315
- url.gsub!(/^\s+|[,\s]+$/, '')
316
- hash[:isbn] = isbn
315
+ hash[:url] = url[/([a-z]+:\/\/)?\w+\.\w+[\w\.\/%-]+/i] || url
317
316
  hash
318
317
  end
319
318
 
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.5.1'.freeze
3
+ VERSION = '0.5.2'.freeze
4
4
  end
5
5
  end
@@ -58,6 +58,18 @@ module Anystyle
58
58
  end
59
59
  end
60
60
 
61
+ describe 'URL extraction' do
62
+ it 'recognizes full URLs' do
63
+ n.normalize_url(:url => 'Available at: https://www.example.org/x.pdf').should == { :url => 'https://www.example.org/x.pdf' }
64
+ n.normalize_url(:url => 'Available at: https://www.example.org/x.pdf [Retrieved today]').should == { :url => 'https://www.example.org/x.pdf' }
65
+ end
66
+
67
+ it 'tries to detect URLs without protocol' do
68
+ n.normalize_url(:url => 'Available at: www.example.org/x.pdf').should == { :url => 'www.example.org/x.pdf' }
69
+ n.normalize_url(:url => 'Available at: example.org/x.pdf [Retrieved today]').should == { :url => 'example.org/x.pdf' }
70
+ end
71
+ end
72
+
61
73
  describe 'date extraction' do
62
74
  it 'extracts month and year from a string like "(July 2009)"' do
63
75
  h = Normalizer.instance.normalize_date(:date => '(July 2009)')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil