scraped 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/CHANGELOG.md +7 -0
- data/lib/scraped/response/decorator/absolute_urls.rb +23 -3
- data/lib/scraped/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7f429cba94e06130bef2f3b912e760f989f45bb
|
4
|
+
data.tar.gz: 4259d7da280f1e36755b589693c15cd37dcb049b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cdd825c2323150baf9b746ab2867c0a840b87b92d1ea0484266232039336432b8a99d274c61e6ebd1e8f4e5805cef173255b4c5882744721f75d5f4be2fc24af
|
7
|
+
data.tar.gz: b20b9930ef67e38fb50ce6128cd76c4934d9c05fbd6f26db1a91516eeee1ebeee15efb7955e2da4b94fd1cf5c19b3493f1efe77dca8f570f03255a9fbefb7d3c
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
7
7
|
|
8
|
+
## 0.3.0 - 2017-01-10
|
9
|
+
|
10
|
+
### Changed
|
11
|
+
|
12
|
+
- The AbsoluteUrls decorator now ensures the URL is correctly encoded
|
13
|
+
(e.g. by transforming spaces into %20)
|
14
|
+
|
8
15
|
## 0.2.0 - 2017-01-04
|
9
16
|
|
10
17
|
### Changed
|
data/lib/scraped/response/decorator/absolute_urls.rb
CHANGED
@@ -5,6 +5,28 @@ module Scraped
|
|
5
5
|
class Response
|
6
6
|
class Decorator
|
7
7
|
class AbsoluteUrls < Decorator
|
8
|
+
class AbsoluteUrl
|
9
|
+
def initialize(base_url:, relative_url:)
|
10
|
+
@base_url = base_url
|
11
|
+
@relative_url = relative_url
|
12
|
+
end
|
13
|
+
|
14
|
+
def to_s
|
15
|
+
unless relative_url.to_s.empty?
|
16
|
+
URI.join(base_url, URI.encode(
|
17
|
+
# To prevent encoded URLs from being encoded twice
|
18
|
+
URI.decode(relative_url)
|
19
|
+
).gsub('[', '%5B').gsub(']', '%5D')).to_s
|
20
|
+
end
|
21
|
+
rescue URI::InvalidURIError
|
22
|
+
relative_url
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
attr_reader :base_url, :relative_url
|
28
|
+
end
|
29
|
+
|
8
30
|
def body
|
9
31
|
Nokogiri::HTML(super).tap do |doc|
|
10
32
|
doc.css('img').each { |img| img[:src] = absolute_url(img[:src]) }
|
@@ -15,9 +37,7 @@ module Scraped
|
|
15
37
|
private
|
16
38
|
|
17
39
|
def absolute_url(relative_url)
|
18
|
-
|
19
|
-
rescue URI::InvalidURIError
|
20
|
-
relative_url
|
40
|
+
AbsoluteUrl.new(base_url: url, relative_url: relative_url).to_s
|
21
41
|
end
|
22
42
|
end
|
23
43
|
end
|
data/lib/scraped/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scraped
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- EveryPolitician
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|