scraped 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/CHANGELOG.md +7 -0
- data/lib/scraped/response/decorator/absolute_urls.rb +23 -3
- data/lib/scraped/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7f429cba94e06130bef2f3b912e760f989f45bb
|
4
|
+
data.tar.gz: 4259d7da280f1e36755b589693c15cd37dcb049b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cdd825c2323150baf9b746ab2867c0a840b87b92d1ea0484266232039336432b8a99d274c61e6ebd1e8f4e5805cef173255b4c5882744721f75d5f4be2fc24af
|
7
|
+
data.tar.gz: b20b9930ef67e38fb50ce6128cd76c4934d9c05fbd6f26db1a91516eeee1ebeee15efb7955e2da4b94fd1cf5c19b3493f1efe77dca8f570f03255a9fbefb7d3c
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
7
7
|
|
8
|
+
## 0.3.0 - 2017-01-10
|
9
|
+
|
10
|
+
### Changed
|
11
|
+
|
12
|
+
- The AbsoluteUrls decorator now ensures the URL is correctly encoded
|
13
|
+
(e.g. by transforming spaces into %20)
|
14
|
+
|
8
15
|
## 0.2.0 - 2017-01-04
|
9
16
|
|
10
17
|
### Changed
|
data/lib/scraped/response/decorator/absolute_urls.rb
CHANGED
@@ -5,6 +5,28 @@ module Scraped
|
|
5
5
|
class Response
|
6
6
|
class Decorator
|
7
7
|
class AbsoluteUrls < Decorator
|
8
|
+
class AbsoluteUrl
|
9
|
+
def initialize(base_url:, relative_url:)
|
10
|
+
@base_url = base_url
|
11
|
+
@relative_url = relative_url
|
12
|
+
end
|
13
|
+
|
14
|
+
def to_s
|
15
|
+
unless relative_url.to_s.empty?
|
16
|
+
URI.join(base_url, URI.encode(
|
17
|
+
# To prevent encoded URLs from being encoded twice
|
18
|
+
URI.decode(relative_url)
|
19
|
+
).gsub('[', '%5B').gsub(']', '%5D')).to_s
|
20
|
+
end
|
21
|
+
rescue URI::InvalidURIError
|
22
|
+
relative_url
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
attr_reader :base_url, :relative_url
|
28
|
+
end
|
29
|
+
|
8
30
|
def body
|
9
31
|
Nokogiri::HTML(super).tap do |doc|
|
10
32
|
doc.css('img').each { |img| img[:src] = absolute_url(img[:src]) }
|
@@ -15,9 +37,7 @@ module Scraped
|
|
15
37
|
private
|
16
38
|
|
17
39
|
def absolute_url(relative_url)
|
18
|
-
|
19
|
-
rescue URI::InvalidURIError
|
20
|
-
relative_url
|
40
|
+
AbsoluteUrl.new(base_url: url, relative_url: relative_url).to_s
|
21
41
|
end
|
22
42
|
end
|
23
43
|
end
|
data/lib/scraped/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scraped
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- EveryPolitician
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|