preadly-bulbasaur 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8260cd6a3e468ab1139c0096ee1cbb5086fe7ff1
|
4
|
+
data.tar.gz: 146d9e064d05db1eb3c19c7cdc7290c6027fa79a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61721cf5b99c6edba03b754fe98b863bd9747ab0bd279d4b3023becfc160309a591d931a2b8b814aab69884c940e026c72d8dc52022769101ef9f79f4da90b49
|
7
|
+
data.tar.gz: f8da5d762adf404102767f71c0c68ea99a3f49fbbb716612f01e95f9fda6ab8a5c982068d35d14232bdb8d033bfdcefab69d628c36a3fbdeb54dab390ab490d7
|
@@ -1,7 +1,5 @@
|
|
1
1
|
module Bulbasaur
|
2
2
|
class NormalizeImageSources
|
3
|
-
DOMAIN_REGEX = /^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?([^:\/\n]+)/im
|
4
|
-
|
5
3
|
def initialize(html, target_attrs)
|
6
4
|
@html = html
|
7
5
|
@target_attrs = target_attrs
|
@@ -27,16 +25,10 @@ module Bulbasaur
|
|
27
25
|
end
|
28
26
|
|
29
27
|
def adjust(element, attr)
|
30
|
-
element.set_attribute 'src', lazy_load_url(element, element.xpath(attr).text)
|
28
|
+
element.set_attribute 'src', element.xpath(attr).text
|
31
29
|
remove_target_attrs_from element
|
32
30
|
end
|
33
31
|
|
34
|
-
def lazy_load_url(element, text)
|
35
|
-
text_match = text.match(DOMAIN_REGEX).to_s
|
36
|
-
element_match = element.css('@src').text.match(DOMAIN_REGEX).to_s
|
37
|
-
element_match.empty? || text_match == element_match ? text : "#{element_match}/#{text}"
|
38
|
-
end
|
39
|
-
|
40
32
|
def remove_target_attrs_from(element)
|
41
33
|
@target_attrs.each { |attr| element.xpath("@#{attr}").remove }
|
42
34
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Bulbasaur
|
2
2
|
class NormalizeURL
|
3
3
|
def initialize(base_url, context_url)
|
4
|
-
@base_url = base_url
|
4
|
+
@base_url = clean_url(base_url)
|
5
5
|
@context_url = context_url
|
6
6
|
end
|
7
7
|
|
@@ -11,5 +11,13 @@ module Bulbasaur
|
|
11
11
|
rescue
|
12
12
|
raise ArgumentError, "Unable to normalize URL. Params: [base_url: #{@base_url}, context_url: #{@context_url}]."
|
13
13
|
end
|
14
|
+
|
15
|
+
private
|
16
|
+
def clean_url(url)
|
17
|
+
uri = URI(url)
|
18
|
+
uri.path = ''
|
19
|
+
uri.query = ''
|
20
|
+
uri.to_s
|
21
|
+
end
|
14
22
|
end
|
15
23
|
end
|
data/lib/bulbasaur/version.rb
CHANGED
@@ -9,15 +9,16 @@ RSpec.describe Bulbasaur::NormalizeImageSources do
|
|
9
9
|
describe "#call" do
|
10
10
|
|
11
11
|
let(:html) do
|
12
|
-
|
12
|
+
<<-HTML
|
13
13
|
<img src="http://somewhere.to/get/a-pixel.gif" data-lazy-src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
|
14
14
|
<img src="http://somewhere.to/get/another-pixel.gif" data-image="http://somewhere.to/get/the-other-real-image.jpg">
|
15
15
|
<img src="http://somewhere.to/get/a-third-pixel.gif" data-src="get/the-third-real-image.jpg">
|
16
16
|
<img src="otherplace.to/load/a-fourth-pixel.gif" lazy-data="otherplace.to/load/the-fourth-real-image.jpg">
|
17
|
+
<img src="http://somewhere.to/get/a-fifth-pixel.gif" data-image="http://elsewhere.to/get/the-fifth-real-image.jpg">
|
17
18
|
<img src="https://place.where/an-image/is/without-extension/" data-src="https://place.where/an-image/is/without-extension/">
|
18
19
|
<img src="https://place.where/an-image/has-two/lazy-params/" data-src="https://place.where/an-image/has-two/lazy-params/" data-image="https://place.where/an-image/has-two/lazy-params/">
|
19
20
|
<img lazy-data="https://place.where/an-image/has/no-src-tag.jpg">
|
20
|
-
|
21
|
+
HTML
|
21
22
|
end
|
22
23
|
|
23
24
|
context "When there are no target attributes" do
|
@@ -38,15 +39,16 @@ RSpec.describe Bulbasaur::NormalizeImageSources do
|
|
38
39
|
end
|
39
40
|
|
40
41
|
let(:html_parsed) do
|
41
|
-
|
42
|
+
<<-HTML
|
42
43
|
<img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
|
43
44
|
<img src="http://somewhere.to/get/the-other-real-image.jpg">
|
44
45
|
<img src="http://somewhere.to/get/a-third-pixel.gif" data-src="get/the-third-real-image.jpg">
|
45
46
|
<img src="otherplace.to/load/a-fourth-pixel.gif" lazy-data="otherplace.to/load/the-fourth-real-image.jpg">
|
47
|
+
<img src="http://elsewhere.to/get/the-fifth-real-image.jpg">
|
46
48
|
<img src="https://place.where/an-image/is/without-extension/" data-src="https://place.where/an-image/is/without-extension/">
|
47
49
|
<img src="https://place.where/an-image/has-two/lazy-params/" data-src="https://place.where/an-image/has-two/lazy-params/">
|
48
50
|
<img lazy-data="https://place.where/an-image/has/no-src-tag.jpg">
|
49
|
-
|
51
|
+
HTML
|
50
52
|
end
|
51
53
|
|
52
54
|
it "Returns the HTML code with the specified image tags adjusted" do
|
@@ -61,15 +63,16 @@ RSpec.describe Bulbasaur::NormalizeImageSources do
|
|
61
63
|
end
|
62
64
|
|
63
65
|
let(:html_parsed) do
|
64
|
-
|
66
|
+
<<-HTML
|
65
67
|
<img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
|
66
68
|
<img src="http://somewhere.to/get/the-other-real-image.jpg">
|
67
|
-
<img src="http://somewhere.to/get/the-third-real-image.jpg">
|
69
|
+
<img src="get/the-third-real-image.jpg">
|
68
70
|
<img src="otherplace.to/load/the-fourth-real-image.jpg">
|
71
|
+
<img src="http://elsewhere.to/get/the-fifth-real-image.jpg">
|
69
72
|
<img src="https://place.where/an-image/is/without-extension/">
|
70
73
|
<img src="https://place.where/an-image/has-two/lazy-params/">
|
71
74
|
<img src="https://place.where/an-image/has/no-src-tag.jpg">
|
72
|
-
|
75
|
+
HTML
|
73
76
|
end
|
74
77
|
|
75
78
|
it "Returns the HTML code with the involved image tags fixed with domain and path" do
|
@@ -5,7 +5,7 @@ RSpec.describe Bulbasaur::NormalizeURL do
|
|
5
5
|
subject do
|
6
6
|
described_class.new(base_url, context_url).call
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
let(:base_url) do
|
10
10
|
"http://pread.ly"
|
11
11
|
end
|
@@ -15,16 +15,16 @@ RSpec.describe Bulbasaur::NormalizeURL do
|
|
15
15
|
end
|
16
16
|
|
17
17
|
describe "#call" do
|
18
|
-
|
18
|
+
|
19
19
|
context "When use url normalized url: http://www.test.com/hello.jpg" do
|
20
|
-
|
20
|
+
|
21
21
|
it "Does return url normalized: http://www.test.com/hello.jpg" do
|
22
22
|
expect(subject).to eq "http://www.test.com/hello.jpg"
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
26
|
context "When use url unnormalized url: test.jpg" do
|
27
|
-
|
27
|
+
|
28
28
|
let(:context_url) do
|
29
29
|
"test.jpg"
|
30
30
|
end
|
@@ -35,7 +35,7 @@ RSpec.describe Bulbasaur::NormalizeURL do
|
|
35
35
|
end
|
36
36
|
|
37
37
|
context "When use url https normalized: https://www.test.com/hello.jpg" do
|
38
|
-
|
38
|
+
|
39
39
|
let(:context_url) do
|
40
40
|
"https://www.test.com/hello.jpg"
|
41
41
|
end
|
@@ -54,7 +54,7 @@ RSpec.describe Bulbasaur::NormalizeURL do
|
|
54
54
|
end
|
55
55
|
|
56
56
|
context "When use url not normalized with slash on base: hello.jpg" do
|
57
|
-
|
57
|
+
|
58
58
|
let(:base_url) do
|
59
59
|
"https://www.test.com/"
|
60
60
|
end
|
@@ -68,8 +68,36 @@ RSpec.describe Bulbasaur::NormalizeURL do
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
+
context "When base_url contains path" do
|
72
|
+
let!(:base_url) do
|
73
|
+
"http://pread.ly/other-path/"
|
74
|
+
end
|
75
|
+
|
76
|
+
let!(:context_url) do
|
77
|
+
"test.jpg"
|
78
|
+
end
|
79
|
+
|
80
|
+
it "Does return url normalized: http://pread.ly/test.jpg" do
|
81
|
+
expect(subject).to eq "http://pread.ly/test.jpg"
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
context "When base_url contains query string" do
|
86
|
+
let!(:base_url) do
|
87
|
+
"http://pread.ly/?page=1"
|
88
|
+
end
|
89
|
+
|
90
|
+
let!(:context_url) do
|
91
|
+
"test.jpg"
|
92
|
+
end
|
93
|
+
|
94
|
+
it "Does return url normalized: http://pread.ly/test.jpg" do
|
95
|
+
expect(subject).to eq "http://pread.ly/test.jpg"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
71
99
|
context "When base url not valid" do
|
72
|
-
|
100
|
+
|
73
101
|
let(:base_url) do
|
74
102
|
"test/httml"
|
75
103
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: preadly-bulbasaur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.1
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Magno Costa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|