preadly-bulbasaur 0.7.1 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 75b989582785e56fa7033a634ef68f664118cc2b
4
- data.tar.gz: a622c9d1db2fc88f0b053859bcd8e6ea3ec4ee69
3
+ metadata.gz: a3585babf7ec241a14a1bb08d933cb648216658e
4
+ data.tar.gz: e283f5b067e6a8d7642e28e747df9a36e49877fd
5
5
  SHA512:
6
- metadata.gz: 87a4cc64a0ea8f231b59f4446ca1319bece8f51ca41900d427c1fd4e585f3dba855919a2ca21da0314d903798a3bc124134c357ab1436adf4bdb7c6f4174626c
7
- data.tar.gz: cb01394e15a2b9cd0e376d2b438ab4237398bc104b7fd1c93e0bf312d4cf485f7cba236a9dff04005ce398a1a0bcb25a9ee550af593379b7ff684da939505de2
6
+ metadata.gz: 3ec4836696c83dd0eef9e32ca7cf9375ecc37bc4ed973f5761357702a23a3c5eeaa7198b195ebab39a4e11e3c505a2b6d50ad37034a95cf45a06a680e3d0b048
7
+ data.tar.gz: e1324f9d951aaa744e91b4a1113a3f2cc9b2e8222217e3a2ecc9303fb6455ec838819b218a52d64ffbf28a16a5dae7bff6ea4a1c56e9b06f86d464b66e52a4ee
@@ -0,0 +1,42 @@
1
+ module Bulbasaur
2
+ class ExtractMetaInformationsFromHTML
3
+
4
+ def initialize(html)
5
+ @html = html
6
+ end
7
+
8
+ def call
9
+ meta_informations = []
10
+ for_each_meta_information do |meta_information|
11
+ name = name_of meta_information
12
+ value = value_of meta_information
13
+ meta_informations << { name: name, value: value } unless name.nil? || value.nil?
14
+ end
15
+ meta_informations
16
+ end
17
+
18
+ private
19
+
20
+ def for_each_meta_information(&block)
21
+ if @html
22
+ Nokogiri::HTML(@html).xpath('//meta').each &block
23
+ Nokogiri::HTML(@html).xpath('//link').each &block
24
+ end
25
+ end
26
+
27
+ def name_of(meta_information)
28
+ include_attribute? %w(name property rel), meta_information
29
+ end
30
+
31
+ def value_of(meta_information)
32
+ include_attribute? %w(value content href), meta_information
33
+ end
34
+
35
+ def include_attribute?(attributes, tag)
36
+ attributes.each do |attr|
37
+ return tag.attribute(attr).to_s if tag.attributes.include?(attr)
38
+ end
39
+ nil
40
+ end
41
+ end
42
+ end
@@ -1,5 +1,7 @@
1
1
  module Bulbasaur
2
2
  class NormalizeImageSources
3
+ DOMAIN_REGEX = /^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?([^:\/\n]+)/im
4
+
3
5
  def initialize(html, target_attrs)
4
6
  @html = html
5
7
  @target_attrs = target_attrs
@@ -25,8 +27,14 @@ module Bulbasaur
25
27
  end
26
28
 
27
29
  def adjust(element, attr)
28
- element.set_attribute 'src', element.xpath(attr).text
30
+ element.set_attribute 'src', lazy_load_url(element, element.xpath(attr).text)
29
31
  element.xpath(attr).remove
30
32
  end
33
+
34
+ def lazy_load_url(element, text)
35
+ text_match = text.match(DOMAIN_REGEX).to_s
36
+ element_match = element.css('@src').text.match(DOMAIN_REGEX).to_s
37
+ (text_match == element_match) ? text : "#{element_match}/#{text}"
38
+ end
31
39
  end
32
40
  end
@@ -3,7 +3,7 @@ module Bulbasaur
3
3
  module Version
4
4
  MAJOR = 0
5
5
  MINOR = 7
6
- PATCH = 1
6
+ PATCH = 2
7
7
  STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
8
8
  end
9
9
 
data/lib/bulbasaur.rb CHANGED
@@ -4,6 +4,7 @@ require "bulbasaur/extracts/extract_images_from_vimeo"
4
4
  require "bulbasaur/extracts/extract_images_from_html"
5
5
  require "bulbasaur/extracts/extract_images_from_all_resources"
6
6
  require "bulbasaur/extracts/extract_text_from_html.rb"
7
+ require "bulbasaur/extracts/extract_meta_informations_from_html.rb"
7
8
  require "bulbasaur/removals/remove_tags"
8
9
  require "bulbasaur/removals/remove_attributes"
9
10
  require "bulbasaur/replaces/replace_by_tag_image"
@@ -0,0 +1,38 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe Bulbasaur::ExtractMetaInformationsFromHTML do
4
+
5
+ subject do
6
+ described_class.new(html).call
7
+ end
8
+
9
+ let(:html) do
10
+ %Q(
11
+ <head>
12
+ <meta name="description" content="test-description">
13
+ <meta property="keywords" value="test-keywords">
14
+ <meta NAME="author" VALUE="test-author">
15
+ <link rel="canonical" href="test-canonical">
16
+ </head>
17
+ )
18
+ end
19
+
20
+ describe "#call" do
21
+
22
+ let(:meta_names) do
23
+ subject.map {|h| h[:name]}
24
+ end
25
+
26
+ let(:meta_values) do
27
+ subject.map {|h| h[:value]}
28
+ end
29
+
30
+ it "Does extract meta names informations from html" do
31
+ expect(meta_names).to include "description", "keywords", "author", "canonical"
32
+ end
33
+
34
+ it "Does extract meta values informations from html" do
35
+ expect(meta_values).to include "test-description", "test-keywords", "test-author", "test-canonical"
36
+ end
37
+ end
38
+ end
@@ -1,9 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Bulbasaur::RemoveAttributes do
4
- subject { described_class.new(html, banned_attrs).call }
4
+
5
+ subject do
6
+ described_class.new(html, banned_attrs).call
7
+ end
5
8
 
6
9
  describe '#call' do
10
+
7
11
  let(:html) do
8
12
  %[
9
13
  <style>
@@ -18,7 +22,10 @@ RSpec.describe Bulbasaur::RemoveAttributes do
18
22
  end
19
23
 
20
24
  context 'when there are no banned attributes' do
21
- let(:banned_attrs) { [] }
25
+
26
+ let(:banned_attrs) do
27
+ []
28
+ end
22
29
 
23
30
  it 'returns the HTML code as it was before' do
24
31
  expect(subject).to eq html
@@ -26,7 +33,10 @@ RSpec.describe Bulbasaur::RemoveAttributes do
26
33
  end
27
34
 
28
35
  context 'when there are banned attributes' do
29
- let(:banned_attrs) { %w(style) }
36
+
37
+ let(:banned_attrs) do
38
+ %w(style)
39
+ end
30
40
 
31
41
  it 'returns the HTML code without the banned attributes' do
32
42
  expect(subject.strip.gsub(/\n/, '').squeeze ' ').to eq %[
@@ -42,4 +52,5 @@ RSpec.describe Bulbasaur::RemoveAttributes do
42
52
  end
43
53
  end
44
54
  end
45
- end
55
+
56
+ end
@@ -1,9 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Bulbasaur::RemoveTags do
4
- subject { described_class.new(html, banned_tags).call }
4
+
5
+ subject do
6
+ described_class.new(html, banned_tags).call
7
+ end
5
8
 
6
9
  describe '#call' do
10
+
7
11
  let(:html) do
8
12
  %[
9
13
  <style>
@@ -18,7 +22,10 @@ RSpec.describe Bulbasaur::RemoveTags do
18
22
  end
19
23
 
20
24
  context 'when there are no banned tags' do
21
- let(:banned_tags) { [] }
25
+
26
+ let(:banned_tags) do
27
+ []
28
+ end
22
29
 
23
30
  it 'returns the HTML code as it was before' do
24
31
  expect(subject).to eq html
@@ -26,7 +33,10 @@ RSpec.describe Bulbasaur::RemoveTags do
26
33
  end
27
34
 
28
35
  context 'when there are banned tags' do
29
- let(:banned_tags) { %w(form style) }
36
+
37
+ let(:banned_tags) do
38
+ %w(form style)
39
+ end
30
40
 
31
41
  it 'returns the HTML code without the banned tags' do
32
42
  expect(subject.strip.gsub(/\n/, '').squeeze ' ').to eq %[
@@ -36,4 +46,5 @@ RSpec.describe Bulbasaur::RemoveTags do
36
46
  end
37
47
  end
38
48
  end
39
- end
49
+
50
+ end
@@ -1,25 +1,72 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Bulbasaur::NormalizeImageSources do
4
- subject { described_class.new(html, target_attrs).call }
5
-
6
- describe '#call' do
7
- let(:html) { '<img src="http://somewhere.to/get/a-pixel.gif" data-lazy-src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200"><img src="http://somewhere.to/get/another-pixel.gif" data-image="http://somewhere.to/get/the-other-real-image.jpg"><img src="http://somewhere.to/get/a-third-pixel.gif" data-src="http://somewhere.to/get/the-third-real-image.jpg">' }
4
+
5
+ subject do
6
+ described_class.new(html, target_attrs).call
7
+ end
8
8
 
9
- context 'when there are no target attributes' do
10
- let(:target_attrs) { [] }
9
+ describe "#call" do
10
+
11
+ let(:html) do
12
+ %Q(
13
+ <img src="http://somewhere.to/get/a-pixel.gif" data-lazy-src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
14
+ <img src="http://somewhere.to/get/another-pixel.gif" data-image="http://somewhere.to/get/the-other-real-image.jpg">
15
+ <img src="http://somewhere.to/get/a-third-pixel.gif" data-src="get/the-third-real-image.jpg">
16
+ <img src="otherplace.to/load/a-fourth-pixel.gif" lazy-data="otherplace.to/load/the-fourth-real-image.jpg">
17
+ )
18
+ end
19
+
20
+ context "When there are no target attributes" do
21
+
22
+ let(:target_attrs) do
23
+ []
24
+ end
11
25
 
12
- it 'returns the HTML code as it was before' do
26
+ it "Returns the HTML code as it was before" do
13
27
  expect(subject).to eq html
14
28
  end
15
29
  end
16
30
 
17
- context 'when there are target attributes' do
18
- let(:target_attrs) { %w(data-lazy-src data-image) }
31
+ context "When there are target attributes" do
32
+
33
+ let(:target_attrs) do
34
+ %w(data-lazy-src data-image)
35
+ end
36
+
37
+ let(:html_parsed) do
38
+ %Q(
39
+ <img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
40
+ <img src="http://somewhere.to/get/the-other-real-image.jpg">
41
+ <img src="http://somewhere.to/get/a-third-pixel.gif" data-src="get/the-third-real-image.jpg">
42
+ <img src="otherplace.to/load/a-fourth-pixel.gif" lazy-data="otherplace.to/load/the-fourth-real-image.jpg">
43
+ )
44
+ end
19
45
 
20
- it 'returns the HTML code with the specified image tags adjusted' do
21
- expect(subject).to eq '<img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200"><img src="http://somewhere.to/get/the-other-real-image.jpg"><img src="http://somewhere.to/get/a-third-pixel.gif" data-src="http://somewhere.to/get/the-third-real-image.jpg">'
46
+ it "Returns the HTML code with the specified image tags adjusted" do
47
+ expect(subject.delete(" ")).to eq html_parsed.delete(" ")
48
+ end
49
+ end
50
+
51
+ context "When there are target attributes with relative path" do
52
+
53
+ let(:target_attrs) do
54
+ %w(data-lazy-src data-image data-src lazy-data)
55
+ end
56
+
57
+ let(:html_parsed) do
58
+ %Q(
59
+ <img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
60
+ <img src="http://somewhere.to/get/the-other-real-image.jpg">
61
+ <img src="http://somewhere.to/get/the-third-real-image.jpg">
62
+ <img src="otherplace.to/load/the-fourth-real-image.jpg">
63
+ )
64
+ end
65
+
66
+ it "Returns the HTML code with the involved image tags fixed with domain and path" do
67
+ expect(subject.delete(" ")).to eq html_parsed.delete(" ")
22
68
  end
23
69
  end
24
70
  end
25
- end
71
+
72
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: preadly-bulbasaur
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Magno Costa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-07 00:00:00.000000000 Z
11
+ date: 2015-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,6 +94,7 @@ files:
94
94
  - lib/bulbasaur/extracts/extract_images_from_html.rb
95
95
  - lib/bulbasaur/extracts/extract_images_from_vimeo.rb
96
96
  - lib/bulbasaur/extracts/extract_images_from_youtube.rb
97
+ - lib/bulbasaur/extracts/extract_meta_informations_from_html.rb
97
98
  - lib/bulbasaur/extracts/extract_text_from_html.rb
98
99
  - lib/bulbasaur/removals/remove_attributes.rb
99
100
  - lib/bulbasaur/removals/remove_tags.rb
@@ -106,12 +107,12 @@ files:
106
107
  - spec/bulbasaur/extracts/extract_images_from_vimeo_spec.rb
107
108
  - spec/bulbasaur/extracts/extract_images_from_youtube_spec.rb
108
109
  - spec/bulbasaur/extracts/extract_inner_text_from_html_spec.rb
110
+ - spec/bulbasaur/extracts/extract_meta_informations_from_html_spec.rb
109
111
  - spec/bulbasaur/removals/remove_attributes_spec.rb
110
112
  - spec/bulbasaur/removals/remove_tags_spec.rb
111
113
  - spec/bulbasaur/replaces/replace_by_tag_image_spec.rb
112
114
  - spec/bulbasaur/utils/normalize_image_sources_spec.rb
113
115
  - spec/bulbasaur/utils/normalize_url_spec.rb
114
- - spec/bulbasaur_spec.rb
115
116
  - spec/spec_helper.rb
116
117
  homepage: https://github.com/preadly/bulbasaur
117
118
  licenses: []
@@ -142,10 +143,10 @@ test_files:
142
143
  - spec/bulbasaur/extracts/extract_images_from_vimeo_spec.rb
143
144
  - spec/bulbasaur/extracts/extract_images_from_youtube_spec.rb
144
145
  - spec/bulbasaur/extracts/extract_inner_text_from_html_spec.rb
146
+ - spec/bulbasaur/extracts/extract_meta_informations_from_html_spec.rb
145
147
  - spec/bulbasaur/removals/remove_attributes_spec.rb
146
148
  - spec/bulbasaur/removals/remove_tags_spec.rb
147
149
  - spec/bulbasaur/replaces/replace_by_tag_image_spec.rb
148
150
  - spec/bulbasaur/utils/normalize_image_sources_spec.rb
149
151
  - spec/bulbasaur/utils/normalize_url_spec.rb
150
- - spec/bulbasaur_spec.rb
151
152
  - spec/spec_helper.rb
@@ -1,13 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe "test" do
4
-
5
- describe "#teste" do
6
-
7
- it "hello" do
8
- expect("test").to eq "test"
9
- end
10
-
11
- end
12
-
13
- end