preadly-bulbasaur 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 75b989582785e56fa7033a634ef68f664118cc2b
4
- data.tar.gz: a622c9d1db2fc88f0b053859bcd8e6ea3ec4ee69
3
+ metadata.gz: a3585babf7ec241a14a1bb08d933cb648216658e
4
+ data.tar.gz: e283f5b067e6a8d7642e28e747df9a36e49877fd
5
5
  SHA512:
6
- metadata.gz: 87a4cc64a0ea8f231b59f4446ca1319bece8f51ca41900d427c1fd4e585f3dba855919a2ca21da0314d903798a3bc124134c357ab1436adf4bdb7c6f4174626c
7
- data.tar.gz: cb01394e15a2b9cd0e376d2b438ab4237398bc104b7fd1c93e0bf312d4cf485f7cba236a9dff04005ce398a1a0bcb25a9ee550af593379b7ff684da939505de2
6
+ metadata.gz: 3ec4836696c83dd0eef9e32ca7cf9375ecc37bc4ed973f5761357702a23a3c5eeaa7198b195ebab39a4e11e3c505a2b6d50ad37034a95cf45a06a680e3d0b048
7
+ data.tar.gz: e1324f9d951aaa744e91b4a1113a3f2cc9b2e8222217e3a2ecc9303fb6455ec838819b218a52d64ffbf28a16a5dae7bff6ea4a1c56e9b06f86d464b66e52a4ee
@@ -0,0 +1,42 @@
1
+ module Bulbasaur
2
+ class ExtractMetaInformationsFromHTML
3
+
4
+ def initialize(html)
5
+ @html = html
6
+ end
7
+
8
+ def call
9
+ meta_informations = []
10
+ for_each_meta_information do |meta_information|
11
+ name = name_of meta_information
12
+ value = value_of meta_information
13
+ meta_informations << { name: name, value: value } unless name.nil? || value.nil?
14
+ end
15
+ meta_informations
16
+ end
17
+
18
+ private
19
+
20
+ def for_each_meta_information(&block)
21
+ if @html
22
+ Nokogiri::HTML(@html).xpath('//meta').each &block
23
+ Nokogiri::HTML(@html).xpath('//link').each &block
24
+ end
25
+ end
26
+
27
+ def name_of(meta_information)
28
+ include_attribute? %w(name property rel), meta_information
29
+ end
30
+
31
+ def value_of(meta_information)
32
+ include_attribute? %w(value content href), meta_information
33
+ end
34
+
35
+ def include_attribute?(attributes, tag)
36
+ attributes.each do |attr|
37
+ return tag.attribute(attr).to_s if tag.attributes.include?(attr)
38
+ end
39
+ nil
40
+ end
41
+ end
42
+ end
@@ -1,5 +1,7 @@
1
1
  module Bulbasaur
2
2
  class NormalizeImageSources
3
+ DOMAIN_REGEX = /^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?([^:\/\n]+)/im
4
+
3
5
  def initialize(html, target_attrs)
4
6
  @html = html
5
7
  @target_attrs = target_attrs
@@ -25,8 +27,14 @@ module Bulbasaur
25
27
  end
26
28
 
27
29
  def adjust(element, attr)
28
- element.set_attribute 'src', element.xpath(attr).text
30
+ element.set_attribute 'src', lazy_load_url(element, element.xpath(attr).text)
29
31
  element.xpath(attr).remove
30
32
  end
33
+
34
+ def lazy_load_url(element, text)
35
+ text_match = text.match(DOMAIN_REGEX).to_s
36
+ element_match = element.css('@src').text.match(DOMAIN_REGEX).to_s
37
+ (text_match == element_match) ? text : "#{element_match}/#{text}"
38
+ end
31
39
  end
32
40
  end
@@ -3,7 +3,7 @@ module Bulbasaur
3
3
  module Version
4
4
  MAJOR = 0
5
5
  MINOR = 7
6
- PATCH = 1
6
+ PATCH = 2
7
7
  STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
8
8
  end
9
9
 
data/lib/bulbasaur.rb CHANGED
@@ -4,6 +4,7 @@ require "bulbasaur/extracts/extract_images_from_vimeo"
4
4
  require "bulbasaur/extracts/extract_images_from_html"
5
5
  require "bulbasaur/extracts/extract_images_from_all_resources"
6
6
  require "bulbasaur/extracts/extract_text_from_html.rb"
7
+ require "bulbasaur/extracts/extract_meta_informations_from_html.rb"
7
8
  require "bulbasaur/removals/remove_tags"
8
9
  require "bulbasaur/removals/remove_attributes"
9
10
  require "bulbasaur/replaces/replace_by_tag_image"
@@ -0,0 +1,38 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe Bulbasaur::ExtractMetaInformationsFromHTML do
4
+
5
+ subject do
6
+ described_class.new(html).call
7
+ end
8
+
9
+ let(:html) do
10
+ %Q(
11
+ <head>
12
+ <meta name="description" content="test-description">
13
+ <meta property="keywords" value="test-keywords">
14
+ <meta NAME="author" VALUE="test-author">
15
+ <link rel="canonical" href="test-canonical">
16
+ </head>
17
+ )
18
+ end
19
+
20
+ describe "#call" do
21
+
22
+ let(:meta_names) do
23
+ subject.map {|h| h[:name]}
24
+ end
25
+
26
+ let(:meta_values) do
27
+ subject.map {|h| h[:value]}
28
+ end
29
+
30
+ it "Does extract meta names informations from html" do
31
+ expect(meta_names).to include "description", "keywords", "author", "canonical"
32
+ end
33
+
34
+ it "Does extract meta values informations from html" do
35
+ expect(meta_values).to include "test-description", "test-keywords", "test-author", "test-canonical"
36
+ end
37
+ end
38
+ end
@@ -1,9 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Bulbasaur::RemoveAttributes do
4
- subject { described_class.new(html, banned_attrs).call }
4
+
5
+ subject do
6
+ described_class.new(html, banned_attrs).call
7
+ end
5
8
 
6
9
  describe '#call' do
10
+
7
11
  let(:html) do
8
12
  %[
9
13
  <style>
@@ -18,7 +22,10 @@ RSpec.describe Bulbasaur::RemoveAttributes do
18
22
  end
19
23
 
20
24
  context 'when there are no banned attributes' do
21
- let(:banned_attrs) { [] }
25
+
26
+ let(:banned_attrs) do
27
+ []
28
+ end
22
29
 
23
30
  it 'returns the HTML code as it was before' do
24
31
  expect(subject).to eq html
@@ -26,7 +33,10 @@ RSpec.describe Bulbasaur::RemoveAttributes do
26
33
  end
27
34
 
28
35
  context 'when there are banned attributes' do
29
- let(:banned_attrs) { %w(style) }
36
+
37
+ let(:banned_attrs) do
38
+ %w(style)
39
+ end
30
40
 
31
41
  it 'returns the HTML code without the banned attributes' do
32
42
  expect(subject.strip.gsub(/\n/, '').squeeze ' ').to eq %[
@@ -42,4 +52,5 @@ RSpec.describe Bulbasaur::RemoveAttributes do
42
52
  end
43
53
  end
44
54
  end
45
- end
55
+
56
+ end
@@ -1,9 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Bulbasaur::RemoveTags do
4
- subject { described_class.new(html, banned_tags).call }
4
+
5
+ subject do
6
+ described_class.new(html, banned_tags).call
7
+ end
5
8
 
6
9
  describe '#call' do
10
+
7
11
  let(:html) do
8
12
  %[
9
13
  <style>
@@ -18,7 +22,10 @@ RSpec.describe Bulbasaur::RemoveTags do
18
22
  end
19
23
 
20
24
  context 'when there are no banned tags' do
21
- let(:banned_tags) { [] }
25
+
26
+ let(:banned_tags) do
27
+ []
28
+ end
22
29
 
23
30
  it 'returns the HTML code as it was before' do
24
31
  expect(subject).to eq html
@@ -26,7 +33,10 @@ RSpec.describe Bulbasaur::RemoveTags do
26
33
  end
27
34
 
28
35
  context 'when there are banned tags' do
29
- let(:banned_tags) { %w(form style) }
36
+
37
+ let(:banned_tags) do
38
+ %w(form style)
39
+ end
30
40
 
31
41
  it 'returns the HTML code without the banned tags' do
32
42
  expect(subject.strip.gsub(/\n/, '').squeeze ' ').to eq %[
@@ -36,4 +46,5 @@ RSpec.describe Bulbasaur::RemoveTags do
36
46
  end
37
47
  end
38
48
  end
39
- end
49
+
50
+ end
@@ -1,25 +1,72 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Bulbasaur::NormalizeImageSources do
4
- subject { described_class.new(html, target_attrs).call }
5
-
6
- describe '#call' do
7
- let(:html) { '<img src="http://somewhere.to/get/a-pixel.gif" data-lazy-src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200"><img src="http://somewhere.to/get/another-pixel.gif" data-image="http://somewhere.to/get/the-other-real-image.jpg"><img src="http://somewhere.to/get/a-third-pixel.gif" data-src="http://somewhere.to/get/the-third-real-image.jpg">' }
4
+
5
+ subject do
6
+ described_class.new(html, target_attrs).call
7
+ end
8
8
 
9
- context 'when there are no target attributes' do
10
- let(:target_attrs) { [] }
9
+ describe "#call" do
10
+
11
+ let(:html) do
12
+ %Q(
13
+ <img src="http://somewhere.to/get/a-pixel.gif" data-lazy-src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
14
+ <img src="http://somewhere.to/get/another-pixel.gif" data-image="http://somewhere.to/get/the-other-real-image.jpg">
15
+ <img src="http://somewhere.to/get/a-third-pixel.gif" data-src="get/the-third-real-image.jpg">
16
+ <img src="otherplace.to/load/a-fourth-pixel.gif" lazy-data="otherplace.to/load/the-fourth-real-image.jpg">
17
+ )
18
+ end
19
+
20
+ context "When there are no target attributes" do
21
+
22
+ let(:target_attrs) do
23
+ []
24
+ end
11
25
 
12
- it 'returns the HTML code as it was before' do
26
+ it "Returns the HTML code as it was before" do
13
27
  expect(subject).to eq html
14
28
  end
15
29
  end
16
30
 
17
- context 'when there are target attributes' do
18
- let(:target_attrs) { %w(data-lazy-src data-image) }
31
+ context "When there are target attributes" do
32
+
33
+ let(:target_attrs) do
34
+ %w(data-lazy-src data-image)
35
+ end
36
+
37
+ let(:html_parsed) do
38
+ %Q(
39
+ <img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
40
+ <img src="http://somewhere.to/get/the-other-real-image.jpg">
41
+ <img src="http://somewhere.to/get/a-third-pixel.gif" data-src="get/the-third-real-image.jpg">
42
+ <img src="otherplace.to/load/a-fourth-pixel.gif" lazy-data="otherplace.to/load/the-fourth-real-image.jpg">
43
+ )
44
+ end
19
45
 
20
- it 'returns the HTML code with the specified image tags adjusted' do
21
- expect(subject).to eq '<img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200"><img src="http://somewhere.to/get/the-other-real-image.jpg"><img src="http://somewhere.to/get/a-third-pixel.gif" data-src="http://somewhere.to/get/the-third-real-image.jpg">'
46
+ it "Returns the HTML code with the specified image tags adjusted" do
47
+ expect(subject.delete(" ")).to eq html_parsed.delete(" ")
48
+ end
49
+ end
50
+
51
+ context "When there are target attributes with relative path" do
52
+
53
+ let(:target_attrs) do
54
+ %w(data-lazy-src data-image data-src lazy-data)
55
+ end
56
+
57
+ let(:html_parsed) do
58
+ %Q(
59
+ <img src="http://somewhere.to/get/the-real-image.jpg" alt="Image" width="800" height="1200">
60
+ <img src="http://somewhere.to/get/the-other-real-image.jpg">
61
+ <img src="http://somewhere.to/get/the-third-real-image.jpg">
62
+ <img src="otherplace.to/load/the-fourth-real-image.jpg">
63
+ )
64
+ end
65
+
66
+ it "Returns the HTML code with the involved image tags fixed with domain and path" do
67
+ expect(subject.delete(" ")).to eq html_parsed.delete(" ")
22
68
  end
23
69
  end
24
70
  end
25
- end
71
+
72
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: preadly-bulbasaur
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Magno Costa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-07 00:00:00.000000000 Z
11
+ date: 2015-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,6 +94,7 @@ files:
94
94
  - lib/bulbasaur/extracts/extract_images_from_html.rb
95
95
  - lib/bulbasaur/extracts/extract_images_from_vimeo.rb
96
96
  - lib/bulbasaur/extracts/extract_images_from_youtube.rb
97
+ - lib/bulbasaur/extracts/extract_meta_informations_from_html.rb
97
98
  - lib/bulbasaur/extracts/extract_text_from_html.rb
98
99
  - lib/bulbasaur/removals/remove_attributes.rb
99
100
  - lib/bulbasaur/removals/remove_tags.rb
@@ -106,12 +107,12 @@ files:
106
107
  - spec/bulbasaur/extracts/extract_images_from_vimeo_spec.rb
107
108
  - spec/bulbasaur/extracts/extract_images_from_youtube_spec.rb
108
109
  - spec/bulbasaur/extracts/extract_inner_text_from_html_spec.rb
110
+ - spec/bulbasaur/extracts/extract_meta_informations_from_html_spec.rb
109
111
  - spec/bulbasaur/removals/remove_attributes_spec.rb
110
112
  - spec/bulbasaur/removals/remove_tags_spec.rb
111
113
  - spec/bulbasaur/replaces/replace_by_tag_image_spec.rb
112
114
  - spec/bulbasaur/utils/normalize_image_sources_spec.rb
113
115
  - spec/bulbasaur/utils/normalize_url_spec.rb
114
- - spec/bulbasaur_spec.rb
115
116
  - spec/spec_helper.rb
116
117
  homepage: https://github.com/preadly/bulbasaur
117
118
  licenses: []
@@ -142,10 +143,10 @@ test_files:
142
143
  - spec/bulbasaur/extracts/extract_images_from_vimeo_spec.rb
143
144
  - spec/bulbasaur/extracts/extract_images_from_youtube_spec.rb
144
145
  - spec/bulbasaur/extracts/extract_inner_text_from_html_spec.rb
146
+ - spec/bulbasaur/extracts/extract_meta_informations_from_html_spec.rb
145
147
  - spec/bulbasaur/removals/remove_attributes_spec.rb
146
148
  - spec/bulbasaur/removals/remove_tags_spec.rb
147
149
  - spec/bulbasaur/replaces/replace_by_tag_image_spec.rb
148
150
  - spec/bulbasaur/utils/normalize_image_sources_spec.rb
149
151
  - spec/bulbasaur/utils/normalize_url_spec.rb
150
- - spec/bulbasaur_spec.rb
151
152
  - spec/spec_helper.rb
@@ -1,13 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe "test" do
4
-
5
- describe "#teste" do
6
-
7
- it "hello" do
8
- expect("test").to eq "test"
9
- end
10
-
11
- end
12
-
13
- end