preadly-bulbasaur 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e2ce49896cc75739b7b348c8562bd27b782ae050
4
- data.tar.gz: 8e085b9b873fe13483f5885bf4c5a26d7f4171b8
3
+ metadata.gz: 014f0ead8b757383f291bc31f15134e1c325032d
4
+ data.tar.gz: e3299a0a0b906620a99240065456189bebc08aee
5
5
  SHA512:
6
- metadata.gz: 36409b5eda6f8928b710698fbb7787fa387f59276f08a724e448a333e8b259096efd30cff1b8632b0e77401a3194c7b7f2a42c2b3fd64a44e20c649b3dc1be1a
7
- data.tar.gz: bedb92be982705d050f8ea472410a34af96f4df36a528d370a593cf6087246717a4f2bc3e08f2eac34473cd71d0433d7920482d42a96bd639630dcbef75940ea
6
+ metadata.gz: 7b18fbb448369cafdce3789bd9a4c61ec87a198118908fddfdef685c828c65331da6972701d3b713a86abcffde31ce5af0b446ee3abef285aa3df41bfe88affe
7
+ data.tar.gz: 37ae03db04175312915da5cd8e9511f3db4f9c56dcebf87d3c3385fb2573b90dd832a73f1cb1a43dec3d3a9b25397a7ffebf4f1bc06898be209fc8a30e14caad
@@ -3,7 +3,7 @@ module Bulbasaur
3
3
  class ExtractImagesFromHTML
4
4
 
5
5
  CSS_IMPORT_URL_REGEX = /(?<=url\()['"]?.+?['"]?.+?(?=\))/
6
- IMG_CANDIDATE_URL_REGEX = /https?:\/\/\S*\.(?:png|jpg|jpeg)(?!\.\S)/i
6
+ IMG_CANDIDATE_URL_REGEX = /https?:\/\/\S*\.(?:png|jpg|jpeg|gif)(?!\.\S)/i
7
7
 
8
8
  def initialize(html)
9
9
  @html = html
@@ -13,6 +13,7 @@ module Bulbasaur
13
13
  images = Array.new
14
14
  images = images + extract_images_by_tag_image
15
15
  images = images + extract_images_by_tag_style
16
+ images = images + extract_images_by_link
16
17
  images
17
18
  end
18
19
 
@@ -36,6 +37,15 @@ module Bulbasaur
36
37
  images
37
38
  end
38
39
 
40
+ def extract_images_by_link
41
+ images = Array.new
42
+ Nokogiri::HTML(@html).xpath('//a').each do |link|
43
+ url = link.xpath('@href').text
44
+ images << create_struct(url) if url =~ IMG_CANDIDATE_URL_REGEX
45
+ end
46
+ images
47
+ end
48
+
39
49
  def create_struct(url, alt=nil)
40
50
  {url: url, alt: alt }
41
51
  end
@@ -2,7 +2,7 @@ module Bulbasaur
2
2
 
3
3
  module Version
4
4
  MAJOR = 0
5
- MINOR = 4
5
+ MINOR = 5
6
6
  PATCH = 0
7
7
  STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
8
8
  end
@@ -60,6 +60,44 @@ RSpec.describe Bulbasaur::ExtractImagesFromHTML do
60
60
  expect(subject.first[:alt]).to be_nil
61
61
  end
62
62
  end
63
+
64
+ context 'When sending HTML with a link pointing to an image' do
65
+ let(:html) do
66
+ '<p>
67
+ <a href="http://somewhere.to/get/the_original_image.jpg">
68
+ Click here to see the original image.
69
+ </a>
70
+ <a href="http://somewhere.to/get/the_original_image.jpg?width=400&height=400">
71
+ Click here to see the original image.
72
+ </a>
73
+ <a href="http://somewhere.to/go/to/another_page.html">
74
+ Click here to go to another page.
75
+ </a>
76
+ </p>'
77
+ end
78
+
79
+ it 'Does return an image array with 1 item' do
80
+ expect(subject.size).to eq 2
81
+ end
82
+
83
+ it 'Does return the image URL with parameters' do
84
+ expect(subject.last[:url]).to eq 'http://somewhere.to/get/the_original_image.jpg?width=400&height=400'
85
+ end
86
+
87
+ it 'Does return the image URL without parameters' do
88
+ expect(subject.first[:url]).to eq 'http://somewhere.to/get/the_original_image.jpg'
89
+ end
90
+
91
+ it 'Does return the image alt' do
92
+ expect(subject.first[:alt]).to be_nil
93
+ end
94
+
95
+ it 'Does not include links other than for images' do
96
+ expect(subject).to include Hash(url: 'http://somewhere.to/get/the_original_image.jpg', alt: nil)
97
+ expect(subject).to include Hash(url: 'http://somewhere.to/get/the_original_image.jpg?width=400&height=400', alt: nil)
98
+ expect(subject).not_to include Hash(url: 'http://somewhere.to/go/to/another_page.html', alt: nil)
99
+ end
100
+ end
63
101
 
64
102
  context "When send html with many images" do
65
103
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: preadly-bulbasaur
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Magno Costa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-16 00:00:00.000000000 Z
11
+ date: 2015-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -126,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  version: '0'
127
127
  requirements: []
128
128
  rubyforge_project:
129
- rubygems_version: 2.4.6
129
+ rubygems_version: 2.2.2
130
130
  signing_key:
131
131
  specification_version: 4
132
132
  summary: Bulbasaur is a helper for crawler operations used in Pread.ly
@@ -140,4 +140,3 @@ test_files:
140
140
  - spec/bulbasaur/utils/normalize_url_spec.rb
141
141
  - spec/bulbasaur_spec.rb
142
142
  - spec/spec_helper.rb
143
- has_rdoc: