RubyGems - preadly-bulbasaur - Versions diffs - 0.7.2 → 0.8.0 - Mend

preadly-bulbasaur 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a3585babf7ec241a14a1bb08d933cb648216658e
-  data.tar.gz: e283f5b067e6a8d7642e28e747df9a36e49877fd
+  metadata.gz: 975df2c7ace7a0947cd98b55bfcf896a970e5660
+  data.tar.gz: 5a5b086ba797209a5493d4d1db2026631f2091a2
 SHA512:
-  metadata.gz: 3ec4836696c83dd0eef9e32ca7cf9375ecc37bc4ed973f5761357702a23a3c5eeaa7198b195ebab39a4e11e3c505a2b6d50ad37034a95cf45a06a680e3d0b048
-  data.tar.gz: e1324f9d951aaa744e91b4a1113a3f2cc9b2e8222217e3a2ecc9303fb6455ec838819b218a52d64ffbf28a16a5dae7bff6ea4a1c56e9b06f86d464b66e52a4ee
+  metadata.gz: a62e0c0d111f954d147003c25d37ede0ee09c1717ac3a15f8194d07d847e2cedf474667b03bf9c2a84f52a93d53920f6d5162c44b399ebbc9551b4db88bf65c9
+  data.tar.gz: fc8dd0769358ef5e52da253d0a412065667920a401c78a07eb52b8f14bb5368542db7098874987386b9ddd9952df0847d2c6c6acd43ef4a21765e596b75ec3f2

data/lib/bulbasaur/extracts/extract_images_from_all_resources.rb CHANGED Viewed

@@ -11,6 +11,7 @@ module Bulbasaur
       images = images + extract_images_html(@html)
       images = images + extract_images_youtube(@html)
       images = images + extract_images_vimeo(@html)
+      images = images + extract_images_meta(@html)
       images
     end
@@ -40,5 +41,13 @@ module Bulbasaur
       end
     end
+    def extract_images_meta(html)
+      begin
+        Bulbasaur::ExtractImagesFromMeta.new(html).call
+      rescue Exception => e
+        []
+      end
+    end
   end
 end

data/lib/bulbasaur/extracts/extract_images_from_html.rb CHANGED Viewed

@@ -24,7 +24,7 @@ module Bulbasaur
       Nokogiri::HTML(@html).xpath("//img").each do |item|
         url = item.xpath("@src").text
         alt = item.xpath("@alt").text
-        images << create_struct(url, alt)
+        images << create_struct(url, 'img', alt)
       end
       images
     end
@@ -32,7 +32,7 @@ module Bulbasaur
     def extract_images_by_tag_style
       images = Array.new
       @html.scan(CSS_IMPORT_URL_REGEX).each do |url|
-        images << create_struct(url)
+        images << create_struct(url, 'style')
       end
       images
     end
@@ -41,13 +41,13 @@ module Bulbasaur
       images = Array.new
       Nokogiri::HTML(@html).xpath('//a').each do |link|
         url = link.xpath('@href').text
-        images << create_struct(url) if url =~ IMG_CANDIDATE_URL_REGEX
+        images << create_struct(url, 'link') if url =~ IMG_CANDIDATE_URL_REGEX
       end
       images
     end
-    def create_struct(url, alt=nil)
-      {url: url, alt: alt }
+    def create_struct(url, source, alt = nil)
+      { url: url, alt: alt, source: source }
     end
   end
 end

data/lib/bulbasaur/extracts/extract_images_from_meta.rb ADDED Viewed

@@ -0,0 +1,21 @@
+module Bulbasaur
+  class ExtractImagesFromMeta
+    PROPERTY = 'og:image'
+    def initialize(html)
+      @html = html
+    end
+    def call
+      meta_informations = Bulbasaur::ExtractMetaInformationsFromHTML.new(@html).call
+      image_urls = image_meta_tags(meta_informations).map { |meta| { url: meta[:value], source: 'meta' } }
+      image_urls
+    end
+    private
+    def image_meta_tags(meta_informations)
+      meta_informations.select { |meta| meta[:name] == PROPERTY }
+    end
+  end
+end

data/lib/bulbasaur/extracts/extract_images_from_vimeo.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module Bulbasaur
       @html.scan(EXTRACT_URL_PATTERN).each do |video|
         vid = get_vid(video)
         url_image = image_url_for(vid)
-        images << { url: url_image }
+        images << { url: url_image, video_url: video, source: 'vimeo' }
       end
       images
     end

data/lib/bulbasaur/extracts/extract_images_from_youtube.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Bulbasaur
       images = Array.new
       @html.scan(EXTRACT_URL_PATTERN).each do |video|
         vid = get_vid(video)
-        images << { url: image_url(vid) }
+        images << { url: image_url(vid), video_url: video, source: 'youtube' }
       end
       images
     end

data/lib/bulbasaur/version.rb CHANGED Viewed

@@ -2,8 +2,8 @@ module Bulbasaur
   module Version
     MAJOR = 0
-    MINOR = 7
-    PATCH = 2
+    MINOR = 8
+    PATCH = 0
     STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
   end

data/lib/bulbasaur.rb CHANGED Viewed

@@ -2,6 +2,7 @@ require "nokogiri"
 require "bulbasaur/extracts/extract_images_from_youtube"
 require "bulbasaur/extracts/extract_images_from_vimeo"
 require "bulbasaur/extracts/extract_images_from_html"
+require "bulbasaur/extracts/extract_images_from_meta"
 require "bulbasaur/extracts/extract_images_from_all_resources"
 require "bulbasaur/extracts/extract_text_from_html.rb"
 require "bulbasaur/extracts/extract_meta_informations_from_html.rb"

data/spec/bulbasaur/extracts/extract_images_from_all_resources_spec.rb CHANGED Viewed

@@ -14,6 +14,7 @@ RSpec.describe Bulbasaur::ExtractImagesFromAllResources do
     let(:html) do
         %Q(
+          <meta property="og:image" content="http://somewhere.to/get/an_image.jpg" />
           <p>
             <iframe width="560" height="315" src="https://www.youtube.com/embed/video0" frameborder="0" allowfullscreen></iframe>
           </p>
@@ -39,8 +40,8 @@ RSpec.describe Bulbasaur::ExtractImagesFromAllResources do
         )
     end
-    it "Does return 15 itens" do
-      expect(subject.size).to eq 15
+    it "Does return 16 items" do
+      expect(subject.size).to eq 16
     end
   end
 end

data/spec/bulbasaur/extracts/extract_images_from_html_spec.rb CHANGED Viewed

@@ -37,6 +37,10 @@ RSpec.describe Bulbasaur::ExtractImagesFromHTML do
       it "Does return the image alt" do
         expect(subject.first[:alt]).to eq "image alt test"
       end
+      it 'Does return the image source' do
+        expect(subject.first[:source]).to eq 'img'
+      end
     end
     context "When send html with a image style inline" do
@@ -59,6 +63,10 @@ RSpec.describe Bulbasaur::ExtractImagesFromHTML do
       it "Does return the image alt" do
         expect(subject.first[:alt]).to be_nil
       end
+      it 'Does return the image source' do
+        expect(subject.first[:source]).to eq 'style'
+      end
     end
     context 'When sending HTML with a link pointing to an image' do
@@ -90,25 +98,29 @@ RSpec.describe Bulbasaur::ExtractImagesFromHTML do
       end
       it 'Does return the image URL with parameters' do
-        expect(subject).to include Hash url: 'http://somewhere.to/get/the_original_image.jpg?width=400&height=400', alt: nil
+        expect(subject).to include Hash url: 'http://somewhere.to/get/the_original_image.jpg?width=400&height=400', alt: nil, source: 'link'
       end
       it 'Does return the image URL without parameters' do
-        expect(subject).to include Hash url: 'http://somewhere.to/get/the_original_image.jpg', alt: nil
+        expect(subject).to include Hash url: 'http://somewhere.to/get/the_original_image.jpg', alt: nil, source: 'link'
       end
       it 'Does return the image URL with tilde parameters' do
-        expect(subject).to include Hash url: 'http://somewhere.to/get/the_original_image.jpg~original', alt: nil
+        expect(subject).to include Hash url: 'http://somewhere.to/get/the_original_image.jpg~original', alt: nil, source: 'link'
       end
       it 'Does return the image URL with upcased and special characters' do
-        expect(subject).to include Hash url: 'http://somewhere.to/get/The_Original_Image%C3%A7a_3.JPG', alt: nil
+        expect(subject).to include Hash url: 'http://somewhere.to/get/The_Original_Image%C3%A7a_3.JPG', alt: nil, source: 'link'
       end
       it 'Does return the image alt' do
         expect(subject.first[:alt]).to be_nil
       end
+      it 'Does return the image source' do
+        expect(subject.first[:source]).to eq 'link'
+      end
       it 'Does not include links other than for images' do
         expect(subject).not_to include Hash url: 'http://somewhere.to/get/the_original_image.jpg.exe', alt: nil
         expect(subject).not_to include Hash url: 'http://somewhere.to/go/to/another_page.html', alt: nil

data/spec/bulbasaur/extracts/extract_images_from_meta_spec.rb ADDED Viewed

@@ -0,0 +1,45 @@
+require 'spec_helper'
+RSpec.describe Bulbasaur::ExtractImagesFromMeta do
+  subject { described_class.new(html).call }
+  describe '#call' do
+    context 'when there are no image meta tags' do
+      let(:html) { %Q(<meta property="og:description" content="Just a RSpec test." />) }
+      it 'returns an empty array' do
+        expect(subject).to be_empty
+      end
+    end
+    context 'when there is one image meta tag' do
+      let :html do
+        %Q(
+        <meta property="og:image" content="http://somewhere.to/get/an_image.jpg" />
+        <meta property="og:description" content="Just a RSpec test." />
+        )
+      end
+      it 'returns the image URL found' do
+        expect(subject.count).to be 1
+        expect(subject).to include Hash url: 'http://somewhere.to/get/an_image.jpg', source: 'meta'
+      end
+    end
+    context 'when there are multiple image meta tags' do
+      let :html do
+        %Q(
+        <meta property="og:image" content="http://somewhere.to/get/an_image.jpg" />
+        <meta property="og:image" content="http://somewhere.to/get/another_image.jpg" />
+        <meta property="og:image" content="http://somewhere.to/get/a_third_image.jpg" />
+        <meta property="og:description" content="Just a RSpec test." />
+        )
+      end
+      it 'returns the image URLs found' do
+        expect(subject.count).to be 3
+        expect(subject.map { |meta| meta[:url] }).to include 'http://somewhere.to/get/an_image.jpg', 'http://somewhere.to/get/another_image.jpg', 'http://somewhere.to/get/a_third_image.jpg'
+      end
+    end
+  end
+end

data/spec/bulbasaur/extracts/extract_images_from_vimeo_spec.rb CHANGED Viewed

@@ -31,6 +31,8 @@ RSpec.describe Bulbasaur::ExtractImagesFromVimeo do
       it "Does return vime url" do
         expect(subject.first[:url]).to eq "https://i.vimeocdn.com/video/123456789_640.webp"
+        expect(subject.first[:source]).to eq 'vimeo'
+        expect(subject.first[:video_url]).to eq 'player.vimeo.com/video/123456789'
       end
     end

data/spec/bulbasaur/extracts/extract_images_from_youtube_spec.rb CHANGED Viewed

@@ -35,6 +35,8 @@ RSpec.describe Bulbasaur::ExtractImagesFromYoutube do
       it "Does return youtube url" do
         expect(subject.first[:url]).to eq "http://img.youtube.com/vi/123idfake321/maxresdefault.jpg"
+        expect(subject.first[:source]).to eq 'youtube'
+        expect(subject.first[:video_url]).to eq 'www.youtube.com/embed/123idfake321'
       end
     end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: preadly-bulbasaur
 version: !ruby/object:Gem::Version
-  version: 0.7.2
+  version: 0.8.0
 platform: ruby
 authors:
 - Magno Costa
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-08-13 00:00:00.000000000 Z
+date: 2015-08-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -92,6 +92,7 @@ files:
 - lib/bulbasaur.rb
 - lib/bulbasaur/extracts/extract_images_from_all_resources.rb
 - lib/bulbasaur/extracts/extract_images_from_html.rb
+- lib/bulbasaur/extracts/extract_images_from_meta.rb
 - lib/bulbasaur/extracts/extract_images_from_vimeo.rb
 - lib/bulbasaur/extracts/extract_images_from_youtube.rb
 - lib/bulbasaur/extracts/extract_meta_informations_from_html.rb
@@ -104,6 +105,7 @@ files:
 - lib/bulbasaur/version.rb
 - spec/bulbasaur/extracts/extract_images_from_all_resources_spec.rb
 - spec/bulbasaur/extracts/extract_images_from_html_spec.rb
+- spec/bulbasaur/extracts/extract_images_from_meta_spec.rb
 - spec/bulbasaur/extracts/extract_images_from_vimeo_spec.rb
 - spec/bulbasaur/extracts/extract_images_from_youtube_spec.rb
 - spec/bulbasaur/extracts/extract_inner_text_from_html_spec.rb
@@ -140,6 +142,7 @@ summary: Bulbasaur is a helper for crawler operations used in Pread.ly
 test_files:
 - spec/bulbasaur/extracts/extract_images_from_all_resources_spec.rb
 - spec/bulbasaur/extracts/extract_images_from_html_spec.rb
+- spec/bulbasaur/extracts/extract_images_from_meta_spec.rb
 - spec/bulbasaur/extracts/extract_images_from_vimeo_spec.rb
 - spec/bulbasaur/extracts/extract_images_from_youtube_spec.rb
 - spec/bulbasaur/extracts/extract_inner_text_from_html_spec.rb