RubyGems - extractula - Version diff - 0.0.2 → 0.0.3 - Mend

extractula 0.0.2 → 0.0.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

data/lib/extractula/custom_extractors/dinosaur_comics.rb +1 -0
data/lib/extractula/custom_extractors/flickr.rb +2 -1
data/lib/extractula/custom_extractors/you_tube.rb +1 -0
data/lib/extractula/extracted_content.rb +1 -1
data/lib/extractula/extractor.rb +15 -1
data/lib/extractula.rb +1 -1
metadata +1 -1

data/lib/extractula/custom_extractors/dinosaur_comics.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 module Extractula
   class DinosaurComics < Extractula::Extractor
     domain          'qwantz'
+    media_type      'image'
     content_path    'img.comic', 'title'
     image_urls_path 'img.comic'
   end

data/lib/extractula/custom_extractors/flickr.rb CHANGED Viewed

@@ -2,7 +2,8 @@ module Extractula
   class Flickr < Extractula::Extractor
     include Extractula::OEmbed
     domain              'flickr'
-    content_path        'div.photoDescription'
+    media_type          'image'
+    content_path        'meta[name=description]', 'content'
     oembed_endpoint     'http://www.flickr.com/services/oembed/'
   end
 end

data/lib/extractula/custom_extractors/you_tube.rb CHANGED Viewed

@@ -2,6 +2,7 @@ module Extractula
   class YouTube < Extractula::Extractor
     include Extractula::OEmbed
     domain              'youtube'
+    media_type          'video'
     content_path        '.description'
     oembed_endpoint     'http://www.youtube.com/oembed'
   end

data/lib/extractula/extracted_content.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 class Extractula::ExtractedContent
-  attr_reader :url, :title, :content, :summary, :image_urls, :video_embed
+  attr_reader :url, :media_type, :title, :content, :summary, :image_urls, :video_embed
   def initialize(attributes = {})
     attributes.each_pair {|k, v| instance_variable_set("@#{k}", v)}

data/lib/extractula/extractor.rb CHANGED Viewed

@@ -14,6 +14,11 @@ class Extractula::Extractor
   def self.can_extract? url, html
     @extractable_domain ? @extractable_domain == url.domain : false
   end
+  def self.media_type type = nil
+    @media_type = type if type
+    @media_type
+  end
   %w{title content summary image_urls video_embed }.each do |field|
     class_eval <<-EOS
@@ -50,6 +55,7 @@ class Extractula::Extractor
   def extract
     Extractula::ExtractedContent.new({
       :url          => url.url,
+      :media_type   => media_type,
       :title        => title,
       :content      => content,
       :summary      => summary,
@@ -58,6 +64,10 @@ class Extractula::Extractor
     })
   end
+  def media_type
+    self.class.media_type || 'text'
+  end
   def title
     content_at(title_path, title_attr) || content_at("//title")
   end
@@ -72,7 +82,11 @@ class Extractula::Extractor
   def image_urls
     if image_urls_path
-      html.search(image_urls_path).collect { |img| img['src'].strip }
+      html.search(image_urls_path).collect do |img|
+        src = img['src'].strip
+        src = "#{@url.scheme}://#{@url.host}#{src}" if src.start_with?('/')
+        src
+      end
     end
   end

data/lib/extractula.rb CHANGED Viewed

@@ -8,7 +8,7 @@ require 'extractula/extracted_content'
 require 'extractula/extractor'
 module Extractula
-  VERSION = "0.0.2"
+  VERSION = "0.0.3"
   @extractors = []

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: extractula
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Paul Dix