RubyGems - valda-video_scraper - Versions diffs - 1.0.4 → 1.0.5 - Mend

valda-video_scraper 1.0.4 → 1.0.5

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (14)

data/lib/www/video_scraper.rb +1 -1
data/lib/www/video_scraper/base.rb +5 -2
data/lib/www/video_scraper/eic_book.rb +2 -1
data/lib/www/video_scraper/pornhub.rb +4 -3
data/lib/www/video_scraper/pornotube.rb +3 -1
data/lib/www/video_scraper/red_tube.rb +7 -0
data/lib/www/video_scraper/veoh.rb +6 -2
data/lib/www/video_scraper/you_porn.rb +3 -3
data/lib/www/video_scraper/you_tube.rb +6 -3
data/test/www/test_video_scraper.rb +2 -0
data/test/www/video_scraper/test_eic_book.rb +1 -2
data/test/www/video_scraper/test_pornhub.rb +1 -0
data/test/www/video_scraper/test_veoh.rb +12 -1
metadata +6 -5

data/lib/www/video_scraper.rb CHANGED

@@ -15,7 +15,7 @@ end
 module WWW
   module VideoScraper
-    VERSION = '1.0.4'
+    VERSION = '1.0.5'
     MODULES_NAME = %w(adult_satellites age_sage ameba_vision dailymotion eic_book
                       moro_tube nico_video pornhub pornotube red_tube tube8 veoh

data/lib/www/video_scraper/base.rb CHANGED

@@ -19,7 +19,7 @@ module WWW
         end
         def valid_url?(url)
-          not (url =~ @url_regex).nil?
+          Array(@url_regex).any? { |r| r.match(url) }
         end
         def scrape(url, opt = nil)
@@ -32,7 +32,10 @@ module WWW
       def initialize(url, opt = nil)
         @page_url = url
         @opt = (opt || {})
-        @url_regex_match = self.class.instance_variable_get(:@url_regex).match(@page_url).freeze
+        url_regex = self.class.instance_variable_get(:@url_regex)
+        Array(url_regex).any? do |r|
+          @url_regex_match = r.match(@page_url).freeze
+        end
         raise StandardError, "url is not #{self.class.name} link: #{url}" if @url_regex_match.nil?
       end

data/lib/www/video_scraper/eic_book.rb CHANGED

@@ -10,7 +10,8 @@ module WWW
       def scrape
         uri = URI.parse(@page_url)
-        html = http_get("#{uri.scheme}://#{uri.host}#{uri.path}?flg=sm")
+        @page_url = "#{uri.scheme}://#{uri.host}#{uri.path}?flg=sm"
+        html = http_get(@page_url)
         doc = Hpricot(html.toutf8)
         raise FileNotFound unless flashvars = doc.at('//object //param[@name="FlashVars"]')
         flashvars = CGI.parse(flashvars.attributes['value'])

data/lib/www/video_scraper/pornhub.rb CHANGED

@@ -10,13 +10,14 @@ module WWW
       def scrape
         html = http_get(@page_url)
         raise FileNotFound unless m = html.match(/\.addVariable\("options",\s*"([^"]+)"\);/i)
-        @request_url = URI.decode m[1]
-        @response_body = http_get(@request_url)
-        @video_url = @response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
+        request_url = URI.decode m[1]
+        response_body = http_get(request_url)
+        @video_url = response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
         if m = @video_url.match(%r|videos/(\d{3}/\d{3}/\d{3})/\d+.flv|)
           @thumb_url = "http://p1.pornhub.com/thumbs/#{m[1]}/small.jpg"
         end
         @embed_tag = html.match(%r|<textarea[^>]+class="share-flag-embed">(<object type="application/x-shockwave-flash".*?</object>)</textarea>|).to_a[1]
+        @title = html.match(%r|<title>(.*) - Pornhub\.com</title>|).to_a[1]
       end
     end
   end

data/lib/www/video_scraper/pornotube.rb CHANGED

@@ -14,7 +14,9 @@ module WWW
         page = agent.get(@page_url)
         raise FileNotFound unless embed = page.root.at('//object/embed')
         src = embed.attributes['src']
-        hash = src.match(/\?v=(.*)$/)[1]
+        hash = src.to_s.match(/\?v=(.*)$/)[1]
+        t = page.at('//div[@class="contentheader"]//span[@class="blue"]')
+        @title = t.inner_html.gsub(/<[^>]*>/, '').strip
         page = agent.get("http://pornotube.com/player/player.php?#{hash}")
         q = CGI::parse(page.body)
         @video_url = "http://#{q['mediaDomain'][0]}.pornotube.com/#{q['userId'][0]}/#{q['mediaId'][0]}.flv"

data/lib/www/video_scraper/red_tube.rb CHANGED

@@ -64,6 +64,13 @@ module WWW
         nil
       end
+      def title
+        return @title if @title
+        html = http_get(@page_url)
+        doc = Hpricot(html.toutf8)
+        @title = doc.at("//table/tr[2]/td/table/tr[3]/td/table/tr/td").inner_html.gsub(/<[^>]*>/, '').strip
+      end
       def embed_tag
         return @embed_tag if @embed_tag
         url = "http://www.redtube.com/embed/#{content_id}"

data/lib/www/video_scraper/veoh.rb CHANGED

@@ -5,17 +5,21 @@ require File.expand_path(File.dirname(__FILE__) + '/base')
 module WWW
   module VideoScraper
     class Veoh < Base
-      url_regex %r!\Ahttp://www\.veoh\.com/videos/([[:alnum:]]+)!
+      url_regex [%r!\Ahttp://www\.veoh\.com/videos/(v\d+[[:alnum:]]+)!,
+                 %r!\Ahttp://www\.veoh\.com/collection/\w+/watch/.*#watch%3[Dd](v\d+[[:alnum:]]+)!,
+                 %r!\Ahttp://www\.veoh\.com/(?:browse|collection)/(?:[\w]+/)+watch/(v\d+[[:alnum:]]+)!]
       def scrape
         @id = url_regex_match[1]
+        @page_url = "http://www.veoh.com/videos/#{@id}"
         request_url = "http://www.veoh.com/rest/video/#{@id}/details"
         xml = http_get(request_url)
         @video_url = xml.match(/fullPreviewHashPath="([^"]+)"/).to_a[1]
         @title = xml.match(/title="([^"]+)"/).to_a[1]
         @thumb_url = xml.match(/fullMedResImagePath="([^"]+)"/).to_a[1]
         html = http_get(@page_url)
-        if embed_tag = html.match(/\sid="embed"\s[^>]*value="([^"]+)"/).to_a[1]
+        #logger.debug html
+        if embed_tag = html.match(/class="embedinput"\s[^>]*value="([^"]+)"/).to_a[1]
           @embed_tag = CGI.unescapeHTML(embed_tag)
         end
       end

data/lib/www/video_scraper/you_porn.rb CHANGED

@@ -5,7 +5,7 @@ require File.expand_path(File.dirname(__FILE__) + '/base')
 module WWW
   module VideoScraper
     class YouPorn < Base
-      url_regex %r!\Ahttp://youporn\.com/watch/(\d+)!
+      url_regex %r!\Ahttp://(?:www\.)?youporn\.com/watch/(\d+)!
       def scrape
         id = url_regex_match[1]
@@ -15,11 +15,11 @@ module WWW
         doc = Hpricot(html)
         doc.search('//div[@id="download"]//a').each do |elem|
           href = elem.attributes['href']
-          (@video_url = href; break) if href =~ %r!^http://download\.youporn\.com/download/.*\.flv!
+          (@video_url = href; break) if href =~ %r!^http://download\.youporn\.com/download/.*!
         end
         h1 = doc.at('//div[@id="videoArea"]/h1')
         @title = h1.inner_html.gsub(/<[^>]*>/, '').strip
-        @thumb_url = h1.at('/img').attributes['src'].sub(/(\d+)_small\.jpg$/, '\1_large.jpg')
+        @thumb_url = h1.at('/img').attributes['src'].sub(/(\d+)_small\.jpg$/, '\1_large.jpg') if h1.at('/img') != nil
       end
     end
   end

data/lib/www/video_scraper/you_tube.rb CHANGED

@@ -28,9 +28,12 @@ module WWW
       def login
         uri = URI.parse(@page_url)
         page = agent.get("#{uri.scheme}://#{uri.host}/login")
-        login_form = page.form('loginForm')
-        login_form.username = @opt[:you_tube_username]
-        login_form.password = @opt[:you_tube_password]
+        #login_form = page.form('loginForm')
+        #login_form.username = @opt[:you_tube_username]
+        #login_form.password = @opt[:you_tube_password]
+        login_form = page.form('gaia_loginform')
+        login_form.email  = @opt[:you_tube_username]
+        login_form.passwd = @opt[:you_tube_password]
         agent.submit(login_form)
       end

data/test/www/test_video_scraper.rb CHANGED

@@ -34,6 +34,8 @@ class TestVideoScraper < Test::Unit::TestCase
   end
   def test_scrape
+    mod = WWW::VideoScraper.find_module('http://www.yourfilehost.com/media.php?cat=video&file=XV436__03.wmv')
+    assert_equal WWW::VideoScraper::YourFileHost, mod
     vs = WWW::VideoScraper.scrape('http://www.yourfilehost.com/media.php?cat=video&file=XV436__03.wmv')
     assert_kind_of WWW::VideoScraper::YourFileHost, vs
   end

data/test/www/video_scraper/test_eic_book.rb CHANGED

@@ -5,11 +5,10 @@ require File.dirname(__FILE__) + '/../../test_helper'
 class EicBook < Test::Unit::TestCase
   def test_scrape
     vs = WWW::VideoScraper::EicBook.scrape('http://www.eic-book.com/detail_12759.html', default_opt)
-    assert_equal 'http://www.eic-book.com/detail_12759.html', vs.page_url
+    assert_equal 'http://www.eic-book.com/detail_12759.html?flg=sm', vs.page_url
     assert_equal 'http://flv.idol-mile.com/book/12759.flv', vs.video_url
     assert_equal 'http://www.eic-book.com/img/product/h4/pp_12759.jpg', vs.thumb_url
     assert_equal '藤木あやか  DVD 「お蔵入り寸前！藤木あやか A面」', vs.title
     assert_equal 24, vs.capture_urls.count
   end
 end

data/test/www/video_scraper/test_pornhub.rb CHANGED

@@ -9,5 +9,6 @@ class TestPornhub < Test::Unit::TestCase
     assert_match %r|http://media1.pornhub.com/dl/[[:alnum:]]{32}/[[:alnum:]]{8}/videos/000/191/743/191743\.flv|, vs.video_url
     assert_equal 'http://p1.pornhub.com/thumbs/000/191/743/small.jpg', vs.thumb_url
     assert_match %r|^<object type=\"application/x-shockwave-flash\" data=\".*</object>$|, vs.embed_tag
+    assert_equal 'Liliane Tiger and Jane Darling to hot to handle', vs.title
   end
 end

data/test/www/video_scraper/test_veoh.rb CHANGED

@@ -8,6 +8,17 @@ class TestVeoh < Test::Unit::TestCase
     assert_equal 'http://www.veoh.com/videos/v6245232rh8aGEM9', vs.page_url
     assert_match %r|http://content\.veoh\.com/flash/p/\d/[[:alnum:]]{16}/[[:alnum:]]{40}\.fll\?ct=[[:alnum:]]{48}|, vs.video_url
     assert_match %r|http://p-images\.veoh\.com/image\.out\?imageId=media-[[:alnum:]]+.jpg|, vs.thumb_url
-    assert_match %r|^<embed\s.*>$|, vs.embed_tag
+    assert_match %r|^<object\s.*>$|, vs.embed_tag
+  end
+  def test_canonical_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/collection/maysaku/watch/v19937773gwSJPMk', default_opt)
+    assert_equal 'http://www.veoh.com/videos/v19937773gwSJPMk', vs.page_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/collection/maysaku/watch/v19937773gwSJPMk#watch%3Dv16112008KGD7Pg2n', default_opt)
+    assert_equal 'http://www.veoh.com/videos/v16112008KGD7Pg2n', vs.page_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/videos/v19937773gwSJPMk?rank=0&jsonParams=%7B%22numResults%22%3A20%2C%22rlmin%22%3A0%2C%22query%22%3A%22Shaman+King+01%22%2C%22rlmax%22%3Anull%2C%22veohOnly%22%3Atrue%2C%22order%22%3A%22default%22%2C%22range%22%3A%22a%22%2C%22sId%22%3A%22192998624295114150%22%7D&searchId=192998624295114150&rank=1', default_opt)
+    assert_equal 'http://www.veoh.com/videos/v19937773gwSJPMk', vs.page_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/browse/videos/category/comedy/watch/v17078605sszQzbBF')
+    assert_equal 'http://www.veoh.com/videos/v17078605sszQzbBF', vs.page_url
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: valda-video_scraper
 version: !ruby/object:Gem::Version
-  version: 1.0.4
+  version: 1.0.5
 platform: ruby
 authors:
 - YAMAGUCHI Seiji
@@ -9,11 +9,12 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-02-10 00:00:00 -08:00
+date: 2009-06-10 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
+  type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
@@ -23,6 +24,7 @@ dependencies:
     version:
 - !ruby/object:Gem::Dependency
   name: hpricot
+  type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
@@ -63,7 +65,6 @@ files:
 - test/www/video_scraper/test_red_tube.rb
 - test/www/video_scraper/test_base.rb
 - test/www/test_video_scraper.rb
-- test/www/test_video_scraper_flymake.rb
 - lib/www
 - lib/www/video_scraper
 - lib/www/video_scraper/nico_video.rb
@@ -83,7 +84,7 @@ files:
 - lib/www/video_scraper/your_file_host.rb
 - lib/www/video_scraper/tube8.rb
 - lib/www/video_scraper.rb
-has_rdoc: true
+has_rdoc: false
 homepage: http://github.com/valda/video_scraper
 post_install_message:
 rdoc_options:
@@ -118,7 +119,7 @@ requirements: []
 rubyforge_project: video_scraper
 rubygems_version: 1.2.0
 signing_key:
-specification_version: 2
+specification_version: 3
 summary: Web scraping library for video sharing sites.
 test_files: []