RubyGems - valda-video_scraper - Versions diffs - 1.0.4 → 1.0.5 - Mend

valda-video_scraper 1.0.4 → 1.0.5

Files changed (14)

data/lib/www/video_scraper.rb +1 -1
data/lib/www/video_scraper/base.rb +5 -2
data/lib/www/video_scraper/eic_book.rb +2 -1
data/lib/www/video_scraper/pornhub.rb +4 -3
data/lib/www/video_scraper/pornotube.rb +3 -1
data/lib/www/video_scraper/red_tube.rb +7 -0
data/lib/www/video_scraper/veoh.rb +6 -2
data/lib/www/video_scraper/you_porn.rb +3 -3
data/lib/www/video_scraper/you_tube.rb +6 -3
data/test/www/test_video_scraper.rb +2 -0
data/test/www/video_scraper/test_eic_book.rb +1 -2
data/test/www/video_scraper/test_pornhub.rb +1 -0
data/test/www/video_scraper/test_veoh.rb +12 -1
metadata +6 -5

data/lib/www/video_scraper.rb CHANGED

@@ -15,7 +15,7 @@ end
 module WWW
   module VideoScraper
-    VERSION = '1.0.4'
+    VERSION = '1.0.5'
     MODULES_NAME = %w(adult_satellites age_sage ameba_vision dailymotion eic_book
                       moro_tube nico_video pornhub pornotube red_tube tube8 veoh

data/lib/www/video_scraper/base.rb CHANGED

@@ -19,7 +19,7 @@ module WWW
         end
         def valid_url?(url)
-          not (url =~ @url_regex).nil?
+          Array(@url_regex).any? { |r| r.match(url) }
         end
         def scrape(url, opt = nil)
@@ -32,7 +32,10 @@ module WWW
       def initialize(url, opt = nil)
         @page_url = url
         @opt = (opt || {})
-        @url_regex_match = self.class.instance_variable_get(:@url_regex).match(@page_url).freeze
+        url_regex = self.class.instance_variable_get(:@url_regex)
+        Array(url_regex).any? do |r|
+          @url_regex_match = r.match(@page_url).freeze
+        end
         raise StandardError, "url is not #{self.class.name} link: #{url}" if @url_regex_match.nil?
       end

data/lib/www/video_scraper/eic_book.rb CHANGED

@@ -10,7 +10,8 @@ module WWW
       def scrape
         uri = URI.parse(@page_url)
-        html = http_get("#{uri.scheme}://#{uri.host}#{uri.path}?flg=sm")
+        @page_url = "#{uri.scheme}://#{uri.host}#{uri.path}?flg=sm"
+        html = http_get(@page_url)
         doc = Hpricot(html.toutf8)
         raise FileNotFound unless flashvars = doc.at('//object //param[@name="FlashVars"]')
         flashvars = CGI.parse(flashvars.attributes['value'])

data/lib/www/video_scraper/pornhub.rb CHANGED

@@ -10,13 +10,14 @@ module WWW
       def scrape
         html = http_get(@page_url)
         raise FileNotFound unless m = html.match(/\.addVariable\("options",\s*"([^"]+)"\);/i)
-        @request_url = URI.decode m[1]
-        @response_body = http_get(@request_url)
-        @video_url = @response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
+        request_url = URI.decode m[1]
+        response_body = http_get(request_url)
+        @video_url = response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
         if m = @video_url.match(%r|videos/(\d{3}/\d{3}/\d{3})/\d+.flv|)
           @thumb_url = "http://p1.pornhub.com/thumbs/#{m[1]}/small.jpg"
         end
         @embed_tag = html.match(%r|<textarea[^>]+class="share-flag-embed">(<object type="application/x-shockwave-flash".*?</object>)</textarea>|).to_a[1]
+        @title = html.match(%r|<title>(.*) - Pornhub\.com</title>|).to_a[1]
       end
     end
   end

data/lib/www/video_scraper/pornotube.rb CHANGED

@@ -14,7 +14,9 @@ module WWW
         page = agent.get(@page_url)
         raise FileNotFound unless embed = page.root.at('//object/embed')
         src = embed.attributes['src']
-        hash = src.match(/\?v=(.*)$/)[1]
+        hash = src.to_s.match(/\?v=(.*)$/)[1]
+        t = page.at('//div[@class="contentheader"]//span[@class="blue"]')
+        @title = t.inner_html.gsub(/<[^>]*>/, '').strip
         page = agent.get("http://pornotube.com/player/player.php?#{hash}")
         q = CGI::parse(page.body)
         @video_url = "http://#{q['mediaDomain'][0]}.pornotube.com/#{q['userId'][0]}/#{q['mediaId'][0]}.flv"

data/lib/www/video_scraper/red_tube.rb CHANGED

@@ -64,6 +64,13 @@ module WWW
         nil
       end
+      def title
+        return @title if @title
+        html = http_get(@page_url)
+        doc = Hpricot(html.toutf8)
+        @title = doc.at("//table/tr[2]/td/table/tr[3]/td/table/tr/td").inner_html.gsub(/<[^>]*>/, '').strip
+      end
       def embed_tag
         return @embed_tag if @embed_tag
         url = "http://www.redtube.com/embed/#{content_id}"

data/lib/www/video_scraper/veoh.rb CHANGED

@@ -5,17 +5,21 @@ require File.expand_path(File.dirname(__FILE__) + '/base')
 module WWW
   module VideoScraper
     class Veoh < Base
-      url_regex %r!\Ahttp://www\.veoh\.com/videos/([[:alnum:]]+)!
+      url_regex [%r!\Ahttp://www\.veoh\.com/videos/(v\d+[[:alnum:]]+)!,
+                 %r!\Ahttp://www\.veoh\.com/collection/\w+/watch/.*#watch%3[Dd](v\d+[[:alnum:]]+)!,
+                 %r!\Ahttp://www\.veoh\.com/(?:browse|collection)/(?:[\w]+/)+watch/(v\d+[[:alnum:]]+)!]
       def scrape
         @id = url_regex_match[1]
+        @page_url = "http://www.veoh.com/videos/#{@id}"
         request_url = "http://www.veoh.com/rest/video/#{@id}/details"
         xml = http_get(request_url)
         @video_url = xml.match(/fullPreviewHashPath="([^"]+)"/).to_a[1]
         @title = xml.match(/title="([^"]+)"/).to_a[1]
         @thumb_url = xml.match(/fullMedResImagePath="([^"]+)"/).to_a[1]
         html = http_get(@page_url)
-        if embed_tag = html.match(/\sid="embed"\s[^>]*value="([^"]+)"/).to_a[1]
+        #logger.debug html
+        if embed_tag = html.match(/class="embedinput"\s[^>]*value="([^"]+)"/).to_a[1]
           @embed_tag = CGI.unescapeHTML(embed_tag)
         end
       end

data/lib/www/video_scraper/you_porn.rb CHANGED

@@ -5,7 +5,7 @@ require File.expand_path(File.dirname(__FILE__) + '/base')
 module WWW
   module VideoScraper
     class YouPorn < Base
-      url_regex %r!\Ahttp://youporn\.com/watch/(\d+)!
+      url_regex %r!\Ahttp://(?:www\.)?youporn\.com/watch/(\d+)!
       def scrape
         id = url_regex_match[1]
@@ -15,11 +15,11 @@ module WWW
         doc = Hpricot(html)
         doc.search('//div[@id="download"]//a').each do |elem|
           href = elem.attributes['href']
-          (@video_url = href; break) if href =~ %r!^http://download\.youporn\.com/download/.*\.flv!
+          (@video_url = href; break) if href =~ %r!^http://download\.youporn\.com/download/.*!
         end
         h1 = doc.at('//div[@id="videoArea"]/h1')
         @title = h1.inner_html.gsub(/<[^>]*>/, '').strip
-        @thumb_url = h1.at('/img').attributes['src'].sub(/(\d+)_small\.jpg$/, '\1_large.jpg')
+        @thumb_url = h1.at('/img').attributes['src'].sub(/(\d+)_small\.jpg$/, '\1_large.jpg') if h1.at('/img') != nil
       end
     end
   end

data/lib/www/video_scraper/you_tube.rb CHANGED

@@ -28,9 +28,12 @@ module WWW
       def login
         uri = URI.parse(@page_url)
         page = agent.get("#{uri.scheme}://#{uri.host}/login")
-        login_form = page.form('loginForm')
-        login_form.username = @opt[:you_tube_username]
-        login_form.password = @opt[:you_tube_password]
+        #login_form = page.form('loginForm')
+        #login_form.username = @opt[:you_tube_username]
+        #login_form.password = @opt[:you_tube_password]
+        login_form = page.form('gaia_loginform')
+        login_form.email  = @opt[:you_tube_username]
+        login_form.passwd = @opt[:you_tube_password]
         agent.submit(login_form)
       end

data/test/www/test_video_scraper.rb CHANGED

@@ -34,6 +34,8 @@ class TestVideoScraper < Test::Unit::TestCase
   end
   def test_scrape
+    mod = WWW::VideoScraper.find_module('http://www.yourfilehost.com/media.php?cat=video&file=XV436__03.wmv')
+    assert_equal WWW::VideoScraper::YourFileHost, mod
     vs = WWW::VideoScraper.scrape('http://www.yourfilehost.com/media.php?cat=video&file=XV436__03.wmv')
     assert_kind_of WWW::VideoScraper::YourFileHost, vs
   end

data/test/www/video_scraper/test_eic_book.rb CHANGED

@@ -5,11 +5,10 @@ require File.dirname(__FILE__) + '/../../test_helper'
 class EicBook < Test::Unit::TestCase
   def test_scrape
     vs = WWW::VideoScraper::EicBook.scrape('http://www.eic-book.com/detail_12759.html', default_opt)
-    assert_equal 'http://www.eic-book.com/detail_12759.html', vs.page_url
+    assert_equal 'http://www.eic-book.com/detail_12759.html?flg=sm', vs.page_url
     assert_equal 'http://flv.idol-mile.com/book/12759.flv', vs.video_url
     assert_equal 'http://www.eic-book.com/img/product/h4/pp_12759.jpg', vs.thumb_url
     assert_equal '藤木あやか  DVD 「お蔵入り寸前！藤木あやか A面」', vs.title
     assert_equal 24, vs.capture_urls.count
   end
 end

data/test/www/video_scraper/test_pornhub.rb CHANGED

@@ -9,5 +9,6 @@ class TestPornhub < Test::Unit::TestCase
     assert_match %r|http://media1.pornhub.com/dl/[[:alnum:]]{32}/[[:alnum:]]{8}/videos/000/191/743/191743\.flv|, vs.video_url
     assert_equal 'http://p1.pornhub.com/thumbs/000/191/743/small.jpg', vs.thumb_url
     assert_match %r|^<object type=\"application/x-shockwave-flash\" data=\".*</object>$|, vs.embed_tag
+    assert_equal 'Liliane Tiger and Jane Darling to hot to handle', vs.title
   end
 end

data/test/www/video_scraper/test_veoh.rb CHANGED

@@ -8,6 +8,17 @@ class TestVeoh < Test::Unit::TestCase
     assert_equal 'http://www.veoh.com/videos/v6245232rh8aGEM9', vs.page_url
     assert_match %r|http://content\.veoh\.com/flash/p/\d/[[:alnum:]]{16}/[[:alnum:]]{40}\.fll\?ct=[[:alnum:]]{48}|, vs.video_url
     assert_match %r|http://p-images\.veoh\.com/image\.out\?imageId=media-[[:alnum:]]+.jpg|, vs.thumb_url
-    assert_match %r|^<embed\s.*>$|, vs.embed_tag
+    assert_match %r|^<object\s.*>$|, vs.embed_tag
+  end
+  def test_canonical_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/collection/maysaku/watch/v19937773gwSJPMk', default_opt)
+    assert_equal 'http://www.veoh.com/videos/v19937773gwSJPMk', vs.page_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/collection/maysaku/watch/v19937773gwSJPMk#watch%3Dv16112008KGD7Pg2n', default_opt)
+    assert_equal 'http://www.veoh.com/videos/v16112008KGD7Pg2n', vs.page_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/videos/v19937773gwSJPMk?rank=0&jsonParams=%7B%22numResults%22%3A20%2C%22rlmin%22%3A0%2C%22query%22%3A%22Shaman+King+01%22%2C%22rlmax%22%3Anull%2C%22veohOnly%22%3Atrue%2C%22order%22%3A%22default%22%2C%22range%22%3A%22a%22%2C%22sId%22%3A%22192998624295114150%22%7D&searchId=192998624295114150&rank=1', default_opt)
+    assert_equal 'http://www.veoh.com/videos/v19937773gwSJPMk', vs.page_url
+    vs = WWW::VideoScraper::Veoh.scrape('http://www.veoh.com/browse/videos/category/comedy/watch/v17078605sszQzbBF')
+    assert_equal 'http://www.veoh.com/videos/v17078605sszQzbBF', vs.page_url
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: valda-video_scraper
 version: !ruby/object:Gem::Version
-  version: 1.0.4
+  version: 1.0.5
 platform: ruby
 authors:
 - YAMAGUCHI Seiji
@@ -9,11 +9,12 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-02-10 00:00:00 -08:00
+date: 2009-06-10 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
+  type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
@@ -23,6 +24,7 @@ dependencies:
     version:
 - !ruby/object:Gem::Dependency
   name: hpricot
+  type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
@@ -63,7 +65,6 @@ files:
 - test/www/video_scraper/test_red_tube.rb
 - test/www/video_scraper/test_base.rb
 - test/www/test_video_scraper.rb
-- test/www/test_video_scraper_flymake.rb
 - lib/www
 - lib/www/video_scraper
 - lib/www/video_scraper/nico_video.rb
@@ -83,7 +84,7 @@ files:
 - lib/www/video_scraper/your_file_host.rb
 - lib/www/video_scraper/tube8.rb
 - lib/www/video_scraper.rb
-has_rdoc: true
+has_rdoc: false
 homepage: http://github.com/valda/video_scraper
 post_install_message:
 rdoc_options:
@@ -118,7 +119,7 @@ requirements: []
 rubyforge_project: video_scraper
 rubygems_version: 1.2.0
 signing_key:
-specification_version: 2
+specification_version: 3
 summary: Web scraping library for video sharing sites.
 test_files: []