valda-video_scraper 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +8 -7
- data/Rakefile +1 -0
- data/lib/www/video_scraper.rb +8 -5
- data/lib/www/video_scraper/base.rb +3 -0
- data/lib/www/video_scraper/tube8.rb +28 -0
- data/test/www/video_scraper/test_tube8.rb +24 -0
- metadata +13 -2
data/README
CHANGED
|
@@ -11,18 +11,19 @@ Web scraping library for video sharing sites.
|
|
|
11
11
|
|
|
12
12
|
Supported sites
|
|
13
13
|
|
|
14
|
-
* YouTube
|
|
15
|
-
* NICO NICO DOUGA
|
|
16
14
|
* AmebaVision
|
|
17
15
|
* Dailymotion
|
|
18
|
-
* Veoh
|
|
19
|
-
* YourFileHost
|
|
20
|
-
* RedTube
|
|
21
|
-
* Pornhub
|
|
22
|
-
* Ura Agesage
|
|
23
16
|
* MoroTube
|
|
17
|
+
* NICO NICO DOUGA
|
|
18
|
+
* Pornhub
|
|
24
19
|
* Pornotube
|
|
20
|
+
* RedTube
|
|
21
|
+
* Tube8
|
|
22
|
+
* Ura Agesage
|
|
23
|
+
* Veoh
|
|
25
24
|
* YouPorn
|
|
25
|
+
* YouTube
|
|
26
|
+
* YourFileHost
|
|
26
27
|
|
|
27
28
|
== SYNOPSIS:
|
|
28
29
|
|
data/Rakefile
CHANGED
data/lib/www/video_scraper.rb
CHANGED
|
@@ -2,24 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
require 'rubygems'
|
|
4
4
|
require 'open-uri'
|
|
5
|
+
require 'hpricot'
|
|
5
6
|
require 'mechanize'
|
|
6
7
|
require 'kconv'
|
|
7
8
|
require 'json'
|
|
8
9
|
require 'uri'
|
|
9
10
|
begin
|
|
10
|
-
require 'cgialt'
|
|
11
|
+
require 'cgialt' unless defined? CGI
|
|
11
12
|
rescue LoadError
|
|
12
13
|
require 'cgi'
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
module WWW
|
|
16
17
|
module VideoScraper
|
|
17
|
-
|
|
18
|
-
class FileNotFound < RuntimeError; end
|
|
19
|
-
VERSION = '1.0.1'
|
|
18
|
+
VERSION = '1.0.2'
|
|
20
19
|
|
|
21
20
|
MODULES_NAME = %w(age_sage ameba_vision dailymotion moro_tube
|
|
22
|
-
nico_video pornhub pornotube red_tube veoh
|
|
21
|
+
nico_video pornhub pornotube red_tube tube8 veoh
|
|
23
22
|
you_porn you_tube your_file_host)
|
|
24
23
|
|
|
25
24
|
@@modules = MODULES_NAME.map do |name|
|
|
@@ -34,6 +33,10 @@ module WWW
|
|
|
34
33
|
}
|
|
35
34
|
|
|
36
35
|
class << self
|
|
36
|
+
def modules
|
|
37
|
+
@@modules
|
|
38
|
+
end
|
|
39
|
+
|
|
37
40
|
def options
|
|
38
41
|
@@options
|
|
39
42
|
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
|
2
|
+
|
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
|
4
|
+
|
|
5
|
+
module WWW
|
|
6
|
+
module VideoScraper
|
|
7
|
+
class Tube8 < Base
|
|
8
|
+
url_regex %r!\Ahttp://www\.tube8\.com/.*/(\d+)(?:/|$)!
|
|
9
|
+
|
|
10
|
+
def initialize(url, opt = nil)
|
|
11
|
+
super
|
|
12
|
+
do_query
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
private
|
|
16
|
+
def do_query
|
|
17
|
+
html = http_get(@page_url)
|
|
18
|
+
doc = Hpricot(html.toutf8)
|
|
19
|
+
raise FileNotFound unless flashvars = doc.at('//object //param[@name="FlashVars"]')
|
|
20
|
+
flashvars = CGI.parse(flashvars.attributes['value'])
|
|
21
|
+
@video_url = flashvars['videoUrl'][0]
|
|
22
|
+
uri = URI.parse(@page_url)
|
|
23
|
+
@thumb_url = URI.join("#{uri.scheme}://#{uri.host}", flashvars['imageUrl'][0]).to_s
|
|
24
|
+
@title = doc.at('//h1[@class="text"]').inner_html rescue nil
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
|
2
|
+
|
|
3
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
|
4
|
+
|
|
5
|
+
class TestTube8 < Test::Unit::TestCase
|
|
6
|
+
def setup
|
|
7
|
+
@cache_root = '/tmp/test_video_scraper_cache'
|
|
8
|
+
WWW::VideoScraper.configure do |conf|
|
|
9
|
+
conf[:cache] = FileCache.new('TestVideoScraper', @cache_root, 60*60*24)
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def teardown
|
|
14
|
+
# FileUtils.remove_entry_secure(@cache_root, true)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_scrape
|
|
18
|
+
vs = WWW::VideoScraper.scrape('http://www.tube8.com/anal/alexis-amore-pov/56983/')
|
|
19
|
+
assert_equal 'http://www.tube8.com/anal/alexis-amore-pov/56983/', vs.page_url
|
|
20
|
+
assert_match %r|http://medianl\d+\.tube8\.com/flv/[[:alnum:]]{32}/\d{8}/\d{4}/\d{2}/[[:alnum:]]+/[[:alnum:]]+\.flv|, vs.video_url
|
|
21
|
+
assert_equal 'http://www.tube8.com/vs/83/56983.jpg', vs.thumb_url
|
|
22
|
+
assert_equal 'Alexis Amore POV', vs.title
|
|
23
|
+
end
|
|
24
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: valda-video_scraper
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.1
|
|
4
|
+
version: 1.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- YAMAGUCHI Seiji
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-01-
|
|
12
|
+
date: 2009-01-18 00:00:00 -08:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
@@ -21,6 +21,15 @@ dependencies:
|
|
|
21
21
|
- !ruby/object:Gem::Version
|
|
22
22
|
version: 0.8.4
|
|
23
23
|
version:
|
|
24
|
+
- !ruby/object:Gem::Dependency
|
|
25
|
+
name: hpricot
|
|
26
|
+
version_requirement:
|
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
28
|
+
requirements:
|
|
29
|
+
- - ">="
|
|
30
|
+
- !ruby/object:Gem::Version
|
|
31
|
+
version: 0.6.164
|
|
32
|
+
version:
|
|
24
33
|
description: Web scraping library for video sharing sites.
|
|
25
34
|
email: valda@underscore.jp
|
|
26
35
|
executables: []
|
|
@@ -41,6 +50,7 @@ files:
|
|
|
41
50
|
- test/www/video_scraper/test_dailymotion.rb
|
|
42
51
|
- test/www/video_scraper/test_age_sage.rb
|
|
43
52
|
- test/www/video_scraper/test_pornotube.rb
|
|
53
|
+
- test/www/video_scraper/test_tube8.rb
|
|
44
54
|
- test/www/video_scraper/test_your_file_host.rb
|
|
45
55
|
- test/www/video_scraper/test_moro_tube.rb
|
|
46
56
|
- test/www/video_scraper/test_veoh.rb
|
|
@@ -65,6 +75,7 @@ files:
|
|
|
65
75
|
- lib/www/video_scraper/red_tube.rb
|
|
66
76
|
- lib/www/video_scraper/base.rb
|
|
67
77
|
- lib/www/video_scraper/your_file_host.rb
|
|
78
|
+
- lib/www/video_scraper/tube8.rb
|
|
68
79
|
- lib/www/video_scraper.rb
|
|
69
80
|
has_rdoc: true
|
|
70
81
|
homepage: http://github.com/valda/video_scraper
|