valda-video_scraper 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/README +69 -0
- data/Rakefile +144 -0
- data/lib/www/video_scraper/age_sage.rb +34 -0
- data/lib/www/video_scraper/ameba_vision.rb +28 -0
- data/lib/www/video_scraper/base.rb +60 -0
- data/lib/www/video_scraper/dailymotion.rb +36 -0
- data/lib/www/video_scraper/moro_tube.rb +37 -0
- data/lib/www/video_scraper/nico_video.rb +73 -0
- data/lib/www/video_scraper/pornhub.rb +29 -0
- data/lib/www/video_scraper/pornotube.rb +42 -0
- data/lib/www/video_scraper/red_tube.rb +62 -0
- data/lib/www/video_scraper/veoh.rb +30 -0
- data/lib/www/video_scraper/you_porn.rb +32 -0
- data/lib/www/video_scraper/you_tube.rb +55 -0
- data/lib/www/video_scraper/your_file_host.rb +60 -0
- data/lib/www/video_scraper.rb +85 -0
- data/test/test_helper.rb +8 -0
- data/test/www/test_video_scraper.rb +20 -0
- data/test/www/video_scraper/test_age_sage.rb +24 -0
- data/test/www/video_scraper/test_ameba_vision.rb +23 -0
- data/test/www/video_scraper/test_dailymotion.rb +25 -0
- data/test/www/video_scraper/test_moro_tube.rb +24 -0
- data/test/www/video_scraper/test_nico_video.rb +30 -0
- data/test/www/video_scraper/test_pornhub.rb +24 -0
- data/test/www/video_scraper/test_pornotube.rb +32 -0
- data/test/www/video_scraper/test_red_tube.rb +24 -0
- data/test/www/video_scraper/test_veoh.rb +24 -0
- data/test/www/video_scraper/test_you_porn.rb +24 -0
- data/test/www/video_scraper/test_you_tube.rb +40 -0
- data/test/www/video_scraper/test_your_file_host.rb +24 -0
- metadata +107 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
= WWW::VideoScraper
|
2
|
+
|
3
|
+
* http://coderepos.org/share/browser/lang/ruby/video_scraper
|
4
|
+
* http://github.com/valda/video_scraper/tree/master
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
Web scraping library for video sharing sites.
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
Supported sites
|
13
|
+
|
14
|
+
* YouTube
|
15
|
+
* NICO NICO DOUGA
|
16
|
+
* AmebaVision
|
17
|
+
* Dailymotion
|
18
|
+
* Veoh
|
19
|
+
* YourFileHost
|
20
|
+
* RedTube
|
21
|
+
* Pornhub
|
22
|
+
* Ura Agesage
|
23
|
+
* MoroTube
|
24
|
+
* Pornotube
|
25
|
+
* YouPorn
|
26
|
+
|
27
|
+
== SYNOPSIS:
|
28
|
+
|
29
|
+
>> require 'www/video_scraper'
|
30
|
+
>> scraper = WWW::VideoScraper.scrape('http://www.youtube.com/watch?v=OFPnvARUOHI')
|
31
|
+
>> scraper.video_url
|
32
|
+
=> "http://www.youtube.com/get_video?video_id=OFPnvARUOHI&t=OEgsToPDskIpQJU48rm4-sS1RtbItouY"
|
33
|
+
>> scraper.thumb_url
|
34
|
+
=> "http://i.ytimg.com/vi/OFPnvARUOHI/default.jpg"
|
35
|
+
|
36
|
+
== REQUIREMENTS:
|
37
|
+
|
38
|
+
* WWW::Mechanize
|
39
|
+
* Hpricot
|
40
|
+
* CGIAlt (recommended)
|
41
|
+
|
42
|
+
== INSTALL:
|
43
|
+
|
44
|
+
* sudo gem install valda-video_scraper
|
45
|
+
|
46
|
+
== LICENSE:
|
47
|
+
|
48
|
+
(The MIT License)
|
49
|
+
|
50
|
+
Copyright (c) 2009 YAMAGUCHI Seiji <valda at underscore.jp>
|
51
|
+
|
52
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
53
|
+
a copy of this software and associated documentation files (the
|
54
|
+
'Software'), to deal in the Software without restriction, including
|
55
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
56
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
57
|
+
permit persons to whom the Software is furnished to do so, subject to
|
58
|
+
the following conditions:
|
59
|
+
|
60
|
+
The above copyright notice and this permission notice shall be
|
61
|
+
included in all copies or substantial portions of the Software.
|
62
|
+
|
63
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
64
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
65
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
66
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
67
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
68
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
69
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'rake/contrib/sshpublisher'
|
10
|
+
require 'fileutils'
|
11
|
+
require 'lib/www/video_scraper'
|
12
|
+
include FileUtils
|
13
|
+
|
14
|
+
# Gem metadata shared by the packaging, documentation and release tasks below.
NAME              = "video_scraper"
AUTHOR            = "YAMAGUCHI Seiji"
EMAIL             = "valda@underscore.jp"
DESCRIPTION       = "Web scraping library for video sharing sites."
RUBYFORGE_PROJECT = "video_scraper"
HOMEPATH          = "http://github.com/valda/video_scraper"
BIN_FILES         = %w( )

VERS              = WWW::VideoScraper::VERSION

# Last committed Subversion revision, or nil when this is not an svn checkout.
# The capture group must be \d+ (digits); a bare "d+" only matches the letter
# "d" and therefore never yields a revision number.
REV = if File.exist?(".svn/entries")
  File.read(".svn/entries")[/committed-rev="(\d+)"/, 1]
end

# Editor swap files, built gems and setup.rb's .config are removed by `rake clean`.
CLEAN.include ['**/.*.sw?', '*.gem', '.config']

# Options handed to rdoc both by the gem specification and by the rdoc task.
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]
|
33
|
+
|
34
|
+
task :default => [:test]
task :package => [:clean]

# Runs the unit tests. The test files in this project are named test_*.rb
# (e.g. test/www/video_scraper/test_you_tube.rb), so a "*_test.rb" glob would
# select nothing. test/test_helper.rb is shared setup, not a test case, and is
# therefore excluded from the list.
Rake::TestTask.new("test") do |t|
  t.libs << "test"
  t.test_files = FileList["test/**/test_*.rb"].exclude("test/test_helper.rb")
  t.verbose = true
end
|
42
|
+
|
43
|
+
# Gem specification; also used by the package, debug_gem and gemspec tasks.
spec = Gem::Specification.new do |s|
  s.name              = NAME
  s.version           = VERS
  s.platform          = Gem::Platform::RUBY
  s.has_rdoc          = true
  s.extra_rdoc_files  = ["README", "ChangeLog"]
  s.rdoc_options     += RDOC_OPTS + ['--exclude', '^(examples|extras)/']
  s.summary           = DESCRIPTION
  s.description       = DESCRIPTION
  s.author            = AUTHOR
  s.email             = EMAIL
  s.homepage          = HOMEPATH
  s.executables       = BIN_FILES
  s.rubyforge_project = RUBYFORGE_PROJECT
  s.bindir            = "bin"
  s.require_path      = "lib"
  #s.autorequire      = ""
  # Test files live in nested directories and are named test_*.rb, so the
  # glob has to recurse and match on the prefix rather than a "_test" suffix.
  s.test_files        = Dir["test/**/test_*.rb"]

  s.add_dependency('mechanize', '>=0.8.4')
  #s.required_ruby_version = '>= 1.8.2'

  s.files = %w(README ChangeLog Rakefile) +
    Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
    Dir.glob("ext/**/*.{h,c,rb}") +
    Dir.glob("examples/**/*.rb") +
    Dir.glob("tools/*.rb") +
    Dir.glob("rails/*.rb")

  s.extensions = FileList["ext/**/extconf.rb"].to_a
end
|
74
|
+
|
75
|
+
# Packaging task driven by the gem specification; a tarball is requested as well.
Rake::GemPackageTask.new(spec) do |pkg_task|
  pkg_task.gem_spec = spec
  pkg_task.need_tar = true
end

# Builds the gem through a nested rake run and installs the result with sudo.
task :install do
  gem_file = "#{NAME}-#{VERS}.gem"
  sh "rake package"
  sh "sudo gem install pkg/#{gem_file}"
end

# Cleans the working tree, then removes the installed gem with sudo.
task :uninstall => [:clean] do
  sh "sudo gem uninstall #{NAME}"
end
|
89
|
+
|
90
|
+
|
91
|
+
# Generates HTML documentation into html/. The documented files can be
# overridden with a comma-separated DOC_FILES environment variable.
Rake::RDocTask.new do |rd|
  rd.rdoc_dir = 'html'
  rd.options += RDOC_OPTS
  rd.template = "resh"
  #rd.template = "#{ENV['template']}.rb" if ENV['template']

  requested = ENV['DOC_FILES']
  sources =
    if requested
      requested.split(/,\s*/)
    else
      ['README', 'ChangeLog', 'lib/**/*.rb', 'ext/**/*.c']
    end
  rd.rdoc_files.include(*sources)
end
|
104
|
+
|
105
|
+
desc "Publish to RubyForge"
task :rubyforge => [:rdoc, :package] do
  require 'rubyforge'
  publisher = Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'yamaguchi')
  publisher.upload
end

desc 'Package and upload the release to rubyforge.'
task :release => [:clean, :package] do
  # The caller has to confirm the version being released via VERSION=x.y.z.
  requested = ENV["VERSION"]
  abort "Must supply VERSION=x.y.z" unless requested
  abort "Versions don't match #{requested} vs #{VERS}" if requested != VERS

  require 'rubyforge'
  forge = RubyForge.new.configure
  puts "Logging in"
  forge.login

  settings = forge.userconfig
  # settings["release_notes"] = description if description
  # settings["release_changes"] = changes if changes
  settings["preformatted"] = true

  # Tarball first, then the gem, both taken from the pkg/ directory.
  artifacts = %w(tgz gem).map { |ext| "pkg/#{NAME}-#{VERS}.#{ext}" }

  puts "Releasing #{NAME} v. #{VERS}"
  forge.add_release(RUBYFORGE_PROJECT, NAME, VERS, *artifacts)
end
|
135
|
+
|
136
|
+
desc 'Show information about the gem.'
task :debug_gem do
  puts spec.to_ruby
end

desc 'Update gem spec'
task :gemspec do
  # The block form closes (and flushes) the file even if writing fails;
  # a bare open(...).write leaves the handle open.
  File.open("#{NAME}.gemspec", 'w') do |file|
    file.write spec.to_ruby
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for adult.agesage.jp content pages. The page's XML twin
    # (same URL with .xml instead of .html) carries the movie metadata.
    class AgeSage < Base
      url_regex %r!\Ahttp://adult\.agesage\.jp/contentsPage\.html\?mcd=[[:alnum:]]{16}!

      # Validates +url+ via Base and immediately fetches the metadata.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Downloads the XML document and fills in the video, thumbnail, title
      # and embed attributes. Raises FileNotFound on an empty response;
      # leaves the attributes unset when the document has no <movie> element.
      def do_query
        @request_url = @page_url.sub('.html', '.xml')
        @response_body = http_get(@request_url)
        raise FileNotFound if @response_body.nil? || @response_body.empty?

        movie = Hpricot.XML(@response_body.toutf8).at('/movie')
        return unless movie

        @video_url = movie.at('/movieurl').inner_html
        @thumb_url = movie.at('/thumbnail').inner_html
        @title = movie.at('/title').inner_html
        @embed_tag = embed_markup
      end

      # Two <script> lines, each newline-terminated, that load the site's
      # player script and start it for this page's mcd code.
      def embed_markup
        mcd = @page_url.match(%r|agesage\.jp/contentsPage\.html\?mcd=([[:alnum:]]{16})|)[1]
        loader = '<script type="text/javascript" src="http://adult.agesage.jp/js/past_uraui.js"></script>'
        starter = %Q[<script type="text/javascript">Purauifla("mcd=#{mcd}", 320, 275);</script>]
        "#{loader}\n#{starter}\n"
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for vision.ameba.jp watch pages, backed by the site's
    # detailMovie XML API.
    class AmebaVision < Base
      url_regex %r!\Ahttp://vision\.ameba\.jp/watch\.do.*?\?movie=(\d+)!

      # Validates +url+ via Base and immediately fetches the metadata.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Queries the API for the movie id captured from the page URL and sets
      # title, page_url (replaced by the link the API reports), thumb_url and
      # video_url. Raises FileNotFound when the answer lacks any of the
      # expected <item> children, instead of failing with NoMethodError on nil.
      def do_query
        id = url_regex_match[1]
        request_url = "http://vision.ameba.jp/api/get/detailMovie.do?movie=#{id}"
        xdoc = Hpricot.XML(http_get(request_url).toutf8)

        title, link, thumb = %w(title link imageUrlLarge).map do |tag|
          xdoc.at("//item/#{tag}")
        end
        raise FileNotFound unless title && link && thumb

        @title = title.inner_html
        @page_url = link.inner_html
        @thumb_url = thumb.inner_html
        # The video location is derived from the thumbnail URL by swapping
        # the host prefix, the directory and the file suffix.
        @video_url = @thumb_url.sub('//vi', '//vm').sub('/jpg/', '/flv/').sub('_4.jpg', '.flv')
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
module WWW
  module VideoScraper
    # Common behaviour of all site scrapers: URL validation against a
    # per-class pattern, a shared Mechanize agent and an HTTP GET helper
    # with optional caching.
    class Base
      attr_reader :page_url, :video_url, :thumb_url, :embed_tag, :title

      ## class methods
      class << self
        # Declares the pattern that page URLs of this scraper must match.
        # The pattern is stored per class (class-level instance variable).
        def url_regex(regex)
          @url_regex = regex
        end

        # Returns true when +url+ matches the pattern declared for this class.
        def valid_url?(url)
          !(url =~ @url_regex).nil?
        end
      end

      # url:: page URL; must match the class's url_regex.
      # opt:: optional Hash. Keys read here: :cache (object responding to
      #       get/set) and :logger (object responding to debug).
      #
      # Raises StandardError when the URL does not belong to this scraper,
      # which includes the case of a class that declared no url_regex.
      def initialize(url, opt = nil)
        @page_url = url
        @opt = (opt || {})
        pattern = self.class.instance_variable_get(:@url_regex)
        @url_regex_match = (pattern && pattern.match(@page_url)).freeze
        raise StandardError, "url is not #{self.class.name} link: #{url}" if @url_regex_match.nil?
      end

      private

      # MatchData of the page URL against the class's url_regex.
      def url_regex_match; @url_regex_match; end

      # Lazily created Mechanize agent using the 'Windows IE 6' alias.
      def agent
        @agent ||= WWW::Mechanize.new do |a|
          a.user_agent_alias = 'Windows IE 6'
        end
      end

      # Fetches +url+ and returns the response body.
      #
      # opt:: extra open-uri options/headers merged over the default User-Agent.
      #
      # With @opt[:cache] set, the body is looked up under "url|options" first
      # and stored after a miss. Raises RuntimeError when the cache object
      # lacks get/set, TryAgainLater on an HTTP error mentioning 503, and
      # re-raises any other OpenURI::HTTPError.
      def http_get(url, opt = nil)
        open_opt = {
          "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
        }.merge(opt || {})

        cache = @opt[:cache]
        return fetch_url(url, open_opt) unless cache

        unless cache.respond_to?(:get) && cache.respond_to?(:set)
          raise RuntimeError, 'As for cache object what responds to :get and :set is required.'
        end
        # A logger is optional; a cache must be usable without one.
        @opt[:logger].debug 'use cache.' if @opt[:logger]
        cache_key = "#{url}|#{open_opt}"
        content = cache.get(cache_key)
        unless content
          content = fetch_url(url, open_opt)
          cache.set(cache_key, content)
        end
        content
      rescue OpenURI::HTTPError => e
        raise TryAgainLater, e.to_s if e.to_s.include?('503')
        raise
      end

      # Reads the whole body of +url+ through open-uri. OpenURI.open_uri is
      # called directly so the string is always treated as a URI and never
      # falls through to Kernel#open's file/pipe handling.
      def fetch_url(url, open_opt)
        OpenURI.open_uri(url, open_opt) { |fh| fh.read }
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for Dailymotion video pages. Video and preview locations are
    # read from the player's addVariable(...) calls inside <script> elements.
    class Dailymotion < Base
      url_regex %r!\Ahttp://www\.dailymotion\.com/.*?/video/([\w/-]+)!

      # Validates +url+ via Base and immediately scrapes the page.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Downloads the page and sets video_url, thumb_url, title and embed_tag.
      # Title and embed tag are best-effort: they stay nil when the element
      # is missing. Only the "element not found" case is tolerated; other
      # errors are no longer silently swallowed by a blanket rescue.
      def do_query
        uri = URI.parse(@page_url)
        site_root = "#{uri.scheme}://#{uri.host}"
        doc = Hpricot(http_get(@page_url).toutf8)

        doc.search('//script').each do |elem|
          script = elem.inner_html
          if m = script.match(/\.addVariable\("video",\s*"([^"]+)"/i)
            @video_url = absolute_url(site_root, m[1])
          end
          if m = script.match(/\.addVariable\("preview",\s+"([^"]+)"/)
            @thumb_url = absolute_url(site_root, m[1])
          end
        end

        title_node = doc.at('//h1[@class="nav"]')
        @title = title_node ? title_node.inner_html : nil

        embed_node = doc.at('//textarea[@id="video_player_embed_code_text"]')
        @embed_tag = embed_node ? CGI.unescapeHTML(embed_node.inner_html) : nil
      end

      # Turns a URL-escaped player variable into an absolute URL on
      # +site_root+. The value may list several entries separated by "||"
      # or "@@"; only the first one is used.
      def absolute_url(site_root, escaped)
        path = CGI.unescape(escaped).split(/\|\||@@/).first
        URI.join(site_root, path).to_s
      end
    end
  end
end
|
36
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for morotube.com clips. Metadata comes from the site's
    # gen_xml.php endpoint, the embed code from the watch page itself.
    class MoroTube < Base
      url_regex %r!\Ahttp://www\.morotube\.com/watch\.php\?clip=([[:alnum:]]{8})!
      attr_reader :author, :duration

      # Validates +url+ via Base and immediately fetches the metadata.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Runs both lookups: the XML metadata first, then the embed code.
      def do_query
        load_metadata
        load_embed_tag
      end

      # Requests gen_xml.php on the page's host for the clip id captured from
      # the URL and copies the <video> children into the attributes.
      def load_metadata
        endpoint = URI.parse(@page_url)
        endpoint.path = '/gen_xml.php'
        endpoint.query = "type=o&id=#{url_regex_match[1]}"
        xdoc = Hpricot.XML(http_get(endpoint.to_s).toutf8)

        [
          ['@title',     '/root/video/title'],
          ['@video_url', '/root/video/file'],
          ['@thumb_url', '/root/video/image'],
          ['@author',    '/root/video/author'],
          ['@duration',  '/root/video/duration'],
        ].each do |ivar, path|
          instance_variable_set(ivar, xdoc.search(path).inner_html)
        end
      end

      # Reads the embed code from the value of the #inpVdoEmbed input field.
      def load_embed_tag
        page = Hpricot(http_get(@page_url))
        page.search('//input#inpVdoEmbed') do |input|
          @embed_tag = input.attributes['value']
        end
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for nicovideo.jp watch pages. Requires an account, passed in
    # through the :nico_video_mail and :nico_video_password options.
    class NicoVideo < Base
      url_regex %r!\Ahttp://www\.nicovideo\.jp/watch/([[:alnum:]]+)!

      # Validates +url+ via Base, logs in and fetches the metadata.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Posts the credentials; raises RuntimeError unless the response
      # carries the x-niconico-authflag header with value '1'.
      def login
        page = agent.post('https://secure.nicovideo.jp/secure/login?site=niconico',
                          'mail' => @opt[:nico_video_mail],
                          'password' => @opt[:nico_video_password])
        raise RuntimeError, 'login failure' unless page.header['x-niconico-authflag'] == '1'
      end

      # Asks the getflv API for the video location. The answer is a
      # query-string style body; raises FileNotFound when it has no url entry.
      def get_flv(id)
        request_url = "http://www.nicovideo.jp/api/getflv?v=#{id}"
        page = agent.get(request_url)
        urls = CGI.parse(page.body)['url']
        # CGI.parse yields an empty array (never nil) for a missing key, so
        # emptiness is what has to be checked.
        raise FileNotFound if urls.nil? || urls.empty?
        @video_url = urls.first
      end

      # Reads thumbnail URL and title from the getthumbinfo XML.
      def get_thumb(id)
        page = agent.get("http://www.nicovideo.jp/api/getthumbinfo/#{id}")
        xdoc = Hpricot.XML(page.body.toutf8)
        xdoc.search('//thumbnail_url') do |elem|
          @thumb_url = elem.inner_html
        end
        xdoc.search('//thumb/title') do |elem|
          @title = elem.inner_html
        end
      end

      # Reads the iframe embed code from the watch page's form_iframe form.
      # The +id+ argument is not used; the page URL is fetched instead.
      def get_embed_tag(id)
        page = agent.get(@page_url)
        doc = Hpricot(page.body)
        doc.search('//form[@name="form_iframe"] //input[@name="input_iframe"]') do |elem|
          @embed_tag = elem.attributes['value']
        end
      end

      # Logs in and runs the three lookups. Timeouts and HTTP errors other
      # than 403/404 become TryAgainLater; 403/404 become FileNotFound.
      def do_query
        login
        id = url_regex_match[1]
        get_flv(id)
        get_thumb(id)
        get_embed_tag(id)
      rescue Timeout::Error => e
        raise TryAgainLater, e.to_s
      rescue WWW::Mechanize::ResponseCodeError => e
        case e.response_code
        when '404', '403'
          raise FileNotFound, e.to_s
        else
          raise TryAgainLater, e.to_s
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for pornhub.com view_video pages.
    class Pornhub < Base
      url_regex %r|\Ahttp://www\.pornhub\.com/view_video\.php.*viewkey=[[:alnum:]]{20}|

      # Validates +url+ via Base and immediately scrapes the page.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Follows the player's "options" variable to the options document and
      # reads the <flv_url> from it. Raises FileNotFound when either the
      # variable or the <flv_url> element is missing. The thumbnail URL is
      # derived from the video path; the embed tag is taken from the page's
      # share textarea and stays nil when absent.
      def do_query
        html = http_get(@page_url)
        raise FileNotFound unless m = html.match(/\.addVariable\("options",\s*"([^"]+)"\);/i)
        @request_url = URI.decode m[1]
        @response_body = http_get(@request_url)

        @video_url = @response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
        # Without a video URL there is nothing to derive the thumbnail from;
        # report it as missing rather than calling match on nil.
        raise FileNotFound unless @video_url

        # The dot before "flv" is escaped so only a real ".flv" suffix matches.
        if m = @video_url.match(%r|videos/(\d{3}/\d{3}/\d{3})/\d+\.flv|)
          @thumb_url = "http://p1.pornhub.com/thumbs/#{m[1]}/small.jpg"
        end
        @embed_tag = html.match(%r|<textarea[^>]+class="share-flag-embed">(<object type="application/x-shockwave-flash".*?</object>)</textarea>|).to_a[1]
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for pornotube.com media and channel pages.
    class Pornotube < Base
      url_regex %r!\Ahttp://(?:www\.)?pornotube\.com/(?:media|channels)\.php\?.*m=(\d+)!

      # URL of the full-size preview image; set alongside thumb_url.
      attr_reader :image_url

      # Validates +url+ via Base, passes the age gate and scrapes the page.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Submits the age verification form with a fixed birth date so the
      # agent's session may view the content pages.
      def login
        agent.post("http://pornotube.com/index.php",
                   'verifyAge' => 'true',
                   'bMonth' => '01',
                   'bDay' => '01',
                   'bYear' => '1970',
                   'submit' => 'View All Content')
      end

      # Finds the player's <embed> element, takes the value after "?v=" from
      # its src and asks player.php for the media parameters, from which the
      # video, thumbnail and image URLs plus the embed code are built.
      # Raises FileNotFound when the embed element or its "?v=" part is missing.
      def do_query
        login
        page = agent.get(@page_url)
        raise FileNotFound unless embed = page.root.at('//object/embed')
        m = embed.attributes['src'].to_s.match(/\?v=(.*)$/)
        raise FileNotFound unless m

        page = agent.get("http://pornotube.com/player/player.php?#{m[1]}")
        q = CGI::parse(page.body)
        user_id = q['userId'][0]
        media_id = q['mediaId'][0]
        @video_url = "http://#{q['mediaDomain'][0]}.pornotube.com/#{user_id}/#{media_id}.flv"
        @thumb_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}.jpg"
        @image_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}_full.jpg"
        @embed_tag = q['embedCode'][0]
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for redtube.com. The video location is computed from the
    # numeric content id; no page download is needed for it.
    class RedTube < Base
      url_regex %r|\Ahttp://www\.redtube\.com/(\d{4})|

      # Lookup table used to turn digits of the id into file-name characters.
      CODE_TABLE = %w!R 1 5 3 4 2 O 7 K 9 H B C D X F G A I J 8 L M Z 6 P Q 0 S T U V W E Y N!.freeze

      # Validates +url+ via Base and computes the video URL.
      def initialize(url, opt = nil)
        super
        do_query
      end

      # Embed code, fetched lazily from the site's /embed/<id> page on first
      # use (contents of the #cpf textarea). Returns nil when it is not found.
      def embed_tag
        return @embed_tag if @embed_tag
        url = "http://www.redtube.com/embed/#{content_id}"
        doc = Hpricot(http_get(url))
        doc.search('//textarea#cpf') do |elem|
          @embed_tag = elem.inner_html
        end
        @embed_tag
      end

      private

      # Numeric id captured from the page URL.
      def content_id; url_regex_match[1]; end

      # Derives the .flv file name from the id:
      # 1. zero-pad the id to 7 digits; the directory is id / 1000, padded too;
      # 2. sum digit * position (1-based), then take the digit sum of that;
      # 3. map selected digits, shifted by that checksum, through CODE_TABLE
      #    and interleave the two checksum digits.
      def do_query
        s = content_id || '0'
        s = '1' if s.empty?
        number = s.to_i
        pathnr = "%07d" % (number / 1000)
        s = "%07d" % number

        weighted = 0
        s.length.times do |i|
          weighted += s[i, 1].to_i * (i + 1)
        end
        qsum = 0
        weighted.to_s.split(//).each do |digit|
          qsum += digit.to_i
        end
        qstr = "%02d" % qsum

        # s[pos, 1].to_i reads one digit on every Ruby version; s[pos] - 48
        # only works where String#[] returns a character code (Ruby 1.8).
        pick = lambda { |pos, offset| CODE_TABLE[s[pos, 1].to_i + qsum + offset] }
        code = ''
        code += pick.call(3, 3)
        code += qstr[1, 1]
        code += pick.call(0, 2)
        code += pick.call(2, 1)
        code += pick.call(5, 6)
        code += pick.call(1, 5)
        code += qstr[0, 1]
        code += pick.call(4, 7)
        code += pick.call(6, 4)

        content_video = pathnr + '/' + code + '.flv'
        @video_url = "http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/#{content_video}"
        # @thumb_url = "http://thumbs.redtube.com/_thumbs/#{pathnr}/#{s}/#{s}_#{'%03d' % i}.jpg"
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for veoh.com video pages, backed by the site's REST details
    # document plus the watch page for the embed code.
    class Veoh < Base
      url_regex %r!\Ahttp://www\.veoh\.com/videos/([[:alnum:]]+)!

      # Validates +url+ via Base and immediately fetches the metadata.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Reads video URL, title and thumbnail from attributes of the details
      # XML, and the embed code from the value of the page's id="embed"
      # element. Every attribute is best-effort and stays nil when its
      # pattern does not match.
      def do_query
        @id = url_regex_match[1]
        xml = http_get("http://www.veoh.com/rest/video/#{@id}/details")
        @video_url = xml[/fullPreviewHashPath="([^"]+)"/, 1]
        @title = xml[/title="([^"]+)"/, 1]
        @thumb_url = xml[/fullMedResImagePath="([^"]+)"/, 1]

        html = http_get(@page_url)
        escaped_embed = html[/\sid="embed"\s[^>]*value="([^"]+)"/, 1]
        # Unescape only when the page actually carried an embed value;
        # CGI.unescapeHTML cannot handle nil.
        @embed_tag = escaped_embed ? CGI.unescapeHTML(escaped_embed) : nil
      end
    end
  end
end
|
30
|
+
|