valda-video_scraper 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +4 -0
- data/README +69 -0
- data/Rakefile +144 -0
- data/lib/www/video_scraper/age_sage.rb +34 -0
- data/lib/www/video_scraper/ameba_vision.rb +28 -0
- data/lib/www/video_scraper/base.rb +60 -0
- data/lib/www/video_scraper/dailymotion.rb +36 -0
- data/lib/www/video_scraper/moro_tube.rb +37 -0
- data/lib/www/video_scraper/nico_video.rb +73 -0
- data/lib/www/video_scraper/pornhub.rb +29 -0
- data/lib/www/video_scraper/pornotube.rb +42 -0
- data/lib/www/video_scraper/red_tube.rb +62 -0
- data/lib/www/video_scraper/veoh.rb +30 -0
- data/lib/www/video_scraper/you_porn.rb +32 -0
- data/lib/www/video_scraper/you_tube.rb +55 -0
- data/lib/www/video_scraper/your_file_host.rb +60 -0
- data/lib/www/video_scraper.rb +85 -0
- data/test/test_helper.rb +8 -0
- data/test/www/test_video_scraper.rb +20 -0
- data/test/www/video_scraper/test_age_sage.rb +24 -0
- data/test/www/video_scraper/test_ameba_vision.rb +23 -0
- data/test/www/video_scraper/test_dailymotion.rb +25 -0
- data/test/www/video_scraper/test_moro_tube.rb +24 -0
- data/test/www/video_scraper/test_nico_video.rb +30 -0
- data/test/www/video_scraper/test_pornhub.rb +24 -0
- data/test/www/video_scraper/test_pornotube.rb +32 -0
- data/test/www/video_scraper/test_red_tube.rb +24 -0
- data/test/www/video_scraper/test_veoh.rb +24 -0
- data/test/www/video_scraper/test_you_porn.rb +24 -0
- data/test/www/video_scraper/test_you_tube.rb +40 -0
- data/test/www/video_scraper/test_your_file_host.rb +24 -0
- metadata +107 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
= WWW::VideoScraper
|
2
|
+
|
3
|
+
* http://coderepos.org/share/browser/lang/ruby/video_scraper
|
4
|
+
* http://github.com/valda/video_scraper/tree/master
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
Web scraping library for video sharing sites.
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
Supported sites
|
13
|
+
|
14
|
+
* YouTube
|
15
|
+
* NICO NICO DOUGA
|
16
|
+
* AmebaVision
|
17
|
+
* Dailymotion
|
18
|
+
* Veoh
|
19
|
+
* YourFileHost
|
20
|
+
* RedTube
|
21
|
+
* Pornhub
|
22
|
+
* Ura Agesage
|
23
|
+
* MoroTube
|
24
|
+
* Pornotube
|
25
|
+
* YouPorn
|
26
|
+
|
27
|
+
== SYNOPSIS:
|
28
|
+
|
29
|
+
>> require 'www/video_scraper'
|
30
|
+
>> scraper = WWW::VideoScraper.scrape('http://www.youtube.com/watch?v=OFPnvARUOHI')
|
31
|
+
>> scraper.video_url
|
32
|
+
=> "http://www.youtube.com/get_video?video_id=OFPnvARUOHI&t=OEgsToPDskIpQJU48rm4-sS1RtbItouY"
|
33
|
+
>> scraper.thumb_url
|
34
|
+
=> "http://i.ytimg.com/vi/OFPnvARUOHI/default.jpg"
|
35
|
+
|
36
|
+
== REQUIREMENTS:
|
37
|
+
|
38
|
+
* WWW::Mechanize
|
39
|
+
* Hpricot
|
40
|
+
* CGIAlt (recommended)
|
41
|
+
|
42
|
+
== INSTALL:
|
43
|
+
|
44
|
+
* sudo gem install valda-video_scraper
|
45
|
+
|
46
|
+
== LICENSE:
|
47
|
+
|
48
|
+
(The MIT License)
|
49
|
+
|
50
|
+
Copyright (c) 2009 YAMAGUCHI Seiji <valda at underscore.jp>
|
51
|
+
|
52
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
53
|
+
a copy of this software and associated documentation files (the
|
54
|
+
'Software'), to deal in the Software without restriction, including
|
55
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
56
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
57
|
+
permit persons to whom the Software is furnished to do so, subject to
|
58
|
+
the following conditions:
|
59
|
+
|
60
|
+
The above copyright notice and this permission notice shall be
|
61
|
+
included in all copies or substantial portions of the Software.
|
62
|
+
|
63
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
64
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
65
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
66
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
67
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
68
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
69
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'rake/contrib/sshpublisher'
|
10
|
+
require 'fileutils'
|
11
|
+
require 'lib/www/video_scraper'
|
12
|
+
include FileUtils
|
13
|
+
|
14
|
+
NAME = "video_scraper"
|
15
|
+
AUTHOR = "YAMAGUCHI Seiji"
|
16
|
+
EMAIL = "valda@underscore.jp"
|
17
|
+
DESCRIPTION = "Web scraping library for video sharing sites."
|
18
|
+
RUBYFORGE_PROJECT = "video_scraper"
|
19
|
+
HOMEPATH = "http://github.com/valda/video_scraper"
|
20
|
+
BIN_FILES = %w( )
|
21
|
+
|
22
|
+
VERS = WWW::VideoScraper::VERSION
|
23
|
+
# Subversion revision of the working copy, read from .svn/entries.
# Evaluates to nil when the file is missing, unreadable, or carries no
# committed-rev attribute (e.g. in a git checkout or an installed gem).
REV = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
|
24
|
+
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
|
25
|
+
RDOC_OPTS = [
|
26
|
+
'--title', "#{NAME} documentation",
|
27
|
+
"--charset", "utf-8",
|
28
|
+
"--opname", "index.html",
|
29
|
+
"--line-numbers",
|
30
|
+
"--main", "README",
|
31
|
+
"--inline-source",
|
32
|
+
]
|
33
|
+
|
34
|
+
task :default => [:test]
task :package => [:clean]

# Runs the unit tests below test/.
# The test files of this package are named test_*.rb
# (e.g. test/www/test_video_scraper.rb), so the glob has to match that
# prefix form; a "*_test.rb" suffix glob would select no files at all.
Rake::TestTask.new("test") do |t|
  t.libs << "test"
  t.pattern = "test/**/test_*.rb"
  t.verbose = true
end
|
42
|
+
|
43
|
+
# Gem specification built from the constants declared at the top of this
# Rakefile (NAME, VERS, AUTHOR, ...).
spec = Gem::Specification.new do |s|
  s.name = NAME
  s.version = VERS
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "ChangeLog"]
  s.rdoc_options += RDOC_OPTS + ['--exclude', '^(examples|extras)/']
  s.summary = DESCRIPTION
  s.description = DESCRIPTION
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPATH
  s.executables = BIN_FILES
  s.rubyforge_project = RUBYFORGE_PROJECT
  s.bindir = "bin"
  s.require_path = "lib"
  #s.autorequire = ""
  # Test files are named test_*.rb and live in nested directories
  # (test/www/video_scraper/...), so glob recursively on the prefix form.
  s.test_files = Dir["test/**/test_*.rb"]

  s.add_dependency('mechanize', '>=0.8.4')
  #s.required_ruby_version = '>= 1.8.2'

  s.files = %w(README ChangeLog Rakefile) +
    Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
    Dir.glob("ext/**/*.{h,c,rb}") +
    Dir.glob("examples/**/*.rb") +
    Dir.glob("tools/*.rb") +
    Dir.glob("rails/*.rb")

  s.extensions = FileList["ext/**/extconf.rb"].to_a
end

# Defines the package tasks; besides the .gem a tarball is requested.
Rake::GemPackageTask.new(spec) do |p|
  p.need_tar = true
  p.gem_spec = spec
end
|
79
|
+
|
80
|
+
# Builds the gem through `rake package` and installs the resulting file
# from pkg/ with sudo.
task :install do
  gem_file = "#{NAME}-#{VERS}.gem"
  sh "rake package"
  sh "sudo gem install pkg/#{gem_file}"
end

# Removes the installed gem (after cleaning the working directory).
task :uninstall => [:clean] do
  sh "sudo gem uninstall #{NAME}"
end
|
89
|
+
|
90
|
+
|
91
|
+
# RDoc generation into html/. The documented files can be overridden with
# a comma separated DOC_FILES environment variable; otherwise README,
# ChangeLog and the lib/ and ext/ sources are used.
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'html'
  rdoc.options += RDOC_OPTS
  rdoc.template = "resh"
  #rdoc.template = "#{ENV['template']}.rb" if ENV['template']
  doc_files = ENV['DOC_FILES']
  if doc_files
    rdoc.rdoc_files.include(doc_files.split(/,\s*/))
  else
    rdoc.rdoc_files.include('README', 'ChangeLog', 'lib/**/*.rb', 'ext/**/*.c')
  end
end
|
104
|
+
|
105
|
+
desc "Publish to RubyForge"
task :rubyforge => [:rdoc, :package] do
  require 'rubyforge'
  publisher = Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'yamaguchi')
  publisher.upload
end

desc 'Package and upload the release to rubyforge.'
task :release => [:clean, :package] do |t|
  # The caller has to confirm the version explicitly (VERSION=x.y.z) and it
  # must agree with the version constant of the library.
  version = ENV["VERSION"]
  abort "Must supply VERSION=x.y.z" unless version
  abort "Versions don't match #{version} vs #{VERS}" unless version == VERS
  pkg = "pkg/#{NAME}-#{VERS}"

  require 'rubyforge'
  rf = RubyForge.new.configure
  puts "Logging in"
  rf.login

  config = rf.userconfig
  # config["release_notes"] = description if description
  # config["release_changes"] = changes if changes
  config["preformatted"] = true

  # Both artifacts produced by the package task are uploaded.
  files = ["#{pkg}.tgz", "#{pkg}.gem"]

  puts "Releasing #{NAME} v. #{VERS}"
  rf.add_release RUBYFORGE_PROJECT, NAME, VERS, *files
end
|
135
|
+
|
136
|
+
desc 'Show information about the gem.'
task :debug_gem do
  puts spec.to_ruby
end

desc 'Update gem spec'
task :gemspec do
  # Block form so the file is flushed and closed as soon as the spec is
  # written instead of whenever the handle happens to be garbage collected.
  File.open("#{NAME}.gemspec", 'w') do |file|
    file.write spec.to_ruby
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for adult.agesage.jp content pages. The metadata is read from
    # the XML document that sits next to the HTML page (same URL with the
    # ".html" part replaced by ".xml").
    class AgeSage < Base
      url_regex %r!\Ahttp://adult\.agesage\.jp/contentsPage\.html\?mcd=[[:alnum:]]{16}!

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Downloads the XML description and fills in video_url, thumb_url,
      # title and embed_tag. Raises FileNotFound when the response is empty;
      # leaves the attributes untouched when no <movie> element is present.
      def do_query
        @request_url = @page_url.sub('.html', '.xml')
        @response_body = http_get(@request_url)
        raise FileNotFound if @response_body.nil? || @response_body.empty?

        movie = Hpricot.XML(@response_body.toutf8).at('/movie')
        return unless movie

        @video_url = movie.at('/movieurl').inner_html
        @thumb_url = movie.at('/thumbnail').inner_html
        @title = movie.at('/title').inner_html
        # The 16 character content code is needed for the player script call.
        mcd = @page_url.match(%r|agesage\.jp/contentsPage\.html\?mcd=([[:alnum:]]{16})|)[1]
        @embed_tag = <<-HTML
<script type="text/javascript" src="http://adult.agesage.jp/js/past_uraui.js"></script>
<script type="text/javascript">Purauifla("mcd=#{mcd}", 320, 275);</script>
        HTML
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for vision.ameba.jp watch pages, backed by the site's
    # detailMovie XML API.
    class AmebaVision < Base
      url_regex %r!\Ahttp://vision\.ameba\.jp/watch\.do.*?\?movie=(\d+)!

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Looks the movie up through the XML API and fills in title, page_url,
      # thumb_url and video_url.
      def do_query
        movie_id = url_regex_match[1]
        api_url = "http://vision.ameba.jp/api/get/detailMovie.do?movie=#{movie_id}"
        xdoc = Hpricot.XML(http_get(api_url).toutf8)
        @title = xdoc.at('//item/title').inner_html
        @page_url = xdoc.at('//item/link').inner_html
        @thumb_url = xdoc.at('//item/imageUrlLarge').inner_html
        # The video location is derived from the thumbnail location by
        # swapping host prefix, directory and file suffix.
        flv_url = @thumb_url.sub('//vi', '//vm')
        flv_url = flv_url.sub('/jpg/', '/flv/')
        @video_url = flv_url.sub('_4.jpg', '.flv')
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
module WWW
  module VideoScraper
    # Common base of all site scrapers. A subclass declares the URL pattern
    # it is responsible for with +url_regex+ and fills in the reader
    # attributes below after construction.
    class Base
      attr_reader :page_url, :video_url, :thumb_url, :embed_tag, :title

      ## class methods
      class << self
        # Declares the regular expression a page URL must match to be
        # handled by this scraper class.
        def url_regex(regex)
          @url_regex = regex
        end

        # True when +url+ matches the pattern declared with +url_regex+.
        def valid_url?(url)
          !(url =~ @url_regex).nil?
        end
      end

      # url:: page URL; must match the class's url_regex, otherwise a
      #       StandardError is raised.
      # opt:: optional Hash. Keys read in this class: :cache (object
      #       responding to :get and :set) and :logger (object responding
      #       to :debug).
      def initialize(url, opt = nil)
        @page_url = url
        @opt = (opt || {})
        @url_regex_match = self.class.instance_variable_get(:@url_regex).match(@page_url).freeze
        raise StandardError, "url is not #{self.class.name} link: #{url}" if @url_regex_match.nil?
      end

      private

      # MatchData of the page URL against the class's url_regex.
      def url_regex_match; @url_regex_match; end

      # Lazily created Mechanize agent shared by all requests of this
      # scraper instance.
      def agent
        @agent ||= WWW::Mechanize.new do |a|
          a.user_agent_alias = 'Windows IE 6'
        end
      end

      # Fetches +url+ and returns the response body.
      #
      # opt:: extra options merged over the default User-Agent header and
      #       handed to +open+.
      #
      # When a :cache option was given to the constructor the body is looked
      # up there first and stored after a miss. The :logger option is
      # optional; a cache without a logger must not fail.
      #
      # Raises RuntimeError for a cache object lacking :get/:set, and
      # TryAgainLater when the HTTP error text contains '503'; other HTTP
      # errors are re-raised unchanged.
      def http_get(url, opt = nil)
        open_opt = {
          "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
        }.merge( opt || {} )
        cache = @opt[:cache]
        return open(url, open_opt) {|fh| fh.read } unless cache

        unless cache.respond_to?(:get) && cache.respond_to?(:set)
          raise RuntimeError, 'As for cache object what responds to :get and :set is required.'
        end
        logger = @opt[:logger]
        logger.debug 'use cache.' if logger
        cache_key = "#{url}|#{open_opt}"
        content = cache.get(cache_key)
        unless content
          content = open(url, open_opt) {|fh| fh.read }
          cache.set(cache_key, content)
        end
        content
      rescue OpenURI::HTTPError => e
        raise TryAgainLater, e.to_s if e.to_s.include?('503')
        raise e
      end

    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for www.dailymotion.com video pages. Video and thumbnail
    # locations are taken from the flash player variables embedded in the
    # page's script elements.
    class Dailymotion < Base
      url_regex %r!\Ahttp://www\.dailymotion\.com/.*?/video/([\w/-]+)!

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Downloads the page and fills in video_url, thumb_url, title and
      # embed_tag. Title and embed tag are best effort: any error while
      # extracting them leaves the attribute nil.
      def do_query
        uri = URI.parse(@page_url)
        site_root = "#{uri.scheme}://#{uri.host}"
        doc = Hpricot(http_get(@page_url).toutf8)

        doc.search('//script').each do |script|
          source = script.inner_html
          if video = source.match(/\.addVariable\("video",\s*"([^"]+)"/i)
            # The variable holds several alternatives; the first one is used.
            video_path = CGI.unescape(video[1]).split(/\|\||@@/).first
            @video_url = URI.join(site_root, video_path).to_s
          end
          if preview = source.match(/\.addVariable\("preview",\s+"([^"]+)"/)
            preview_path = CGI.unescape(preview[1]).split(/\|\||@@/).first
            @thumb_url = URI.join(site_root, preview_path).to_s
          end
        end

        @title = begin
          doc.at('//h1[@class="nav"]').inner_html
        rescue StandardError
          nil
        end
        @embed_tag = begin
          CGI.unescapeHTML(doc.at('//textarea[@id="video_player_embed_code_text"]').inner_html)
        rescue StandardError
          nil
        end
      end
    end
  end
end
|
36
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for www.morotube.com watch pages. Metadata comes from the
    # site's gen_xml.php endpoint, the embed code from the HTML page.
    class MoroTube < Base
      url_regex %r!\Ahttp://www\.morotube\.com/watch\.php\?clip=([[:alnum:]]{8})!
      attr_reader :author, :duration

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Fills in title, video_url, thumb_url, author and duration from the
      # XML description, then embed_tag from the watch page itself.
      def do_query
        xml_uri = URI.parse(@page_url)
        xml_uri.path = '/gen_xml.php'
        xml_uri.query = "type=o&id=#{url_regex_match[1]}"
        xdoc = Hpricot.XML(http_get(xml_uri.to_s).toutf8)

        # attribute => element name below /root/video, in assignment order
        [
          [:@title, 'title'],
          [:@video_url, 'file'],
          [:@thumb_url, 'image'],
          [:@author, 'author'],
          [:@duration, 'duration'],
        ].each do |ivar, tag|
          instance_variable_set(ivar, xdoc.search("/root/video/#{tag}").inner_html)
        end

        page = Hpricot(http_get(@page_url))
        page.search('//input#inpVdoEmbed') do |input|
          @embed_tag = input.attributes['value']
        end
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for www.nicovideo.jp watch pages. The site requires a login,
    # so the options :nico_video_mail and :nico_video_password are read
    # from the option hash given to the constructor.
    class NicoVideo < Base
      url_regex %r!\Ahttp://www\.nicovideo\.jp/watch/([[:alnum:]]+)!

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Posts the credentials; raises RuntimeError when the response does
      # not carry the x-niconico-authflag header with value '1'.
      def login
        page = agent.post('https://secure.nicovideo.jp/secure/login?site=niconico',
                          'mail' => @opt[:nico_video_mail],
                          'password' => @opt[:nico_video_password])
        raise RuntimeError, 'login failure' unless page.header['x-niconico-authflag'] == '1'
      end

      # Resolves the video location through the getflv API. The response
      # body is parsed as a query string. Raises FileNotFound when it has no
      # 'url' entry.
      def get_flv(id)
        request_url = "http://www.nicovideo.jp/api/getflv?v=#{id}"
        page = agent.get(request_url)
        q = CGI.parse(page.body)
        # CGI.parse yields an empty array (never nil) for a missing key, so
        # emptiness has to be checked to detect a missing url.
        urls = q['url']
        raise FileNotFound if urls.nil? || urls.empty?
        @video_url = urls.first
      end

      # Reads thumbnail URL and title from the getthumbinfo XML API.
      def get_thumb(id)
        page = agent.get("http://www.nicovideo.jp/api/getthumbinfo/#{id}")
        xdoc = Hpricot.XML(page.body.toutf8)
        xdoc.search('//thumbnail_url') do |elem|
          @thumb_url = elem.inner_html
        end
        xdoc.search('//thumb/title') do |elem|
          @title = elem.inner_html
        end
      end

      # Takes the embed code from the iframe form on the watch page.
      # The +id+ argument is not used; the page URL is fetched instead.
      def get_embed_tag(id)
        page = agent.get(@page_url)
        response_body = page.body
        doc = Hpricot(response_body)
        doc.search('//form[@name="form_iframe"] //input[@name="input_iframe"]') do |elem|
          @embed_tag = elem.attributes['value']
        end
      end

      # Logs in and collects all attributes. Timeouts and HTTP errors are
      # translated: 404/403 become FileNotFound, everything else
      # TryAgainLater.
      def do_query
        begin
          login
          id = url_regex_match[1]
          get_flv(id)
          get_thumb(id)
          get_embed_tag(id)
        rescue Timeout::Error => e
          raise TryAgainLater, e.to_s
        rescue WWW::Mechanize::ResponseCodeError => e
          case e.response_code
          when '404', '403'
            raise FileNotFound, e.to_s
          when '502'
            raise TryAgainLater, e.to_s
          else
            raise TryAgainLater, e.to_s
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for www.pornhub.com view_video.php pages.
    class Pornhub < Base
      url_regex %r|\Ahttp://www\.pornhub\.com/view_video\.php.*viewkey=[[:alnum:]]{20}|

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Reads the player's "options" URL from the page, downloads it and
      # extracts video_url from its <flv_url> element; thumb_url is derived
      # from the video path and embed_tag is taken from the share textarea.
      #
      # Raises FileNotFound when the page has no options variable or the
      # options document has no <flv_url>.
      def do_query
        html = http_get(@page_url)
        raise FileNotFound unless m = html.match(/\.addVariable\("options",\s*"([^"]+)"\);/i)
        @request_url = URI.decode m[1]
        @response_body = http_get(@request_url)
        @video_url = @response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
        # Without a video location there is nothing to scrape; fail with the
        # library's own error instead of a NoMethodError on nil below.
        raise FileNotFound if @video_url.nil?
        if m = @video_url.match(%r|videos/(\d{3}/\d{3}/\d{3})/\d+\.flv|)
          @thumb_url = "http://p1.pornhub.com/thumbs/#{m[1]}/small.jpg"
        end
        @embed_tag = html.match(%r|<textarea[^>]+class="share-flag-embed">(<object type="application/x-shockwave-flash".*?</object>)</textarea>|).to_a[1]
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for pornotube.com media/channel pages.
    class Pornotube < Base
      url_regex %r!\Ahttp://(?:www\.)?pornotube\.com/(?:media|channels)\.php\?.*m=(\d+)!

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Submits the age verification form so that the agent may view content.
      def login
        age_form = {
          'verifyAge' => 'true',
          'bMonth' => '01',
          'bDay' => '01',
          'bYear' => '1970',
          'submit' => 'View All Content',
        }
        agent.post("http://pornotube.com/index.php", age_form)
      end

      # Finds the embedded player on the page, asks player.php for its
      # parameters and builds video_url, thumb_url, the full-size image URL
      # and embed_tag from them. Raises FileNotFound when the page has no
      # embedded player.
      def do_query
        login
        page = agent.get(@page_url)
        embed = page.root.at('//object/embed')
        raise FileNotFound unless embed

        hash = embed.attributes['src'].match(/\?v=(.*)$/)[1]
        player = agent.get("http://pornotube.com/player/player.php?#{hash}")
        q = CGI::parse(player.body)
        user_id = q['userId'][0]
        media_id = q['mediaId'][0]
        @video_url = "http://#{q['mediaDomain'][0]}.pornotube.com/#{user_id}/#{media_id}.flv"
        @thumb_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}.jpg"
        @image_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}_full.jpg"
        @embed_tag = q['embedCode'][0]
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for www.redtube.com pages. The video location is computed
    # from the numeric content id; the embed code is fetched lazily.
    class RedTube < Base
      url_regex %r|\Ahttp://www\.redtube\.com/(\d{4})|

      # Validates the URL (see Base) and computes the video URL.
      def initialize(url, opt = nil)
        super
        do_query
      end

      # Embed code of the video. Fetched from the site's embed page on first
      # access and memoized afterwards.
      def embed_tag
        return @embed_tag if @embed_tag
        url = "http://www.redtube.com/embed/#{content_id}"
        response_body = http_get(url)
        doc = Hpricot(response_body)
        doc.search('//textarea#cpf') do |elem|
          @embed_tag = elem.inner_html
        end
        @embed_tag
      end

      private

      # Content id captured from the page URL.
      def content_id; url_regex_match[1]; end

      # Derives video_url from the content id: the id is zero padded to
      # seven digits, a checksum is formed from the digits, and digits plus
      # checksum select characters from a fixed substitution table to form
      # the file name.
      def do_query
        s = content_id || '0'
        s = '1' if s.empty?
        pathnr = s.to_i / 1000
        s = "%07d" % s.to_i
        pathnr = "%07d" % pathnr
        xc = %w!R 1 5 3 4 2 O 7 K 9 H B C D X F G A I J 8 L M Z 6 P Q 0 S T U V W E Y N!
        # Digit values are read with s[i,1].to_i rather than s[i] - 48:
        # String#[] with a single index returns an Integer only on Ruby 1.8
        # and a String on later versions, where the subtraction fails.
        digit = lambda { |i| s[i,1].to_i }
        # position-weighted digit sum ...
        qsum = 0
        s.length.times do |i|
          qsum += digit.call(i) * (i + 1)
        end
        # ... reduced to the sum of its own decimal digits
        s1 = qsum.to_s
        qsum = 0
        s1.length.times do |i|
          qsum += s1[i,1].to_i
        end
        qstr = "%02d" % qsum
        code = ''
        code += xc[digit.call(3) + qsum + 3]
        code += qstr[1,1]
        code += xc[digit.call(0) + qsum + 2]
        code += xc[digit.call(2) + qsum + 1]
        code += xc[digit.call(5) + qsum + 6]
        code += xc[digit.call(1) + qsum + 5]
        code += qstr[0,1]
        code += xc[digit.call(4) + qsum + 7]
        code += xc[digit.call(6) + qsum + 4]
        content_video = pathnr + '/' + code + '.flv'
        @video_url = "http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/#{content_video}"
        # @thumb_url = "http://thumbs.redtube.com/_thumbs/#{pathnr}/#{s}/#{s}_#{'%03d' % i}.jpg"
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for www.veoh.com video pages. Metadata comes from the REST
    # details document, the embed code from the HTML page.
    class Veoh < Base
      url_regex %r!\Ahttp://www\.veoh\.com/videos/([[:alnum:]]+)!

      # Validates the URL (see Base) and immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Fills in video_url, title and thumb_url from attributes of the REST
      # details document and embed_tag from the page's "embed" input. Each
      # attribute stays nil when its pattern is not found.
      def do_query
        @id = url_regex_match[1]
        request_url = "http://www.veoh.com/rest/video/#{@id}/details"
        xml = http_get(request_url)
        @video_url = xml.match(/fullPreviewHashPath="([^"]+)"/).to_a[1]
        @title = xml.match(/title="([^"]+)"/).to_a[1]
        @thumb_url = xml.match(/fullMedResImagePath="([^"]+)"/).to_a[1]
        html = http_get(@page_url)
        escaped_embed = html.match(/\sid="embed"\s[^>]*value="([^"]+)"/).to_a[1]
        # Only unescape when the page actually carried an embed field;
        # CGI.unescapeHTML cannot handle nil.
        @embed_tag = CGI.unescapeHTML(escaped_embed) if escaped_embed
      end
    end
  end
end
|
30
|
+
|