valda-video_scraper 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog ADDED
@@ -0,0 +1,4 @@
1
+ == 1.0.1 / 2009-01-17
2
+
3
+ * initial release
4
+
data/README ADDED
@@ -0,0 +1,69 @@
1
+ = WWW::VideoScraper
2
+
3
+ * http://coderepos.org/share/browser/lang/ruby/video_scraper
4
+ * http://github.com/valda/video_scraper/tree/master
5
+
6
+ == DESCRIPTION:
7
+
8
+ Web scraping library for video sharing sites.
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ Supported sites
13
+
14
+ * YouTube
15
+ * NICO NICO DOUGA
16
+ * AmebaVision
17
+ * Dailymotion
18
+ * Veoh
19
+ * YourFileHost
20
+ * RedTube
21
+ * Pornhub
22
+ * Ura Agesage
23
+ * MoroTube
24
+ * Pornotube
25
+ * YouPorn
26
+
27
+ == SYNOPSIS:
28
+
29
+ >> require 'www/video_scraper'
30
+ >> scraper = WWW::VideoScraper.scrape('http://www.youtube.com/watch?v=OFPnvARUOHI')
31
+ >> scraper.video_url
32
+ => "http://www.youtube.com/get_video?video_id=OFPnvARUOHI&t=OEgsToPDskIpQJU48rm4-sS1RtbItouY"
33
+ >> scraper.thumb_url
34
+ => "http://i.ytimg.com/vi/OFPnvARUOHI/default.jpg"
35
+
36
+ == REQUIREMENTS:
37
+
38
+ * WWW::Mechanize
39
+ * Hpricot
40
+ * CGIAlt (recommended)
41
+
42
+ == INSTALL:
43
+
44
+ * sudo gem install valda-video_scraper
45
+
46
+ == LICENSE:
47
+
48
+ (The MIT License)
49
+
50
+ Copyright (c) 2009 YAMAGUCHI Seiji <valda at underscore.jp>
51
+
52
+ Permission is hereby granted, free of charge, to any person obtaining
53
+ a copy of this software and associated documentation files (the
54
+ 'Software'), to deal in the Software without restriction, including
55
+ without limitation the rights to use, copy, modify, merge, publish,
56
+ distribute, sublicense, and/or sell copies of the Software, and to
57
+ permit persons to whom the Software is furnished to do so, subject to
58
+ the following conditions:
59
+
60
+ The above copyright notice and this permission notice shall be
61
+ included in all copies or substantial portions of the Software.
62
+
63
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
64
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
66
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
67
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
68
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
69
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,144 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/testtask'
5
+ require 'rake/packagetask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/rdoctask'
8
+ require 'rake/contrib/rubyforgepublisher'
9
+ require 'rake/contrib/sshpublisher'
10
+ require 'fileutils'
11
+ require 'lib/www/video_scraper'
12
+ include FileUtils
13
+
14
# Project metadata shared by the gem specification and the release tasks.
NAME              = "video_scraper"
AUTHOR            = "YAMAGUCHI Seiji"
EMAIL             = "valda@underscore.jp"
DESCRIPTION       = "Web scraping library for video sharing sites."
RUBYFORGE_PROJECT = "video_scraper"
HOMEPATH          = "http://github.com/valda/video_scraper"
BIN_FILES         = %w( )

VERS = WWW::VideoScraper::VERSION
# Last committed Subversion revision, or nil when .svn/entries cannot be read
# or carries no committed-rev attribute. The capture has to be \d+ (digits);
# a bare "d+" only matches literal "d" characters and never finds a revision.
REV = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
# Options handed to RDoc both by the gem specification and by the rdoc task.
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]
33
+
34
# Plain `rake` runs the test suite; packaging always starts from a clean tree.
task :default => [:test]
task :package => [:clean]

Rake::TestTask.new("test") do |test_task|
  test_task.libs.push("test")
  test_task.pattern = "test/**/*_test.rb"
  test_task.verbose = true
end

# Gem specification. The local name +spec+ is also read by the :debug_gem
# and :gemspec tasks further down.
spec = Gem::Specification.new do |gem|
  # identity
  gem.name              = NAME
  gem.version           = VERS
  gem.platform          = Gem::Platform::RUBY
  gem.summary           = DESCRIPTION
  gem.description       = DESCRIPTION
  gem.author            = AUTHOR
  gem.email             = EMAIL
  gem.homepage          = HOMEPATH
  gem.rubyforge_project = RUBYFORGE_PROJECT

  # documentation
  gem.has_rdoc         = true
  gem.extra_rdoc_files = ["README", "ChangeLog"]
  gem.rdoc_options    += RDOC_OPTS + ['--exclude', '^(examples|extras)/']

  # layout
  gem.executables  = BIN_FILES
  gem.bindir       = "bin"
  gem.require_path = "lib"
  #gem.autorequire = ""
  gem.test_files   = Dir["test/*_test.rb"]

  gem.add_dependency('mechanize', '>=0.8.4')
  #gem.required_ruby_version = '>= 1.8.2'

  # Fixed top-level files first, then everything matched by each glob,
  # in the order the globs are listed.
  packaged = %w(README ChangeLog Rakefile)
  [
    "{bin,doc,test,lib,templates,generator,extras,website,script}/**/*",
    "ext/**/*.{h,c,rb}",
    "examples/**/*.rb",
    "tools/*.rb",
    "rails/*.rb",
  ].each { |pattern| packaged += Dir.glob(pattern) }
  gem.files = packaged

  gem.extensions = FileList["ext/**/extconf.rb"].to_a
end

# Builds pkg/<name>-<version>.gem plus a tarball of the same contents.
Rake::GemPackageTask.new(spec) do |pkg_task|
  pkg_task.need_tar = true
  pkg_task.gem_spec = spec
end
79
+
80
# Package the gem in a child rake process, then install the result with sudo.
task :install do
  gem_file = "#{NAME}-#{VERS}.gem"
  sh "rake package"
  sh "sudo gem install pkg/#{gem_file}"
end

# Remove the installed gem (after cleaning the working tree).
task :uninstall => [:clean] do
  sh "sudo gem uninstall #{NAME}"
end


# Generates HTML documentation into html/. Set DOC_FILES to a comma-separated
# list to document only those files; otherwise README, ChangeLog and all
# library/extension sources are included.
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'html'
  doc.options += RDOC_OPTS
  doc.template = "resh"
  #doc.template = "#{ENV['template']}.rb" if ENV['template']
  requested = ENV['DOC_FILES']
  if requested
    doc.rdoc_files.include(requested.split(/,\s*/))
  else
    doc.rdoc_files.include('README', 'ChangeLog', 'lib/**/*.rb', 'ext/**/*.c')
  end
end
104
+
105
desc "Publish to RubyForge"
task :rubyforge => [:rdoc, :package] do
  require 'rubyforge'
  publisher = Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'yamaguchi')
  publisher.upload
end

desc 'Package and upload the release to rubyforge.'
task :release => [:clean, :package] do
  # The caller must confirm the version being released via VERSION=x.y.z.
  requested = ENV["VERSION"]
  abort "Must supply VERSION=x.y.z" unless requested
  abort "Versions don't match #{requested} vs #{VERS}" unless requested == VERS
  pkg = "pkg/#{NAME}-#{VERS}"

  require 'rubyforge'
  rf = RubyForge.new.configure
  puts "Logging in"
  rf.login

  # rf.userconfig["release_notes"] = description if description
  # rf.userconfig["release_changes"] = changes if changes
  rf.userconfig["preformatted"] = true

  # Upload both artifacts produced by the package task.
  files = %w(tgz gem).map { |ext| "#{pkg}.#{ext}" }

  puts "Releasing #{NAME} v. #{VERS}"
  rf.add_release RUBYFORGE_PROJECT, NAME, VERS, *files
end
135
+
136
desc 'Show information about the gem.'
task :debug_gem do
  puts spec.to_ruby
end

desc 'Update gem spec'
task :gemspec do
  # Block form closes (and flushes) the file even if spec.to_ruby raises;
  # the bare open(...).write form left the handle to the garbage collector.
  File.open("#{NAME}.gemspec", 'w') { |file| file.write spec.to_ruby }
end
@@ -0,0 +1,34 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for Ura Agesage content pages (adult.agesage.jp).
    class AgeSage < Base
      url_regex %r!\Ahttp://adult\.agesage\.jp/contentsPage\.html\?mcd=[[:alnum:]]{16}!

      # Validates +url+ via Base#initialize, then immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Fetches the XML twin of the page (first ".html" replaced by ".xml").
      # When the document has a <movie> element, fills in the video URL,
      # thumbnail URL, title and embed tag from it.
      # Raises FileNotFound when the response body is nil or empty.
      def do_query
        @request_url = @page_url.sub('.html', '.xml')
        @response_body = http_get(@request_url)
        raise FileNotFound if @response_body.nil? || @response_body.empty?

        movie = Hpricot.XML(@response_body.toutf8).at('/movie')
        return unless movie

        @video_url = movie.at('/movieurl').inner_html
        @thumb_url = movie.at('/thumbnail').inner_html
        @title = movie.at('/title').inner_html
        # The 16-character content code is interpolated into the player script call.
        mcd = @page_url.match(%r|agesage\.jp/contentsPage\.html\?mcd=([[:alnum:]]{16})|)[1]
        @embed_tag = <<-HTML
<script type="text/javascript" src="http://adult.agesage.jp/js/past_uraui.js"></script>
<script type="text/javascript">Purauifla("mcd=#{mcd}", 320, 275);</script>
        HTML
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for AmebaVision watch pages (vision.ameba.jp).
    class AmebaVision < Base
      url_regex %r!\Ahttp://vision\.ameba\.jp/watch\.do.*?\?movie=(\d+)!

      # Validates +url+ via Base#initialize, then immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Reads title, canonical page link and large thumbnail from the
      # detailMovie API response, then derives the video URL from the
      # thumbnail URL by string substitution.
      def do_query
        movie_id = url_regex_match[1]
        xml = http_get("http://vision.ameba.jp/api/get/detailMovie.do?movie=#{movie_id}")
        feed = Hpricot.XML(xml.toutf8)
        @title, @page_url, @thumb_url = %w(title link imageUrlLarge).map do |tag|
          feed.at("//item/#{tag}").inner_html
        end
        flv_url = @thumb_url.sub('//vi', '//vm')
        flv_url = flv_url.sub('/jpg/', '/flv/')
        @video_url = flv_url.sub('_4.jpg', '.flv')
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
module WWW
  module VideoScraper
    # Shared behaviour for the site scrapers: URL validation against a
    # per-class pattern, a lazily built Mechanize agent, and an HTTP GET
    # helper with optional caching.
    class Base
      attr_reader :page_url, :video_url, :thumb_url, :embed_tag, :title

      ## class methods
      class << self
        # Declares the pattern a page URL must match for this scraper class.
        # Stored in a class-level instance variable, so each subclass has its own.
        def url_regex(regex)
          @url_regex = regex
        end

        # True when +url+ matches the pattern declared with url_regex.
        def valid_url?(url)
          !(url =~ @url_regex).nil?
        end
      end

      # url:: page URL; must match the class's url_regex.
      # opt:: optional Hash. Keys read in this class: :cache (object responding
      #       to :get and :set) and :logger (object responding to :debug).
      #       Subclasses read further keys.
      #
      # Raises StandardError when +url+ does not match the class's pattern.
      def initialize(url, opt = nil)
        @page_url = url
        @opt = (opt || {})
        @url_regex_match = self.class.instance_variable_get(:@url_regex).match(@page_url).freeze
        raise StandardError, "url is not #{self.class.name} link: #{url}" if @url_regex_match.nil?
      end

      private

      # Frozen MatchData produced by matching the page URL in #initialize.
      def url_regex_match; @url_regex_match; end

      # Memoized Mechanize agent that identifies itself as IE 6 on Windows.
      def agent
        @agent ||= WWW::Mechanize.new do |a|
          a.user_agent_alias = 'Windows IE 6'
        end
      end

      # Returns the body of +url+ as a String.
      #
      # opt:: extra options merged over the default User-Agent header and
      #       passed through to open.
      #
      # When @opt[:cache] is set, the body is looked up under "url|options"
      # and only fetched (then stored) on a miss. The "use cache." debug
      # message is written only if a :logger was supplied; a cache without a
      # logger is a valid configuration and must not raise.
      #
      # Raises RuntimeError when the cache object lacks :get or :set,
      # TryAgainLater when the HTTP error message mentions 503, and re-raises
      # any other OpenURI::HTTPError unchanged.
      def http_get(url, opt = nil)
        open_opt = {
          "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
        }.merge(opt || {})

        cache = @opt[:cache]
        return read_url(url, open_opt) unless cache

        unless cache.respond_to?(:get) && cache.respond_to?(:set)
          raise RuntimeError, 'As for cache object what responds to :get and :set is required.'
        end
        logger = @opt[:logger]
        logger.debug 'use cache.' if logger

        cache_key = "#{url}|#{open_opt}"
        content = cache.get(cache_key)
        unless content
          content = read_url(url, open_opt)
          cache.set(cache_key, content)
        end
        content
      rescue OpenURI::HTTPError => e
        raise TryAgainLater, e.to_s if e.to_s.include?('503')
        raise
      end

      # Opens +url+ with +open_opt+ and returns everything read from it.
      def read_url(url, open_opt)
        open(url, open_opt) { |fh| fh.read }
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for Dailymotion video pages.
    class Dailymotion < Base
      url_regex %r!\Ahttp://www\.dailymotion\.com/.*?/video/([\w/-]+)!

      # Validates +url+ via Base#initialize, then immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Scans every <script> element for the player's "video" and "preview"
      # addVariable calls and resolves the first listed path of each against
      # the page's scheme and host. Title and embed tag are best-effort: any
      # StandardError while reading them leaves the field nil.
      def do_query
        page_uri = URI.parse(@page_url)
        site_root = "#{page_uri.scheme}://#{page_uri.host}"
        doc = Hpricot(http_get(@page_url).toutf8)

        doc.search('//script').each do |script|
          source = script.inner_html
          video = source.match(/\.addVariable\("video",\s*"([^"]+)"/i)
          @video_url = first_variant_url(site_root, video[1]) if video
          preview = source.match(/\.addVariable\("preview",\s+"([^"]+)"/)
          @thumb_url = first_variant_url(site_root, preview[1]) if preview
        end

        @title = begin
          doc.at('//h1[@class="nav"]').inner_html
        rescue StandardError
          nil
        end
        @embed_tag = begin
          CGI.unescapeHTML(doc.at('//textarea[@id="video_player_embed_code_text"]').inner_html)
        rescue StandardError
          nil
        end
      end

      # URL-decodes +escaped+, keeps the part before the first "||" or "@@"
      # separator, and joins it onto +site_root+.
      def first_variant_url(site_root, escaped)
        path = CGI.unescape(escaped).split(/\|\||@@/).first
        URI.join(site_root, path).to_s
      end
    end
  end
end
36
+
@@ -0,0 +1,37 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
+ class MoroTube < Base
8
+ url_regex %r!\Ahttp://www\.morotube\.com/watch\.php\?clip=([[:alnum:]]{8})!
9
+ attr_reader :author, :duration
10
+
11
+ def initialize(url, opt = nil)
12
+ super
13
+ do_query
14
+ end
15
+
16
+ private
17
+ def do_query
18
+ uri = URI.parse(@page_url)
19
+ uri.path = '/gen_xml.php'
20
+ uri.query = "type=o&id=#{url_regex_match[1]}"
21
+ xml = http_get(uri.to_s)
22
+ xdoc = Hpricot.XML(xml.toutf8)
23
+ @title = xdoc.search('/root/video/title').inner_html
24
+ @video_url = xdoc.search('/root/video/file').inner_html
25
+ @thumb_url = xdoc.search('/root/video/image').inner_html
26
+ @author = xdoc.search('/root/video/author').inner_html
27
+ @duration = xdoc.search('/root/video/duration').inner_html
28
+
29
+ html = http_get(@page_url)
30
+ doc = Hpricot(html)
31
+ doc.search('//input#inpVdoEmbed') do |elem|
32
+ @embed_tag = elem.attributes['value']
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,73 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for NICO NICO DOUGA watch pages. Logs in with the
    # :nico_video_mail / :nico_video_password options before querying.
    class NicoVideo < Base
      url_regex %r!\Ahttp://www\.nicovideo\.jp/watch/([[:alnum:]]+)!

      # Validates +url+ via Base#initialize, then immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Posts the credentials from @opt to the login endpoint.
      # Raises RuntimeError unless the response's x-niconico-authflag header is '1'.
      def login
        page = agent.post('https://secure.nicovideo.jp/secure/login?site=niconico',
                          'mail' => @opt[:nico_video_mail],
                          'password' => @opt[:nico_video_password])
        raise RuntimeError, 'login failure' unless page.header['x-niconico-authflag'] == '1'
      end

      # Sets @video_url from the "url" field of the getflv API response.
      # Raises FileNotFound when the response carries no "url" field.
      def get_flv(id)
        request_url = "http://www.nicovideo.jp/api/getflv?v=#{id}"
        page = agent.get(request_url)
        q = CGI.parse(page.body)
        # CGI.parse yields an empty Array (never nil) for an absent key, so
        # the presence test has to be empty?; a plain truthiness check on
        # q['url'] could never trigger.
        raise FileNotFound if q['url'].empty?
        @video_url = q['url'].first
      end

      # Sets @thumb_url and @title from the getthumbinfo API response.
      def get_thumb(id)
        page = agent.get("http://www.nicovideo.jp/api/getthumbinfo/#{id}")
        xdoc = Hpricot.XML(page.body.toutf8)
        xdoc.search('//thumbnail_url') do |elem|
          @thumb_url = elem.inner_html
        end
        xdoc.search('//thumb/title') do |elem|
          @title = elem.inner_html
        end
      end

      # Sets @embed_tag from the iframe form input on the watch page.
      # +id+ is accepted for symmetry with the other getters but is not used.
      def get_embed_tag(id)
        page = agent.get(@page_url)
        doc = Hpricot(page.body)
        doc.search('//form[@name="form_iframe"] //input[@name="input_iframe"]') do |elem|
          @embed_tag = elem.attributes['value']
        end
      end

      # Logs in and runs all lookups. Timeouts become TryAgainLater; HTTP
      # 403/404 become FileNotFound; every other HTTP error code becomes
      # TryAgainLater.
      def do_query
        login
        id = url_regex_match[1]
        get_flv(id)
        get_thumb(id)
        get_embed_tag(id)
      rescue Timeout::Error => e
        raise TryAgainLater, e.to_s
      rescue WWW::Mechanize::ResponseCodeError => e
        case e.response_code
        when '404', '403'
          raise FileNotFound, e.to_s
        else
          raise TryAgainLater, e.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for Pornhub view_video pages.
    class Pornhub < Base
      url_regex %r|\Ahttp://www\.pornhub\.com/view_video\.php.*viewkey=[[:alnum:]]{20}|

      # Validates +url+ via Base#initialize, then immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Follows the player's "options" URL found in the page and reads the
      # FLV address from its <flv_url> element. The thumbnail URL is derived
      # from the numeric directory part of the FLV path when it has the
      # expected shape; the embed tag comes from the share textarea.
      #
      # Raises FileNotFound when the page has no "options" variable or the
      # options document has no <flv_url> element.
      def do_query
        html = http_get(@page_url)
        options = html.match(/\.addVariable\("options",\s*"([^"]+)"\);/i)
        raise FileNotFound unless options
        @request_url = URI.decode options[1]
        @response_body = http_get(@request_url)
        @video_url = @response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
        # Without this guard a missing <flv_url> surfaced as NoMethodError on nil.
        raise FileNotFound unless @video_url
        # The dot before "flv" is escaped so only a literal ".flv" suffix matches.
        if (m = @video_url.match(%r|videos/(\d{3}/\d{3}/\d{3})/\d+\.flv|))
          @thumb_url = "http://p1.pornhub.com/thumbs/#{m[1]}/small.jpg"
        end
        @embed_tag = html.match(%r|<textarea[^>]+class="share-flag-embed">(<object type="application/x-shockwave-flash".*?</object>)</textarea>|).to_a[1]
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for Pornotube media/channel pages. Besides the common fields it
    # exposes image_url, the URL of the full-size preview image.
    class Pornotube < Base
      url_regex %r!\Ahttp://(?:www\.)?pornotube\.com/(?:media|channels)\.php\?.*m=(\d+)!
      # Previously computed in do_query but unreachable from outside the object.
      attr_reader :image_url

      # Validates +url+ via Base#initialize, then immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Submits the age-verification form so later requests see full content.
      def login
        agent.post("http://pornotube.com/index.php",
                   'verifyAge' => 'true',
                   'bMonth' => '01',
                   'bDay' => '01',
                   'bYear' => '1970',
                   'submit' => 'View All Content')
      end

      # Takes the player token from the page's <embed> src, asks player.php
      # for the media parameters, and builds the video, thumbnail and image
      # URLs from them.
      #
      # Raises FileNotFound when the page has no object/embed element or the
      # embed src carries no "?v=" token.
      def do_query
        login
        page = agent.get(@page_url)
        raise FileNotFound unless embed = page.root.at('//object/embed')
        token = embed.attributes['src'].to_s.match(/\?v=(.*)$/)
        # Without this guard an unexpected src surfaced as NoMethodError on nil.
        raise FileNotFound unless token
        page = agent.get("http://pornotube.com/player/player.php?#{token[1]}")
        q = CGI::parse(page.body)
        user_id = q['userId'][0]
        media_id = q['mediaId'][0]
        @video_url = "http://#{q['mediaDomain'][0]}.pornotube.com/#{user_id}/#{media_id}.flv"
        @thumb_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}.jpg"
        @image_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}_full.jpg"
        @embed_tag = q['embedCode'][0]
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for RedTube pages. The video URL is computed locally from the
    # numeric content id; the embed tag is fetched lazily on first access.
    class RedTube < Base
      url_regex %r|\Ahttp://www\.redtube\.com/(\d{4})|

      # Validates +url+ via Base#initialize, then computes the video URL.
      def initialize(url, opt = nil)
        super
        do_query
      end

      # Returns the embed markup, downloading the site's embed page on the
      # first call and memoizing the #cpf textarea contents. Stays nil (and
      # is re-fetched next time) when the textarea is not found.
      def embed_tag
        return @embed_tag if @embed_tag
        url = "http://www.redtube.com/embed/#{content_id}"
        response_body = http_get(url)
        doc = Hpricot(response_body)
        doc.search('//textarea#cpf') do |elem|
          @embed_tag = elem.inner_html
        end
        @embed_tag
      end

      private

      # The digits captured from the page URL.
      def content_id; url_regex_match[1]; end

      # Builds @video_url as <directory>/<scrambled name>.flv under the
      # download host, where directory is id / 1000, zero-padded to 7 digits.
      def do_query
        id = content_id || '0'
        id = '1' if id.empty?
        number = id.to_i
        directory = "%07d" % (number / 1000)
        file_name = scrambled_name("%07d" % number)
        @video_url = "http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/#{directory}/#{file_name}.flv"
        # @thumb_url = "http://thumbs.redtube.com/_thumbs/#{pathnr}/#{s}/#{s}_#{'%03d' % i}.jpg"
      end

      # Derives the 9-character file name from the 7-digit, zero-padded id.
      #
      # A position-weighted digit sum is reduced to its own digit sum
      # (+checksum+); each id digit, offset by the checksum and a fixed
      # per-position shift, selects a character from the table, and the two
      # zero-padded checksum digits are interleaved at fixed positions.
      #
      # Digits are read with str[i, 1].to_i rather than str[i] - 48: the
      # latter depends on Ruby 1.8 returning a character code from String#[]
      # and raises NoMethodError on 1.9+, while the former yields the same
      # value on every Ruby version.
      def scrambled_name(digits)
        table = %w!R 1 5 3 4 2 O 7 K 9 H B C D X F G A I J 8 L M Z 6 P Q 0 S T U V W E Y N!

        weighted = 0
        digits.length.times { |i| weighted += digits[i, 1].to_i * (i + 1) }
        checksum = 0
        weighted.to_s.split(//).each { |d| checksum += d.to_i }
        check = "%02d" % checksum

        pick = lambda { |pos, shift| table[digits[pos, 1].to_i + checksum + shift] }

        code = ''
        code += pick.call(3, 3)
        code += check[1, 1]
        code += pick.call(0, 2)
        code += pick.call(2, 1)
        code += pick.call(5, 6)
        code += pick.call(1, 5)
        code += check[0, 1]
        code += pick.call(4, 7)
        code += pick.call(6, 4)
        code
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
module WWW
  module VideoScraper
    # Scraper for Veoh video pages.
    class Veoh < Base
      url_regex %r!\Ahttp://www\.veoh\.com/videos/([[:alnum:]]+)!

      # Validates +url+ via Base#initialize, then immediately queries the site.
      def initialize(url, opt = nil)
        super
        do_query
      end

      private

      # Reads the preview video URL, title and thumbnail from attributes of
      # the REST details document, then takes the embed markup from the
      # "embed" input on the watch page. Any field whose pattern does not
      # match is left nil.
      def do_query
        @id = url_regex_match[1]
        request_url = "http://www.veoh.com/rest/video/#{@id}/details"
        xml = http_get(request_url)
        @video_url = xml.match(/fullPreviewHashPath="([^"]+)"/).to_a[1]
        @title = xml.match(/title="([^"]+)"/).to_a[1]
        @thumb_url = xml.match(/fullMedResImagePath="([^"]+)"/).to_a[1]
        html = http_get(@page_url)
        escaped_embed = html.match(/\sid="embed"\s[^>]*value="([^"]+)"/).to_a[1]
        # Unescape only when the input was found: passing nil to
        # CGI.unescapeHTML raises, which would abort construction even though
        # the video URL itself was obtained.
        @embed_tag = CGI.unescapeHTML(escaped_embed) if escaped_embed
      end
    end
  end
end
30
+