valda-video_scraper 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog ADDED
@@ -0,0 +1,4 @@
1
+ == 1.0.1 / 2009-01-17
2
+
3
+ * initial release
4
+
data/README ADDED
@@ -0,0 +1,69 @@
1
+ = WWW::VideoScraper
2
+
3
+ * http://coderepos.org/share/browser/lang/ruby/video_scraper
4
+ * http://github.com/valda/video_scraper/tree/master
5
+
6
+ == DESCRIPTION:
7
+
8
+ Web scraping library for video sharing sites.
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ Supported sites
13
+
14
+ * YouTube
15
+ * NICO NICO DOUGA
16
+ * AmebaVision
17
+ * Dailymotion
18
+ * Veoh
19
+ * YourFileHost
20
+ * RedTube
21
+ * Pornhub
22
+ * Ura Agesage
23
+ * MoroTube
24
+ * Pornotube
25
+ * YouPorn
26
+
27
+ == SYNOPSIS:
28
+
29
+ >> require 'www/video_scraper'
30
+ >> scraper = WWW::VideoScraper.scrape('http://www.youtube.com/watch?v=OFPnvARUOHI')
31
+ >> scraper.video_url
32
+ => "http://www.youtube.com/get_video?video_id=OFPnvARUOHI&t=OEgsToPDskIpQJU48rm4-sS1RtbItouY"
33
+ >> scraper.thumb_url
34
+ => "http://i.ytimg.com/vi/OFPnvARUOHI/default.jpg"
35
+
36
+ == REQUIREMENTS:
37
+
38
+ * WWW::Mechanize
39
+ * Hpricot
40
+ * CGIAlt (recommended)
41
+
42
+ == INSTALL:
43
+
44
+ * sudo gem install valda-video_scraper
45
+
46
+ == LICENSE:
47
+
48
+ (The MIT License)
49
+
50
+ Copyright (c) 2009 YAMAGUCHI Seiji <valda at underscore.jp>
51
+
52
+ Permission is hereby granted, free of charge, to any person obtaining
53
+ a copy of this software and associated documentation files (the
54
+ 'Software'), to deal in the Software without restriction, including
55
+ without limitation the rights to use, copy, modify, merge, publish,
56
+ distribute, sublicense, and/or sell copies of the Software, and to
57
+ permit persons to whom the Software is furnished to do so, subject to
58
+ the following conditions:
59
+
60
+ The above copyright notice and this permission notice shall be
61
+ included in all copies or substantial portions of the Software.
62
+
63
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
64
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
66
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
67
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
68
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
69
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,144 @@
1
# Rake tasks for the video_scraper gem: tests, gem packaging, rdoc
# generation and RubyForge publishing.
require 'rubygems'
require 'rake'
require 'rake/clean'
require 'rake/testtask'
require 'rake/packagetask'
require 'rake/gempackagetask'
require 'rake/rdoctask'
require 'rake/contrib/rubyforgepublisher'
require 'rake/contrib/sshpublisher'
require 'fileutils'
require 'lib/www/video_scraper'
include FileUtils

NAME              = "video_scraper"
AUTHOR            = "YAMAGUCHI Seiji"
EMAIL             = "valda@underscore.jp"
DESCRIPTION       = "Web scraping library for video sharing sites."
RUBYFORGE_PROJECT = "video_scraper"
HOMEPATH          = "http://github.com/valda/video_scraper"
BIN_FILES         = %w(  )

VERS              = WWW::VideoScraper::VERSION
# Committed revision read from .svn/entries; nil when the file cannot be read
# or carries no committed-rev attribute. The capture must be \d+ (digits).
REV               = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]

task :default => [:test]
task :package => [:clean]

Rake::TestTask.new("test") do |t|
  t.libs << "test"
  t.pattern = "test/**/*_test.rb"
  t.verbose = true
end

# Gem specification shared by the packaging, debug and gemspec tasks below.
spec = Gem::Specification.new do |s|
  s.name              = NAME
  s.version           = VERS
  s.platform          = Gem::Platform::RUBY
  s.has_rdoc          = true
  s.extra_rdoc_files  = ["README", "ChangeLog"]
  s.rdoc_options     += RDOC_OPTS + ['--exclude', '^(examples|extras)/']
  s.summary           = DESCRIPTION
  s.description       = DESCRIPTION
  s.author            = AUTHOR
  s.email             = EMAIL
  s.homepage          = HOMEPATH
  s.executables       = BIN_FILES
  s.rubyforge_project = RUBYFORGE_PROJECT
  s.bindir            = "bin"
  s.require_path      = "lib"
  #s.autorequire       = ""
  s.test_files        = Dir["test/*_test.rb"]

  s.add_dependency('mechanize', '>=0.8.4')
  #s.required_ruby_version = '>= 1.8.2'

  s.files = %w(README ChangeLog Rakefile) +
    Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
    Dir.glob("ext/**/*.{h,c,rb}") +
    Dir.glob("examples/**/*.rb") +
    Dir.glob("tools/*.rb") +
    Dir.glob("rails/*.rb")

  s.extensions = FileList["ext/**/extconf.rb"].to_a
end

Rake::GemPackageTask.new(spec) do |p|
  p.need_tar = true
  p.gem_spec = spec
end

task :install do
  name = "#{NAME}-#{VERS}.gem"
  sh %{rake package}
  sh %{sudo gem install pkg/#{name}}
end

task :uninstall => [:clean] do
  sh %{sudo gem uninstall #{NAME}}
end


Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'html'
  rdoc.options += RDOC_OPTS
  rdoc.template = "resh"
  #rdoc.template = "#{ENV['template']}.rb" if ENV['template']
  if ENV['DOC_FILES']
    # DOC_FILES is a comma separated list of files to document.
    rdoc.rdoc_files.include(ENV['DOC_FILES'].split(/,\s*/))
  else
    rdoc.rdoc_files.include('README', 'ChangeLog')
    rdoc.rdoc_files.include('lib/**/*.rb')
    rdoc.rdoc_files.include('ext/**/*.c')
  end
end

desc "Publish to RubyForge"
task :rubyforge => [:rdoc, :package] do
  require 'rubyforge'
  Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'yamaguchi').upload
end

desc 'Package and upload the release to rubyforge.'
task :release => [:clean, :package] do |t|
  # VERSION must be given on the command line and equal the library version.
  v = ENV["VERSION"] or abort "Must supply VERSION=x.y.z"
  abort "Versions don't match #{v} vs #{VERS}" unless v == VERS
  pkg = "pkg/#{NAME}-#{VERS}"

  require 'rubyforge'
  rf = RubyForge.new.configure
  puts "Logging in"
  rf.login

  c = rf.userconfig
  # c["release_notes"] = description if description
  # c["release_changes"] = changes if changes
  c["preformatted"] = true

  files = [
    "#{pkg}.tgz",
    "#{pkg}.gem"
  ].compact

  puts "Releasing #{NAME} v. #{VERS}"
  rf.add_release RUBYFORGE_PROJECT, NAME, VERS, *files
end

desc 'Show information about the gem.'
task :debug_gem do
  puts spec.to_ruby
end

desc 'Update gem spec'
task :gemspec do
  # Block form closes (and therefore flushes) the file once it is written.
  File.open("#{NAME}.gemspec", 'w') { |f| f.write spec.to_ruby }
end
@@ -0,0 +1,34 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for adult.agesage.jp content pages.
#
# The page URL is turned into its ".xml" counterpart; video URL, thumbnail
# and title are read from the <movie> element of that document.
class AgeSage < Base
  url_regex %r!\Ahttp://adult\.agesage\.jp/contentsPage\.html\?mcd=[[:alnum:]]{16}!

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private

  # Fetches the XML description and fills in the scraped attributes.
  # Raises FileNotFound when the response body is nil or empty. When the
  # document has no <movie> element nothing is assigned.
  def do_query
    @request_url = @page_url.sub('.html', '.xml')
    @response_body = http_get(@request_url)
    raise FileNotFound if @response_body.nil? || @response_body.empty?

    movie = Hpricot.XML(@response_body.toutf8).at('/movie')
    return unless movie

    @video_url = movie.at('/movieurl').inner_html
    @thumb_url = movie.at('/thumbnail').inner_html
    @title = movie.at('/title').inner_html

    # The embed snippet needs the 16 character "mcd" code from the page URL.
    mcd_match = @page_url.match(%r|agesage\.jp/contentsPage\.html\?mcd=([[:alnum:]]{16})|)
    mcd = mcd_match[1]
    # Heredoc body is kept flush-left, as it appears in the source.
    @embed_tag = <<-HTML
<script type="text/javascript" src="http://adult.agesage.jp/js/past_uraui.js"></script>
<script type="text/javascript">Purauifla("mcd=#{mcd}", 320, 275);</script>
    HTML
  end
end
33
+ end
34
+ end
@@ -0,0 +1,28 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for vision.ameba.jp watch pages, backed by the detailMovie API.
class AmebaVision < Base
  url_regex %r!\Ahttp://vision\.ameba\.jp/watch\.do.*?\?movie=(\d+)!

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private

  # Reads title, canonical link and large thumbnail from the API response
  # and derives the video URL from the thumbnail URL by string substitution.
  def do_query
    movie_id = url_regex_match[1]
    api_xml = http_get("http://vision.ameba.jp/api/get/detailMovie.do?movie=#{movie_id}")
    item_doc = Hpricot.XML(api_xml.toutf8)

    @title = item_doc.at('//item/title').inner_html
    @page_url = item_doc.at('//item/link').inner_html
    @thumb_url = item_doc.at('//item/imageUrlLarge').inner_html

    # Same three substitutions, applied in the same order, one per step.
    flv_url = @thumb_url.sub('//vi', '//vm')
    flv_url = flv_url.sub('/jpg/', '/flv/')
    @video_url = flv_url.sub('_4.jpg', '.flv')
  end
end
27
+ end
28
+ end
@@ -0,0 +1,60 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ module WWW
4
+ module VideoScraper
5
# Behaviour shared by every site-specific scraper.
#
# A subclass registers the page URLs it handles with +url_regex+ and fills
# in the attributes exposed by the readers below.
class Base
  attr_reader :page_url, :video_url, :thumb_url, :embed_tag, :title

  ## class methods
  class << self
    # Registers +regex+ as the pattern of page URLs this class handles.
    def url_regex(regex)
      @url_regex = regex
    end

    # True when +url+ matches the pattern registered with url_regex.
    def valid_url?(url)
      not (url =~ @url_regex).nil?
    end
  end

  # url:: page URL; it must match the pattern registered by the class.
  # opt:: optional Hash of settings. This class reads :cache (an object
  #       responding to :get and :set) and :logger.
  #
  # Raises StandardError when +url+ does not match the registered pattern.
  def initialize(url, opt = nil)
    @page_url = url
    @opt = (opt || {})
    @url_regex_match = self.class.instance_variable_get(:@url_regex).match(@page_url).freeze
    raise StandardError, "url is not #{self.class.name} link: #{url}" if @url_regex_match.nil?
  end

  private
  # MatchData produced by matching the page URL against the class pattern.
  def url_regex_match; @url_regex_match; end

  # Lazily created, memoized Mechanize agent.
  def agent
    @agent ||= WWW::Mechanize.new do |a|
      a.user_agent_alias = 'Windows IE 6'
    end
  end

  # Fetches +url+ and returns the response body.
  #
  # opt:: extra open-uri options, merged over the default User-Agent.
  #
  # When a :cache option was given, the body is looked up under a key built
  # from the URL and the options, and stored there after a miss.
  #
  # Raises RuntimeError when the cache object lacks :get or :set,
  # TryAgainLater when the HTTP error message mentions 503, and re-raises
  # any other OpenURI::HTTPError.
  def http_get(url, opt = nil)
    open_opt = {
      "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
    }.merge( opt || {} )
    cache = @opt[:cache]
    return fetch_content(url, open_opt) unless cache

    unless cache.respond_to?(:get) and cache.respond_to?(:set)
      raise RuntimeError, 'As for cache object what responds to :get and :set is required.'
    end
    # The logger is optional; a cache may be configured without one.
    @opt[:logger].debug 'use cache.' if @opt[:logger]
    cache_key = "#{url}|#{open_opt}"
    content = cache.get(cache_key)
    unless content
      content = fetch_content(url, open_opt)
      cache.set(cache_key, content)
    end
    content
  rescue OpenURI::HTTPError => e
    raise TryAgainLater, e.to_s if e.to_s.include?('503')
    raise
  end

  # Reads the body of +url+ through open-uri.
  #
  # OpenURI.open_uri is used instead of Kernel#open because subclasses pass
  # URLs extracted from remote pages, and Kernel#open would run a command
  # for a value starting with "|".
  def fetch_content(url, open_opt)
    OpenURI.open_uri(url, open_opt) {|fh| fh.read }
  end

end
59
+ end
60
+ end
@@ -0,0 +1,36 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for www.dailymotion.com video pages.
class Dailymotion < Base
  url_regex %r!\Ahttp://www\.dailymotion\.com/.*?/video/([\w/-]+)!

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private

  # Scans every <script> element for the player's "video" and "preview"
  # variables, then reads the title and embed code from the page.
  # Title and embed code are best effort: any error leaves them nil.
  def do_query
    uri = URI.parse(@page_url)
    doc = Hpricot(http_get(@page_url).toutf8)

    doc.search('//script').each do |script|
      if video = script.inner_html.match(/\.addVariable\("video",\s*"([^"]+)"/i)
        @video_url = absolute_url(uri, video[1])
      end
      if preview = script.inner_html.match(/\.addVariable\("preview",\s+"([^"]+)"/)
        @thumb_url = absolute_url(uri, preview[1])
      end
    end

    @title = begin
      doc.at('//h1[@class="nav"]').inner_html
    rescue StandardError
      nil
    end
    @embed_tag = begin
      CGI.unescapeHTML(doc.at('//textarea[@id="video_player_embed_code_text"]').inner_html)
    rescue StandardError
      nil
    end
  end

  # Unescapes +escaped+, keeps the part before the first "||" or "@@"
  # separator and resolves it against the scheme and host of +base+.
  def absolute_url(base, escaped)
    path = CGI.unescape(escaped).split(/\|\||@@/).first
    URI.join("#{base.scheme}://#{base.host}", path).to_s
  end
end
34
+ end
35
+ end
36
+
@@ -0,0 +1,37 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for www.morotube.com watch pages.
class MoroTube < Base
  url_regex %r!\Ahttp://www\.morotube\.com/watch\.php\?clip=([[:alnum:]]{8})!
  attr_reader :author, :duration

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private

  # Reads the metadata from /gen_xml.php on the page's host, then takes the
  # embed code from the "inpVdoEmbed" input of the watch page itself.
  def do_query
    api_uri = URI.parse(@page_url)
    api_uri.path = '/gen_xml.php'
    api_uri.query = "type=o&id=#{url_regex_match[1]}"
    xdoc = Hpricot.XML(http_get(api_uri.to_s).toutf8)

    # Every metadata field lives directly below /root/video.
    field = lambda { |name| xdoc.search("/root/video/#{name}").inner_html }
    @title = field.call('title')
    @video_url = field.call('file')
    @thumb_url = field.call('image')
    @author = field.call('author')
    @duration = field.call('duration')

    watch_page = Hpricot(http_get(@page_url))
    watch_page.search('//input#inpVdoEmbed') { |input| @embed_tag = input.attributes['value'] }
  end
end
36
+ end
37
+ end
@@ -0,0 +1,73 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for www.nicovideo.jp watch pages.
#
# Logs in with the :nico_video_mail and :nico_video_password options before
# querying the getflv and getthumbinfo APIs.
class NicoVideo < Base
  url_regex %r!\Ahttp://www\.nicovideo\.jp/watch/([[:alnum:]]+)!

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private
  # Posts the configured credentials to the login endpoint.
  # Raises RuntimeError unless the x-niconico-authflag header is '1'.
  def login
    page = agent.post('https://secure.nicovideo.jp/secure/login?site=niconico',
                      'mail' => @opt[:nico_video_mail],
                      'password' => @opt[:nico_video_password])
    raise RuntimeError, 'login failure' unless page.header['x-niconico-authflag'] == '1'
  end

  # Reads the video URL from the getflv API response.
  # Raises FileNotFound when the response carries no "url" parameter.
  def get_flv(id)
    request_url = "http://www.nicovideo.jp/api/getflv?v=#{id}"
    page = agent.get(request_url)
    q = CGI.parse(page.body)
    # CGI.parse answers a missing key with an empty (truthy) Array, so the
    # absence of "url" has to be detected by emptiness, not by truthiness.
    urls = q['url']
    raise FileNotFound if urls.nil? or urls.empty?
    @video_url = urls.first
  end

  # Reads thumbnail URL and title from the getthumbinfo API response.
  def get_thumb(id)
    page = agent.get("http://www.nicovideo.jp/api/getthumbinfo/#{id}")
    xdoc = Hpricot.XML(page.body.toutf8)
    xdoc.search('//thumbnail_url') do |elem|
      @thumb_url = elem.inner_html
    end
    xdoc.search('//thumb/title') do |elem|
      @title = elem.inner_html
    end
  end

  # Reads the embed code from the iframe form of the watch page.
  # +id+ is accepted for symmetry with the other getters but is not used.
  def get_embed_tag(id)
    page = agent.get(@page_url)
    doc = Hpricot(page.body)
    doc.search('//form[@name="form_iframe"] //input[@name="input_iframe"]') do |elem|
      @embed_tag = elem.attributes['value']
    end
  end

  # Runs login and all getters, translating transport failures:
  # Timeout::Error and every HTTP error code other than 404/403 become
  # TryAgainLater; 404 and 403 become FileNotFound.
  def do_query
    login
    id = url_regex_match[1]
    get_flv(id)
    get_thumb(id)
    get_embed_tag(id)
  rescue Timeout::Error => e
    raise TryAgainLater, e.to_s
  rescue WWW::Mechanize::ResponseCodeError => e
    case e.response_code
    when '404', '403'
      raise FileNotFound, e.to_s
    else
      # Covers 502 as well as any other response code.
      raise TryAgainLater, e.to_s
    end
  end
end
72
+ end
73
+ end
@@ -0,0 +1,29 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for www.pornhub.com view_video pages.
class Pornhub < Base
  url_regex %r|\Ahttp://www\.pornhub\.com/view_video\.php.*viewkey=[[:alnum:]]{20}|

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private
  # Follows the player's "options" URL and reads the video URL from its
  # <flv_url> element. The thumbnail URL is derived from the video path
  # and the embed code is taken from the page's share textarea.
  #
  # Raises FileNotFound when the page has no "options" variable or the
  # options document has no <flv_url> element.
  def do_query
    html = http_get(@page_url)
    raise FileNotFound unless m = html.match(/\.addVariable\("options",\s*"([^"]+)"\);/i)
    @request_url = URI.decode m[1]
    @response_body = http_get(@request_url)
    @video_url = @response_body.match(%r|<flv_url>([^<]+)</flv_url>|).to_a[1]
    # Without a video URL there is nothing to scrape; fail with the
    # library's own error instead of a NoMethodError on nil below.
    raise FileNotFound unless @video_url
    if m = @video_url.match(%r|videos/(\d{3}/\d{3}/\d{3})/\d+\.flv|)
      @thumb_url = "http://p1.pornhub.com/thumbs/#{m[1]}/small.jpg"
    end
    @embed_tag = html.match(%r|<textarea[^>]+class="share-flag-embed">(<object type="application/x-shockwave-flash".*?</object>)</textarea>|).to_a[1]
  end
end
28
+ end
29
+ end
@@ -0,0 +1,42 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for pornotube.com media and channel pages.
class Pornotube < Base
  url_regex %r!\Ahttp://(?:www\.)?pornotube\.com/(?:media|channels)\.php\?.*m=(\d+)!
  # URL of the full size preview image computed by the scrape.
  attr_reader :image_url

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private
  # Submits the age verification form so that later requests made with the
  # same agent can see the content.
  def login
    agent.post("http://pornotube.com/index.php",
               'verifyAge' => 'true',
               'bMonth' => '01',
               'bDay' => '01',
               'bYear' => '1970',
               'submit' => 'View All Content')
  end

  # Reads the player parameters referenced by the page's <embed> element and
  # builds the video, thumbnail and image URLs from them.
  #
  # Raises FileNotFound when the page has no <object>/<embed> element or its
  # src carries no "?v=" value.
  def do_query
    login
    page = agent.get(@page_url)
    raise FileNotFound unless embed = page.root.at('//object/embed')
    src = embed.attributes['src']
    raise FileNotFound unless src and m = src.match(/\?v=(.*)$/)
    page = agent.get("http://pornotube.com/player/player.php?#{m[1]}")
    q = CGI::parse(page.body)
    user_id = q['userId'][0]
    media_id = q['mediaId'][0]
    @video_url = "http://#{q['mediaDomain'][0]}.pornotube.com/#{user_id}/#{media_id}.flv"
    @thumb_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}.jpg"
    @image_url = "http://photo.pornotube.com/thumbnails/video/#{user_id}/#{media_id}_full.jpg"
    @embed_tag = q['embedCode'][0]
  end
end
41
+ end
42
+ end
@@ -0,0 +1,62 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for www.redtube.com pages.
#
# The video URL is computed from the numeric content id; the embed code is
# fetched lazily on first access.
class RedTube < Base
  url_regex %r|\Ahttp://www\.redtube\.com/(\d{4})|

  # Validates +url+ through Base#initialize, then computes the video URL.
  def initialize(url, opt = nil)
    super
    do_query
  end

  # Returns the embed code, fetching the /embed/<id> page on first use and
  # memoizing the content of its "cpf" textarea. Stays nil when the page has
  # no such textarea.
  def embed_tag
    return @embed_tag if @embed_tag
    url = "http://www.redtube.com/embed/#{content_id}"
    response_body = http_get(url)
    doc = Hpricot(response_body)
    doc.search('//textarea#cpf') do |elem|
      @embed_tag = elem.inner_html
    end
    @embed_tag
  end

  private
  # Numeric id captured from the page URL.
  def content_id; url_regex_match[1]; end

  # Derives the obfuscated .flv file name from the content id.
  #
  # The id is zero padded to seven digits; a position weighted digit sum is
  # reduced to its own digit sum (qsum), and each padded digit plus qsum
  # plus a fixed offset indexes the substitution alphabet +xc+.
  def do_query
    s = content_id || '0'
    s = '1' if s.empty?
    pathnr = "%07d" % (s.to_i / 1000)
    s = "%07d" % s.to_i
    # Digits are read as one character strings and converted with to_i.
    # String#[] with a single index returns an Integer only on Ruby 1.8, so
    # the former "s[n] - 48" arithmetic fails on later versions.
    digits = s.split(//).map { |ch| ch.to_i }
    xc = %w!R 1 5 3 4 2 O 7 K 9 H B C D X F G A I J 8 L M Z 6 P Q 0 S T U V W E Y N!
    weighted = 0
    digits.each_with_index do |digit, i|
      weighted += digit * (i + 1)
    end
    qsum = 0
    weighted.to_s.split(//).each do |ch|
      qsum += ch.to_i
    end
    qstr = "%02d" % qsum
    code = ''
    code += xc[digits[3] + qsum + 3]
    code += qstr[1,1]
    code += xc[digits[0] + qsum + 2]
    code += xc[digits[2] + qsum + 1]
    code += xc[digits[5] + qsum + 6]
    code += xc[digits[1] + qsum + 5]
    code += qstr[0,1]
    code += xc[digits[4] + qsum + 7]
    code += xc[digits[6] + qsum + 4]
    content_video = pathnr + '/' + code + '.flv'
    @video_url = "http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/#{content_video}"
    # @thumb_url = "http://thumbs.redtube.com/_thumbs/#{pathnr}/#{s}/#{s}_#{'%03d' % i}.jpg"
  end
end
61
+ end
62
+ end
@@ -0,0 +1,30 @@
1
+ # -*- mode:ruby; coding:utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/base')
4
+
5
+ module WWW
6
+ module VideoScraper
7
# Scraper for www.veoh.com video pages, backed by the REST details document.
class Veoh < Base
  url_regex %r!\Ahttp://www\.veoh\.com/videos/([[:alnum:]]+)!

  # Validates +url+ through Base#initialize, then scrapes immediately.
  def initialize(url, opt = nil)
    super
    do_query
  end

  private
  # Reads video URL, title and thumbnail from attributes of the REST
  # details response, and the embed code from the "embed" input of the page.
  # Every field is left nil when its pattern does not match.
  def do_query
    @id = url_regex_match[1]
    request_url = "http://www.veoh.com/rest/video/#{@id}/details"
    xml = http_get(request_url)
    @video_url = xml.match(/fullPreviewHashPath="([^"]+)"/).to_a[1]
    @title = xml.match(/title="([^"]+)"/).to_a[1]
    @thumb_url = xml.match(/fullMedResImagePath="([^"]+)"/).to_a[1]
    html = http_get(@page_url)
    escaped_embed = html.match(/\sid="embed"\s[^>]*value="([^"]+)"/).to_a[1]
    # A page without the embed input must not abort the scrape:
    # CGI.unescapeHTML cannot take nil.
    @embed_tag = CGI.unescapeHTML(escaped_embed) if escaped_embed
  end
end
28
+ end
29
+ end
30
+