video_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,42 @@
1
+ = video_parser
2
+
3
+ == DESCRIPTION:
4
+ Fetches video information (title, embeddable video source and thumbnail picture path) from youku, tudou and slideshare page URLs.
5
+
6
+ == SYNOPSIS:
7
+ VideoParser.get(url) # => {:title => "...", :video_src => "...", :pic_path => "..."}
8
+
9
+
10
+ == REQUIREMENTS:
11
+
12
+ * httparty
13
+ * nokogiri
14
+
15
+ == INSTALL:
16
+
17
+ * gem install video_parser
18
+
19
+ == LICENSE:
20
+
21
+ (The MIT License)
22
+
23
+ Copyright (c) 2011 kame
24
+
25
+ Permission is hereby granted, free of charge, to any person obtaining
26
+ a copy of this software and associated documentation files (the
27
+ 'Software'), to deal in the Software without restriction, including
28
+ without limitation the rights to use, copy, modify, merge, publish,
29
+ distribute, sublicense, and/or sell copies of the Software, and to
30
+ permit persons to whom the Software is furnished to do so, subject to
31
+ the following conditions:
32
+
33
+ The above copyright notice and this permission notice shall be
34
+ included in all copies or substantial portions of the Software.
35
+
36
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
37
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
38
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
39
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
40
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
41
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
42
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,72 @@
1
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/gempackagetask'
require 'spec'
require 'spec/rake/spectask'

desc 'Default: run unit tests.'
task :default => :test

# Runs every file matching spec/**/*_spec.rb, passing "-c -f nested" to the
# spec runner.
desc 'Test the video_parser gem.'
Spec::Rake::SpecTask.new(:test) do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
  t.spec_opts = ['-c', '-f', 'nested']
end

# Builds RDoc output into ./rdoc from the README and everything under lib/.
desc 'Generate documentation for the video_parser gem.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'VideoParser'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

PKG_VERSION = "0.1.0"
PKG_FILES = FileList[
  "lib/**/*", "spec/**/*", "Rakefile", "README"
]

# One-line blurb shared by the gem summary and description. A plain string is
# used instead of a <<-EOF heredoc because the heredoc keeps its leading
# indentation and trailing newline, which end up in the published metadata.
gem_blurb = 'This gem is used to get video info from youku, tudou, slideshare.'

# Generate the package
spec = Gem::Specification.new do |s|

  #### Basic information.

  s.name = 'video_parser'
  s.version = PKG_VERSION
  s.summary = gem_blurb
  s.description = gem_blurb
  s.homepage = "https://github.com/kamechb/video_parser"

  #### Which files are to be included in this gem? (lib/, spec/, Rakefile and README.)

  s.files = PKG_FILES

  #### Load-time details: library and application (you will need one or both).

  s.require_path = 'lib'

  s.add_dependency('httparty')
  s.add_dependency('nokogiri')

  #### Documentation and testing.

  s.has_rdoc = true

  #### Author and project details.

  s.authors = ["kame"]
  s.email = ["kamechb@gmail.com"]
end

# Packages the gem and additionally produces .zip and .tar archives.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end
@@ -0,0 +1,30 @@
1
+ require 'iconv'
2
+ require 'nokogiri'
3
+ require 'httparty'
4
+
5
# Mixin that downloads a page over HTTP and parses it into a Nokogiri
# HTML document, converting the page to UTF-8 first when it declares
# another charset.
module GateWay
  # Fetches +uri+ and returns the parsed document.
  #
  # uri     - anything whose #to_s is the URL to fetch.
  # options - Hash; :timeout is passed to HTTParty (20 when absent).
  #           The hash is only read, never modified.
  #
  # Returns a Nokogiri HTML document, or nil when fetching, converting or
  # parsing raises a StandardError (deliberate best effort).
  def get_doc(uri, options = {})
    timeout = options[:timeout] || 20
    page = HTTParty.get(uri.to_s, :timeout => timeout).to_s
    Nokogiri::HTML(conv_page(page))
  rescue
    nil
  end

  private

  # Returns +page+ converted to UTF-8. The page is returned untouched when
  # it declares no charset or already declares UTF-8 (any letter case, with
  # or without the hyphen).
  def conv_page(page)
    charset = get_charset(page)
    return page if charset.nil? || charset =~ /\Autf-?8\z/i

    Iconv.conv('UTF-8', charset, page)
  end

  # Returns the first charset name declared in +page+ (for example in a
  # <meta> tag), or nil when there is none. Only characters that can appear
  # in a charset name are captured, so an unquoted or empty declaration
  # cannot swallow the text that follows it.
  def get_charset(page)
    page[/charset\s*=\s*['"]?([\w.:\-]+)/i, 1]
  end
end
@@ -0,0 +1,38 @@
1
+ require 'video_parser/video_site'
2
+
3
module VideoParser
  # Extracts title, embed source and thumbnail from www.slideshare.net pages.
  class SlideShare < VideoSite
    VideoHost = "www.slideshare.net"

    # Returns the HTML snippet that embeds +video_src+ in an iframe.
    def self.embed_html(video_src)
      <<-EMBED
      <div style='width:425px'>
      <iframe src='#{video_src}' width='425' height='355' frameborder='0' marginwidth='0' marginheight='0' scrolling='no'></iframe>
      </div>
      EMBED
    end

    private

    # Returns the info hash for the fetched page, or nil when no page was
    # fetched.
    def extract_video_info
      return nil unless doc

      {:title => title, :video_src => video_src, :pic_path => pic_path}
    end

    # The title comes from a tag such as:
    #   <meta name="og_title" property="og:title" content="Professional Programmer" />
    def title
      meta_content('og_title')
    end

    # The video src is the iframe_url attribute inside the script tag whose
    # id is "page-json". Returns nil when it cannot be found.
    def video_src
      page_json = doc.at_css('script[id~=page-json]').inner_text
      page_json[/['"]iframe_url['"]\:\s*?['"](.*?)['"],/, 1]
    rescue
      nil
    end

    # The pic path is the content of the meta tag with name="og_image".
    def pic_path
      meta_content('og_image')
    end

    # Returns the content attribute of the meta tag named +meta_name+, or nil
    # when the lookup raises (for instance because the tag is absent).
    def meta_content(meta_name)
      doc.at_css("meta[name~=#{meta_name}]")[:content]
    rescue
      nil
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require 'video_parser/video_site'
2
+
3
module VideoParser
  # Extracts title, embed source and thumbnail from www.tudou.com pages.
  #
  # Supports URLs whose path begins with "/playlist" or "/programs".
  class TuDou < VideoSite
    VideoHost = "www.tudou.com"
    attr_reader :is_playlist, :is_programs

    # Returns the <embed> HTML snippet that plays +video_src+.
    def self.embed_html(video_src)
      <<-EMBED
      <embed src = '#{video_src}' type='application/x-shockwave-flash' allowscriptaccess='always' allowfullscreen='true' wmode='opaque' width='480' height='400'></embed>
      EMBED
    end

    private

    # Returns the info hash for the fetched page, or nil when no page was
    # fetched.
    def extract_video_info
      return nil unless doc

      {:title => title, :video_src => video_src, :pic_path => pic_path}
    end

    # Text of the first <h1>, or nil when the page has none.
    def title
      heading = doc.at_css("h1")
      heading && heading.inner_text
    end

    # Builds the player URL.
    #
    # Playlist pages need the lcode from the page script plus an item id
    # (taken from the URL, falling back to the script's defaultIid).
    # Programs pages carry the code in the URL path after "view/"; a trailing
    # slash is not required.
    #
    # Returns nil for unsupported paths or when the code cannot be found, so
    # callers never receive a URL with an empty code segment.
    def video_src
      if is_playlist?
        lcode = get_lcode_from_script
        iid = iid_from_uri || get_default_iid_from_script
        lcode && "http://www.tudou.com/l/#{lcode}/&iid=#{iid}/v.swf"
      elsif is_programs?
        lcode = uri.path[%r{view/([^/]+)}, 1]
        lcode && "http://www.tudou.com/v/#{lcode}/v.swf"
      end
    end

    # Item id encoded in the URL, or nil. The "iid=" form is searched in the
    # whole URL because a query string or fragment is not part of uri.path.
    def iid_from_uri
      uri.path[/\/p\/l\d+i(\d+).*\.html/, 1] ||
        uri.to_s[/[&#\?]iid=(\d+)/, 1] ||
        uri.path[/\/p\/a\d+i(\d+).*\.html/, 1]
    end

    # pic in script content is:
    #
    #   pic:"http://i4.tdimg.com/112/289/113/p.jpg"
    #
    # on playlist pages; programs pages are matched with "pic = '...'".
    def pic_path
      if is_playlist?
        script_content[/pic:\s*?['"](.*?)['"]\s*?,/, 1]
      elsif is_programs?
        script_content[/pic\s*?=\s*?['"](.*?)['"]\s*?,/, 1]
      end
    end

    # Truthy when the URL path contains "/playlist".
    def is_playlist?
      uri.path =~ /\/playlist/
    end

    # Truthy when the URL path contains "/programs".
    def is_programs?
      uri.path =~ /\/programs/
    end

    # lcode in script content is:
    #
    #   lid_code = lcode = 'uQa8UQMbcWE'
    def get_lcode_from_script
      script_content[/lcode\s*?=\s*?['"](.*?)['"]/m, 1]
    end

    # defaultIid in script content is:
    #
    #   defaultIid = 111370446,
    def get_default_iid_from_script
      script_content[/defaultIid\s*?=\s*?(\d+?)\s*?,/, 1]
    end

    # Content of the first <script> tag of the page, memoized. An empty
    # string is used when the page has no <script>, so the lookups above
    # return nil instead of raising.
    def script_content
      @script_content ||= begin
        script = doc.at_css("script")
        script ? script.content : ""
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'video_parser/gateway'
2
+
3
module VideoParser
  # Abstract base class for one supported video site. Subclasses implement
  # the private #extract_video_info, which reads the fetched page from #doc.
  class VideoSite
    include GateWay
    attr_reader :uri, :doc

    # uri - the page address; it is handed to GateWay#get_doc unchanged.
    def initialize(uri)
      @uri = uri
    end

    # Fetches the page into #doc and returns whatever the subclass's
    # extract_video_info returns.
    #
    # options - Hash forwarded to GateWay#get_doc (for example :timeout).
    def get(options = {})
      @doc = get_doc(uri, options)
      extract_video_info
    end

    private

    # Must be overridden; the base implementation always raises RuntimeError.
    def extract_video_info
      raise "This is an abstract class, you should implement this method in a subclass!"
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'video_parser/video_site'
2
+
3
module VideoParser
  # Extracts title, embed source and thumbnail from v.youku.com pages.
  class YouKu < VideoSite
    VideoHost = "v.youku.com"

    # Returns the <embed> HTML snippet that plays +video_src+.
    def self.embed_html(video_src)
      <<-EMBED
      <embed src='#{video_src}' quality='high' width='480' height='400' align='middle' allowScriptAccess='sameDomain' type='application/x-shockwave-flash'></embed>
      EMBED
    end

    private

    # Returns the info hash for the fetched page, or nil when no page was
    # fetched.
    def extract_video_info
      return nil unless doc

      {:title => title, :video_src => video_src, :pic_path => pic_path}
    end

    # Text of the <h1> carrying the "title" class with all whitespace
    # removed, or nil when the page has no such heading.
    def title
      heading = doc.at_css("h1[class~='title']")
      heading && heading.inner_text.gsub(/\s/, '')
    end

    # Value of the input with id "link2", or nil when it is absent.
    def video_src
      link_input = doc.at_css('input[id=link2]')
      link_input && link_input[:value]
    end

    # The thumbnail is the "pic" query parameter of the share link with id
    # "s_sina" (or "s_sohu" when the former is absent). Everything up to the
    # next "&" is returned, so a picture URL that itself contains "=" is not
    # truncated, and parameters whose name merely ends in "pic" are ignored.
    # Returns nil when no link or no pic parameter is found.
    def pic_path
      share_link = doc.at_css('a[id=s_sina]') || doc.at_css('a[id=s_sohu]')
      href = share_link && share_link[:href]
      href && href[/[?&]pic=([^&]*)/, 1]
    end
  end
end
@@ -0,0 +1,41 @@
1
+ $:.unshift(File.expand_path('../', __FILE__)) unless $:.include?(File.expand_path('../', __FILE__))
2
+ require 'rubygems'
3
+ require 'uri'
4
+ require 'video_parser/gateway'
5
+ require 'video_parser/video_site'
6
+ require 'video_parser/youku'
7
+ require 'video_parser/tudou'
8
+ require 'video_parser/slideshare'
9
+
10
# VideoParser supports these video sites:
# * youku
# * tudou
# * slideshare
module VideoParser
  extend self

  # Gets video info from +url+.
  #
  # url     - page address; the "http://" scheme is added when missing.
  # options - Hash forwarded to the site parser's #get (for example :timeout).
  #
  # Returns what the site parser returns (the bundled parsers return a Hash
  # with :title, :video_src and :pic_path, i.e. the title, the video src and
  # the thumbnail pic path), or nil when +url+ is nil, blank, not parseable
  # or not hosted on a supported site.
  def get(url, options = {})
    return nil if url.nil? || url.to_s.strip.empty?

    uri = begin
      URI.parse(auto_complete_url(url))
    rescue URI::Error
      nil
    end
    video_site = get_video_site(uri)
    video_site && video_site.new(uri).get(options)
  end

  private

  # Returns +url+ with "http://" prepended unless it already starts with an
  # http:// or https:// scheme (any letter case). The whole scheme is checked
  # so that a host such as "http.example.com" still gets one.
  def auto_complete_url(url)
    url = url.to_s.strip
    url =~ %r{\Ahttps?://}i ? url : "http://#{url}"
  end

  # uri is a URI instance. Returns the parser class whose VideoHost equals
  # the URI's host (compared case-insensitively), or nil.
  def get_video_site(uri)
    return nil unless uri.is_a?(URI)

    host = uri.host.to_s.downcase
    [YouKu, TuDou, SlideShare].find { |site| site::VideoHost == host }
  end

end
@@ -0,0 +1,2 @@
1
+ $:.unshift(File.expand_path('../../lib', __FILE__))
2
+ require 'video_parser'
@@ -0,0 +1,64 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# NOTE: every example calls VideoParser.get with a real site URL and compares
# the result with values recorded from that page.
describe VideoParser do
  describe "youku" do
    # A v_show style address should parse into the correct info.
    it "v_show格式的视频地址应该能解析出正确的信息" do
      parsed = VideoParser.get("http://v.youku.com/v_show/id_XMzIzMzQ0MTA0.html")
      parsed.should == {
        :video_src => "http://player.youku.com/player.php/sid/XMzIzMzQ0MTA0/v.swf",
        :pic_path => "http://g4.ykimg.com/0100641F464EA66CEB57FB050A7945BF6C192B-D2FE-0C76-A668-7E312C869217",
        :title => "视频:北京现代爱与梦同行"
      }
    end

    # A v_playlist style address should parse into the correct info.
    it "v_playlist格式的视频地址应该能解析出正确的信息" do
      parsed = VideoParser.get("http://v.youku.com/v_playlist/f16661073o1p0.html")
      parsed.should == {
        :video_src => "http://player.youku.com/player.php/Type/Folder/Fid/16661073/Ob/1/Pt/0/sid/XMzI0NzkwNTQ4/v.swf",
        :pic_path => "http://g2.ykimg.com/01270F1F464ECA6FF944A40123193CA1419694-4596-7AE5-C968-5CE1DF4383C8",
        :title => "专辑:韩国又抓3艘中国渔船韩媒要求对中国渔船动武"
      }
    end
  end

  describe "tudou" do
    # A playlist style address should parse into the correct info.
    it "playlist格式的地址应该能解析出正确的信息" do
      parsed = VideoParser.get("http://www.tudou.com/playlist/p/l14450778.html")
      parsed.should == {
        :video_src => "http://www.tudou.com/l/b5tccWF-afo/&iid=112289113/v.swf",
        :pic_path => "http://i4.tdimg.com/112/289/113/p.jpg",
        :title => "暗拍交警拦运煤车赚外快"
      }
    end

    # A programs style address should parse into the correct info.
    it "programs格式的地址应该能解析出正确的信息" do
      parsed = VideoParser.get("http://www.tudou.com/programs/view/qF_ZI0HPgig/")
      parsed.should == {
        :video_src => "http://www.tudou.com/v/qF_ZI0HPgig/v.swf",
        :pic_path => "http://i1.tdimg.com/112/238/874/p.jpg",
        :title => "元首版 机器猫之歌"
      }
    end
  end

  describe "slideshare" do
    # A well-formed slideshare address should parse into the correct info.
    it "正确的slideshare格式应该能解析出正确的信息" do
      parsed = VideoParser.get("http://www.slideshare.net/gabriele.lana/professional-programmer")
      parsed.should == {
        :video_src => "http://www.slideshare.net/slideshow/embed_code/10251145",
        :title => "Professional Programmer",
        :pic_path => "http://cdn.slidesharecdn.com/craftsman2011-agileday-111121040632-phpapp02-thumbnail-2"
      }
    end
  end

  # Wrong addresses should return nil.
  describe "错误的地址应该返回nil" do
    # An empty address returns nil.
    it "空地址返回nil" do
      VideoParser.get("").should be_nil
    end

    # A nil address returns nil.
    it "地址为nil返回nil" do
      VideoParser.get(nil).should be_nil
    end

    # An address that is not a supported video address returns nil.
    it "地址为不正确的视频地址返回nil" do
      VideoParser.get("http://www.baidu.com").should be_nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: video_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - kame
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-04-06 00:00:00 +08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: httparty
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: nokogiri
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ description: " This gem is used to get video info from youku, tudou, slideshare.\n"
50
+ email:
51
+ - kamechb@gmail.com
52
+ executables: []
53
+
54
+ extensions: []
55
+
56
+ extra_rdoc_files: []
57
+
58
+ files:
59
+ - lib/video_parser.rb
60
+ - lib/video_parser/tudou.rb
61
+ - lib/video_parser/gateway.rb
62
+ - lib/video_parser/youku.rb
63
+ - lib/video_parser/slideshare.rb
64
+ - lib/video_parser/video_site.rb
65
+ - spec/video_parser_spec.rb
66
+ - spec/spec_helper.rb
67
+ - Rakefile
68
+ - README
69
+ has_rdoc: true
70
+ homepage: https://github.com/kamechb/video_parser
71
+ licenses: []
72
+
73
+ post_install_message:
74
+ rdoc_options: []
75
+
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ hash: 3
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ hash: 3
93
+ segments:
94
+ - 0
95
+ version: "0"
96
+ requirements: []
97
+
98
+ rubyforge_project:
99
+ rubygems_version: 1.6.2
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: This gem is used to get video info from youku, tudou, slideshare.
103
+ test_files: []
104
+