video_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,42 @@
1
+ = video_parser
2
+
3
+ == DESCRIPTION:
4
+ A Ruby library that extracts video information (title, video source URL and thumbnail path) from Youku, Tudou and SlideShare page URLs.
5
+
6
+ == SYNOPSIS:
7
+ VideoParser.get(url) # => {:title => "...", :video_src => "...", :pic_path => "..."}
8
+
9
+
10
+ == REQUIREMENTS:
11
+
12
+ * httparty
13
+ * nokogiri
14
+
15
+ == INSTALL:
16
+
17
+ * gem install video_parser
18
+
19
+ == LICENSE:
20
+
21
+ (The MIT License)
22
+
23
+ Copyright (c) 2011 kame
24
+
25
+ Permission is hereby granted, free of charge, to any person obtaining
26
+ a copy of this software and associated documentation files (the
27
+ 'Software'), to deal in the Software without restriction, including
28
+ without limitation the rights to use, copy, modify, merge, publish,
29
+ distribute, sublicense, and/or sell copies of the Software, and to
30
+ permit persons to whom the Software is furnished to do so, subject to
31
+ the following conditions:
32
+
33
+ The above copyright notice and this permission notice shall be
34
+ included in all copies or substantial portions of the Software.
35
+
36
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
37
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
38
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
39
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
40
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
41
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
42
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,72 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/gempackagetask'
6
+ require 'spec'
7
+ require 'spec/rake/spectask'
8
+
9
# Running plain `rake` runs the :test task.
desc 'Default: run unit tests.'
task :default => [:test]

# Spec runner registered under the task name :test; it picks up every
# *_spec.rb file below spec/.
desc 'Test the video_parser gem.'
Spec::Rake::SpecTask.new(:test) do |spec_task|
  spec_task.spec_files = FileList['spec/**/*_spec.rb']
  spec_task.spec_opts  = %w[-c -f nested]
end

# RDoc output goes to ./rdoc and covers the README plus everything under lib/.
desc 'Generate documentation for the video_parser gem.'
Rake::RDocTask.new(:rdoc) do |doc_task|
  doc_task.rdoc_dir = 'rdoc'
  doc_task.title    = 'VideoParser'
  doc_task.options.push('--line-numbers', '--inline-source')
  doc_task.rdoc_files.include('README', 'lib/**/*.rb')
end

# Version and file list shared with the gem specification below.
PKG_VERSION = "0.1.0"
PKG_FILES   = FileList["lib/**/*", "spec/**/*", "Rakefile", "README"]

# Generate the package: the gem specification consumed by the packaging task.
spec = Gem::Specification.new do |gem|
  # Basic information.
  gem.name    = 'video_parser'
  gem.version = PKG_VERSION
  gem.summary = <<-TEXT
    This gem is used to get video info from youku, tudou, slideshare.
  TEXT
  gem.description = <<-TEXT
    This gem is used to get video info from youku, tudou, slideshare.
  TEXT
  gem.homepage = "https://github.com/kamechb/video_parser"

  # Files shipped in the gem (everything matched by PKG_FILES).
  gem.files = PKG_FILES

  # Load-time details: code is required from lib/.
  gem.require_path = 'lib'

  # Runtime dependencies, without version constraints.
  %w[httparty nokogiri].each { |dependency| gem.add_dependency(dependency) }

  # Documentation and testing.
  gem.has_rdoc = true

  # Author and project details.
  gem.authors = ["kame"]
  gem.email   = ["kamechb@gmail.com"]
end

# Packaging task; also produces .zip and .tar archives.
Rake::GemPackageTask.new(spec) do |package|
  package.need_zip = true
  package.need_tar = true
end
@@ -0,0 +1,30 @@
1
+ require 'iconv'
2
+ require 'nokogiri'
3
+ require 'httparty'
4
+
5
# Mixin that downloads a page over HTTP and parses it into an HTML document.
#
# Uses HTTParty for the request, Nokogiri for parsing and Iconv for
# re-encoding; the enclosing file requires all three.
module GateWay
  # Fetches +uri+ and returns the parsed HTML document.
  #
  # uri     - anything whose #to_s yields the address to request.
  # options - Hash; only :timeout is read (defaults to 20). The hash is
  #           never modified.
  #
  # Returns the Nokogiri document, or nil when the request, the charset
  # conversion or the parsing raises a StandardError.
  def get_doc(uri, options = {})
    # Read the default instead of writing it back with ||=, so the caller's
    # hash is left untouched.
    timeout = options[:timeout] || 20
    begin
      page = HTTParty.get(uri.to_s, :timeout => timeout).to_s
      Nokogiri::HTML(conv_page(page))
    rescue
      nil
    end
  end

  private

  # Returns +page+ converted to UTF-8 when it declares a different charset.
  # Pages with no detectable charset, or one that already names UTF-8 in any
  # letter case (with or without the hyphen), are returned unchanged.
  def conv_page(page)
    charset = get_charset(page)
    return page if charset.nil? || charset =~ /\Autf-?8\z/i

    Iconv.conv('UTF-8', charset, page)
  end

  # Extracts the first declared charset name from +page+, e.g. "gbk" from
  # <tt>content="text/html; charset=gbk"</tt> or from <tt><meta charset=gbk></tt>.
  #
  # The name ends at the first quote, whitespace, ';', '>' or '/', so an
  # unquoted declaration no longer swallows the markup that follows it.
  # Returns nil when no non-empty charset is declared.
  def get_charset(page)
    page[/charset\s*=\s*['"]?([^'"\s;>\/]+)/i, 1]
  end
end
@@ -0,0 +1,38 @@
1
+ require 'video_parser/video_site'
2
+
3
module VideoParser
  # Reads title, embed URL and thumbnail from a SlideShare page
  # (host www.slideshare.net).
  class SlideShare < VideoSite
    VideoHost = "www.slideshare.net"

    class << self
      # Returns the iframe markup that embeds +video_src+.
      def embed_html(video_src)
        <<-EMBED
        <div style='width:425px'>
          <iframe src='#{video_src}' width='425' height='355' frameborder='0' marginwidth='0' marginheight='0' scrolling='no'></iframe>
        </div>
        EMBED
      end
    end

    private

    # Collects the three pieces of information into one Hash; nil when no
    # document was fetched.
    def extract_video_info
      return nil if !doc

      info = {}
      info[:title]     = title
      info[:video_src] = video_src
      info[:pic_path]  = pic_path
      info
    end

    # The title is the content of the og_title meta tag, e.g.
    # <meta name="og_title" property="og:title" content="Professional Programmer" />
    def title
      meta_content('meta[name~=og_title]')
    end

    # The embed URL is the iframe_url value inside the script element whose
    # id is page-json. Any error while looking it up yields nil.
    def video_src
      json_text = doc.at_css('script[id~=page-json]').inner_text
      found = json_text.match(/['"]iframe_url['"]\:\s*?['"](.*?)['"],/)
      found && found[1]
    rescue
      nil
    end

    # The thumbnail path is the content of the og_image meta tag.
    def pic_path
      meta_content('meta[name~=og_image]')
    end

    # Content attribute of the first element matching +selector+; nil when the
    # lookup raises (for example because nothing matched).
    def meta_content(selector)
      doc.at_css(selector)[:content]
    rescue
      nil
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require 'video_parser/video_site'
2
+
3
module VideoParser
  # Reads title, player URL and thumbnail from a Tudou page
  # (host www.tudou.com).
  #
  # Two URL families are supported: paths containing "/playlist" and paths
  # containing "/programs".
  class TuDou < VideoSite
    VideoHost = "www.tudou.com"

    # Kept for backward compatibility. Nothing in this class assigns the
    # backing instance variables, so both readers return nil; the private
    # predicates is_playlist? / is_programs? carry the real logic.
    attr_reader :is_playlist, :is_programs

    # Returns the <embed> markup for the player URL +video_src+.
    def self.embed_html(video_src)
      <<-EMBED
        <embed src = '#{video_src}' type='application/x-shockwave-flash' allowscriptaccess='always' allowfullscreen='true' wmode='opaque' width='480' height='400'></embed>
      EMBED
    end

    private

    # Returns the info Hash, or nil when no document was fetched.
    def extract_video_info
      return nil unless doc
      {:title => title, :video_src => video_src, :pic_path => pic_path}
    end

    # Text of the first <h1>, or nil when the page has none.
    def title
      heading = doc.at_css("h1")
      heading && heading.inner_text
    end

    # Builds the player URL.
    #
    # Playlist pages need the lcode from the page script plus an item id; the
    # id comes from the URL when present, otherwise from defaultIid in the
    # script. Programs pages carry the code in the path after "view/".
    #
    # Returns nil when a required part is missing (instead of a URL with
    # blank segments) or when the URL belongs to neither family.
    def video_src
      if is_playlist?
        lcode = get_lcode_from_script
        iid = iid_from_uri || get_default_iid_from_script
        return nil if lcode.to_s.empty? || iid.to_s.empty?

        "http://www.tudou.com/l/#{lcode}/&iid=#{iid}/v.swf"
      elsif is_programs?
        lcode = uri.path[/view\/(.*?)\//, 1]
        return nil if lcode.to_s.empty?

        "http://www.tudou.com/v/#{lcode}/v.swf"
      end
    end

    # Item id taken from the URL itself, or nil.
    #
    # The iid= form lives in the query string or fragment, which URI#path
    # never includes, so that pattern is matched against the full URL.
    def iid_from_uri
      uri.path[/\/p\/l\d+i(\d+).*\.html/, 1] ||
        uri.to_s[/[&#\?]iid=(\d+)/, 1] ||
        uri.path[/\/p\/a\d+i(\d+).*\.html/, 1]
    end

    # pic in script content is:
    #
    #   pic:"http://i4.tdimg.com/112/289/113/p.jpg"
    #
    # on playlist pages and uses the "pic = '...'," form on programs pages.
    def pic_path
      if is_playlist?
        script_content[/pic:\s*?['"](.*?)['"]\s*?,/, 1]
      elsif is_programs?
        script_content[/pic\s*?=\s*?['"](.*?)['"]\s*?,/, 1]
      end
    end

    # Truthy when the URL path contains "/playlist".
    def is_playlist?
      uri.path =~ /\/playlist/
    end

    # Truthy when the URL path contains "/programs".
    def is_programs?
      uri.path =~ /\/programs/
    end

    # lcode in script content is:
    #
    #   lid_code = lcode = 'uQa8UQMbcWE'
    def get_lcode_from_script
      script_content[/lcode\s*?=\s*?['"](.*?)['"]/m, 1]
    end

    # defaultIid in script content is:
    #
    #   defaultIid = 111370446,
    def get_default_iid_from_script
      script_content[/defaultIid\s*?=\s*?(\d+?)\s*?,/, 1]
    end

    # Content of the first <script> element, memoised. A page without any
    # <script> yields "" so the lookups above return nil instead of raising.
    def script_content
      @script_content ||= begin
        script = doc.at_css("script")
        script ? script.content : ""
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'video_parser/gateway'
2
+
3
module VideoParser
  # Abstract base class for a supported video site.
  #
  # An instance holds the URI to inspect and, after #get has run, the value
  # returned by GateWay#get_doc. Subclasses supply extract_video_info.
  class VideoSite
    include GateWay
    attr_reader :uri, :doc

    # uri - the page address; #get passes it unchanged to get_doc.
    def initialize(uri)
      @uri = uri
    end

    # Fetches the page with get_doc, stores the result in @doc and returns
    # whatever the subclass's extract_video_info returns.
    #
    # options - forwarded unchanged to get_doc.
    def get(options = {})
      @doc = get_doc(uri, options)
      extract_video_info
    end

    private

    # Hook that subclasses must override.
    #
    # Raises RuntimeError when called on a class that did not override it.
    def extract_video_info
      raise "This is an abstract class; you should implement this method in a subclass!"
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'video_parser/video_site'
2
+
3
module VideoParser
  # Reads title, player URL and thumbnail from a Youku page
  # (host v.youku.com).
  class YouKu < VideoSite
    VideoHost = "v.youku.com"

    # Returns the <embed> markup for the player URL +video_src+.
    def self.embed_html(video_src)
      <<-EMBED
        <embed src='#{video_src}' quality='high' width='480' height='400' align='middle' allowScriptAccess='sameDomain' type='application/x-shockwave-flash'></embed>
      EMBED
    end

    private

    # Returns the info Hash, or nil when no document was fetched.
    def extract_video_info
      return nil unless doc
      {:title => title, :video_src => video_src, :pic_path => pic_path}
    end

    # Text of the <h1> carrying the "title" class with all whitespace
    # removed, or nil when the page has no such heading.
    def title
      heading = doc.at_css("h1[class~='title']")
      heading && heading.inner_text.gsub(/\s/, '')
    end

    # Value of the input whose id is link2, or nil when it is absent.
    def video_src
      field = doc.at_css('input[id=link2]')
      field && field[:value]
    end

    # Thumbnail address, read from the pic= parameter in the href of the
    # a#s_sina link (falling back to a#s_sohu).
    #
    # Only a parameter literally named "pic" is accepted, and its whole
    # value up to the next "&" is returned, so an address that itself
    # contains "=" is no longer cut short. Returns nil when neither link
    # exists or the parameter is missing or empty.
    def pic_path
      share_link = doc.at_css('a[id=s_sina]') || doc.at_css('a[id=s_sohu]')
      share_link[:href][/(?:\A|[?&])pic=([^&]+)/, 1]
    rescue
      nil
    end
  end
end
@@ -0,0 +1,41 @@
1
+ $:.unshift(File.expand_path('../', __FILE__)) unless $:.include?(File.expand_path('../', __FILE__))
2
+ require 'rubygems'
3
+ require 'uri'
4
+ require 'video_parser/gateway'
5
+ require 'video_parser/video_site'
6
+ require 'video_parser/youku'
7
+ require 'video_parser/tudou'
8
+ require 'video_parser/slideshare'
9
+
10
+ # VideoParser supports the following video sites:
11
+ # * youku
12
+ # * tudou
13
+ # * slideshare
14
module VideoParser
  extend self

  # Gets the video info for +url+.
  #
  # The video info includes
  # * title
  # * thumbnail pic path
  # * video src
  #
  # url     - page address; a missing scheme is completed with "http://".
  #           Surrounding whitespace is ignored.
  # options - forwarded to the site object's #get.
  #
  # Returns what the matching site class's #get returns, or nil when the
  # address is blank, cannot be parsed, or its host is not a supported site.
  def get(url, options = {})
    address = url.to_s.strip
    return nil if address.empty?

    uri = begin
      URI.parse(auto_complete_url(address))
    rescue
      nil
    end
    video_site = get_video_site(uri)
    video_site && video_site.new(uri).get(options)
  end

  private

  # Prefixes "http://" unless +url+ already starts with an http:// or
  # https:// scheme (any letter case). The check is anchored to the start of
  # the string so hosts that merely begin with "http" still get a scheme.
  def auto_complete_url(url)
    url =~ /\Ahttps?:\/\//i ? url : "http://#{url}"
  end

  # uri is a URI instance.
  #
  # Returns the site class whose VideoHost equals the URI's host, compared
  # case-insensitively, or nil when there is none or +uri+ is not a URI.
  def get_video_site(uri)
    return nil unless uri.is_a?(URI)

    host = uri.host.to_s.downcase
    [YouKu, TuDou, SlideShare].find { |site| site::VideoHost == host }
  end
end
@@ -0,0 +1,2 @@
1
+ $:.unshift(File.expand_path('../../lib', __FILE__))
2
+ require 'video_parser'
@@ -0,0 +1,64 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Examples for VideoParser.get: one group per supported site plus a group
# for addresses that must yield nil.
describe VideoParser do
  describe "youku" do
    # A v_show-format video address should parse to the correct info.
    it "v_show格式的视频地址应该能解析出正确的信息" do
      url = "http://v.youku.com/v_show/id_XMzIzMzQ0MTA0.html"
      expected = {
        :video_src => "http://player.youku.com/player.php/sid/XMzIzMzQ0MTA0/v.swf",
        :pic_path  => "http://g4.ykimg.com/0100641F464EA66CEB57FB050A7945BF6C192B-D2FE-0C76-A668-7E312C869217",
        :title     => "视频:北京现代爱与梦同行"
      }
      VideoParser.get(url).should == expected
    end

    # A v_playlist-format video address should parse to the correct info.
    it "v_playlist格式的视频地址应该能解析出正确的信息" do
      url = "http://v.youku.com/v_playlist/f16661073o1p0.html"
      expected = {
        :video_src => "http://player.youku.com/player.php/Type/Folder/Fid/16661073/Ob/1/Pt/0/sid/XMzI0NzkwNTQ4/v.swf",
        :pic_path  => "http://g2.ykimg.com/01270F1F464ECA6FF944A40123193CA1419694-4596-7AE5-C968-5CE1DF4383C8",
        :title     => "专辑:韩国又抓3艘中国渔船韩媒要求对中国渔船动武"
      }
      VideoParser.get(url).should == expected
    end
  end

  describe "tudou" do
    # A playlist-format address should parse to the correct info.
    it "playlist格式的地址应该能解析出正确的信息" do
      url = "http://www.tudou.com/playlist/p/l14450778.html"
      expected = {
        :video_src => "http://www.tudou.com/l/b5tccWF-afo/&iid=112289113/v.swf",
        :pic_path  => "http://i4.tdimg.com/112/289/113/p.jpg",
        :title     => "暗拍交警拦运煤车赚外快"
      }
      VideoParser.get(url).should == expected
    end

    # A programs-format address should parse to the correct info.
    it "programs格式的地址应该能解析出正确的信息" do
      url = "http://www.tudou.com/programs/view/qF_ZI0HPgig/"
      expected = {
        :video_src => "http://www.tudou.com/v/qF_ZI0HPgig/v.swf",
        :pic_path  => "http://i1.tdimg.com/112/238/874/p.jpg",
        :title     => "元首版 机器猫之歌"
      }
      VideoParser.get(url).should == expected
    end
  end

  describe "slideshare" do
    # A well-formed slideshare address should parse to the correct info.
    it "正确的slideshare格式应该能解析出正确的信息" do
      url = "http://www.slideshare.net/gabriele.lana/professional-programmer"
      expected = {
        :video_src => "http://www.slideshare.net/slideshow/embed_code/10251145",
        :title     => "Professional Programmer",
        :pic_path  => "http://cdn.slidesharecdn.com/craftsman2011-agileday-111121040632-phpapp02-thumbnail-2"
      }
      VideoParser.get(url).should == expected
    end
  end

  # Wrong addresses should return nil: an empty address, a nil address and
  # an address that is not a supported video page.
  describe "错误的地址应该返回nil" do
    [
      ["空地址返回nil", ""],
      ["地址为nil返回nil", nil],
      ["地址为不正确的视频地址返回nil", "http://www.baidu.com"]
    ].each do |example_name, bad_url|
      it example_name do
        VideoParser.get(bad_url).should be_nil
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: video_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - kame
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-04-06 00:00:00 +08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: httparty
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: nokogiri
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ description: " This gem is used to get video info from youku, tudou, slideshare.\n"
50
+ email:
51
+ - kamechb@gmail.com
52
+ executables: []
53
+
54
+ extensions: []
55
+
56
+ extra_rdoc_files: []
57
+
58
+ files:
59
+ - lib/video_parser.rb
60
+ - lib/video_parser/tudou.rb
61
+ - lib/video_parser/gateway.rb
62
+ - lib/video_parser/youku.rb
63
+ - lib/video_parser/slideshare.rb
64
+ - lib/video_parser/video_site.rb
65
+ - spec/video_parser_spec.rb
66
+ - spec/spec_helper.rb
67
+ - Rakefile
68
+ - README
69
+ has_rdoc: true
70
+ homepage: https://github.com/kamechb/video_parser
71
+ licenses: []
72
+
73
+ post_install_message:
74
+ rdoc_options: []
75
+
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ hash: 3
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ hash: 3
93
+ segments:
94
+ - 0
95
+ version: "0"
96
+ requirements: []
97
+
98
+ rubyforge_project:
99
+ rubygems_version: 1.6.2
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: This gem is used to get video info from youku, tudou, slideshare.
103
+ test_files: []
104
+