yasuyuki-nicovideo 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,92 @@
1
+ require 'kconv'
2
+
3
module Nicovideo

  # Base class for every page fetched from nicovideo.jp.
  #
  # A subclass is expected to set @url, override #parse to fill its own
  # instance variables from the fetched page, and call #register_getter to
  # expose those variables through readers that fetch the page on demand.
  class Page
    # Verbosity of the puts_* helpers: 1 enables errors, 2 info, 3 debug.
    NV_DEBUG_LEVEL = 0

    BASE_URL    = 'http://www.nicovideo.jp'
    BASE_TITLE1 = '‐ニコニコ動画\(秋\)'.toutf8
    BASE_TITLE2 = 'ニコニコ動画\(秋\)‐'.toutf8

    # agent:: the HTTP agent; it must respond to #get(url).
    def initialize(agent)
      @agent = agent
      @page = nil
      @title = nil

      @not_found = false
    end

    # Returns true when the page is already cached or can be fetched from
    # @url, false when fetching or parsing raises a StandardError.
    def exists?
      # get_page requires the URL; calling it without one always raised
      # ArgumentError, which the rescue below turned into a constant false.
      @page ||= get_page(@url)
      true
    rescue
      false
    end

    # Returns the page's HTML as produced by the page parser, fetching the
    # page from @url first when it is not cached. Returns nil when no page
    # object is available.
    def html
      page = @page || get_page(@url)
      return nil unless page
      page.parser.to_html
    end

    # Presets the title so it can be read without fetching the page.
    def title=(title)
      @title = title
    end

    protected

    # Defines one public reader per entry of +params+ on the receiver's class
    # and resets the backing instance variable to nil.
    #
    # A name ending in "?" (e.g. "has_next?") is backed by the variable
    # without the question mark (@has_next). While the variable is nil and no
    # page is cached, the reader fetches @url first, which runs #parse.
    def register_getter(params)
      params.each do |name|
        ivar = "@#{name.sub(/\?$/, '')}"
        instance_variable_set(ivar, nil)
        # define_method instead of string eval: no code is built from text,
        # and the reader lands on the concrete class rather than on Page.
        self.class.send(:define_method, name) do
          @page ||= get_page(@url) if instance_variable_get(ivar).nil?
          instance_variable_get(ivar)
        end
      end
    end

    # Hook called by #get_page with the freshly fetched page.
    def parse(page)
      # to be extended
    end

    # Fetches +url+ through the agent, runs #parse on the result and caches it.
    #
    # url::   address to fetch.
    # force:: when true, refetch even if a page is already cached.
    #
    # Returns the cached page. Raises NotFound for HTTP 404/410 (and on every
    # later call once that happened), Forbidden for HTTP 403, and re-raises
    # any other response-code error.
    def get_page(url, force = false)
      return @page if @page && !force
      raise NotFound if @not_found

      puts_info "getting html page : url = #{url}"
      begin
        page = @agent.get(url)
        puts_debug page.header
        puts_debug page.body

        parse(page)
        @page = page
      rescue WWW::Mechanize::ResponseCodeError => e
        rc = e.response_code
        puts_info rc
        if rc == "404" || rc == "410"
          @not_found = true
          raise NotFound
        elsif rc == "403"
          raise Forbidden
        else
          raise e
        end
      end
      @page
    end

    # Leveled logging helpers, gated by NV_DEBUG_LEVEL.
    def puts_error(str) ; puts str if NV_DEBUG_LEVEL >= 1 ; end
    def puts_info(str)  ; puts str if NV_DEBUG_LEVEL >= 2 ; end
    def puts_debug(str) ; puts str if NV_DEBUG_LEVEL >= 3 ; end
  end

end
@@ -0,0 +1,38 @@
1
module Nicovideo
  # The "/random" page: a list of videos scraped from its result table.
  class Random < Page
    include Enumerable

    # agent:: the HTTP agent handed to Page.
    def initialize(agent)
      super(agent)
      @url = url
      register_getter ['videos']
    end

    # Yields each VideoPage of the list. Without a block an enumerator is
    # returned instead of raising LocalJumpError.
    def each(&block)
      return to_enum(:each) unless block_given?
      videos.each(&block)
    end

    # Address of the random-videos page.
    def url
      "#{BASE_URL}/random"
    end

    # Returns the scraped videos as an array of VideoPage.
    def to_a
      videos
    end

    # NOTE(review): intentionally left as the no-op it has always been;
    # it does not refetch the page. Confirm whether a forced reload is wanted.
    def reload
    end

    protected

    # Builds @videos from the video links of the fetched page; each link's
    # href yields the video id and its inner HTML the preset title.
    def parse(page)
      links = page/'//td[@class="random_td"]//p[@class="TXT12"]/a[@class="video"]'
      @videos = links.map do |link|
        vp = VideoPage.new(@agent, link.attributes['href'].sub(/watch\/(\w+)$/, '\1'))
        vp.title = link.inner_html
        vp
      end
    end

  end
end
@@ -0,0 +1,36 @@
1
module Nicovideo
  # A ranking page, addressed by ranking type, time span and category.
  class Ranking < Page
    include Enumerable

    # agent::    the HTTP agent handed to Page.
    # type::     ranking type path segment (default 'mylist').
    # span::     time span path segment (default 'daily').
    # category:: category path segment (default 'all').
    # pagenum::  optional page number appended as "?page=N".
    def initialize(agent, type = 'mylist', span = 'daily', category = 'all', pagenum = nil)
      super(agent)
      @type = type
      # span used to be dropped, which produced ".../ranking/<type>//<category>".
      @span = span
      @category = category
      @pagenum = pagenum
      @url = url
      register_getter ['videos']
    end

    # Address of the ranking page for the configured type/span/category/page.
    def url
      path = "#{BASE_URL}/ranking/#{@type}/#{@span}/#{@category}"
      @pagenum ? "#{path}?page=#{@pagenum}" : path
    end

    # Yields each ranked VideoPage; returns an enumerator without a block.
    def each(&block)
      return to_enum(:each) unless block_given?
      videos.each(&block)
    end

    # Returns the ranked videos as an array of VideoPage.
    def to_a
      videos
    end

    protected

    # Builds @videos from the video links of the fetched page.
    def parse(page)
      # BASE_URL is escaped so its dots only match literal dots.
      watch_url = %r{#{Regexp.escape(BASE_URL)}/watch/(\w+)$}
      links = page/'h3/a[@class=video]'
      @videos = links.map do |link|
        vp = VideoPage.new(@agent, link.attributes['href'].sub(watch_url, '\1'))
        vp.title = link.inner_html
        vp
      end
    end

  end
end
@@ -0,0 +1,100 @@
1
+ require 'cgi'
2
+
3
module Nicovideo
  # A keyword search result page with simple paging support.
  class Search < Page
    include Enumerable

    # agent::   the HTTP agent handed to Page.
    # keyword:: search keyword; it is CGI-escaped before being put in the URL.
    # sort::    optional value for the "sort" query parameter.
    # order::   optional value for the "order" query parameter.
    # pagenum:: page number for the "page" query parameter (default 1).
    def initialize(agent, keyword, sort = nil, order = nil, pagenum = 1)
      super(agent)
      @search_type = 'search'
      @keyword = CGI.escape(keyword)
      @sort = sort
      @order = order
      @pagenum = pagenum

      register_getter ['videos', 'total_size', 'has_next?', 'has_prev?']

      @url = url

      puts_info "url = #{@url}"
      puts_info "sort=#{@sort},order=#{@order},pagenum=#{@pagenum}"
    end

    # Address of the result page for the current keyword, sort, order and
    # page number. Parameters are joined with "&" after a single "?"
    # (the previous form produced "?&sort=...").
    def url
      query = []
      query << "sort=#{CGI.escape(@sort.to_s)}" if @sort
      query << "order=#{CGI.escape(@order.to_s)}" if @order
      query << "page=#{@pagenum}" if @pagenum

      base = "#{BASE_URL}/#{@search_type}/#{@keyword}"
      query.empty? ? base : "#{base}?#{query.join('&')}"
    end

    # Yields each VideoPage of the current result page; returns an enumerator
    # when no block is given.
    def each(&block)
      return to_enum(:each) unless block_given?
      videos.each(&block)
    end

    # Returns the videos of the current result page as an array.
    def to_a
      videos
    end

    # Moves to another result page and refetches it. @url is refreshed too,
    # so later lazy fetches do not use the address of the previous page.
    def pagenum=(pagenum)
      if @pagenum != pagenum
        @pagenum = pagenum
        @url = url
        get_page(@url, true)
      end
      @pagenum
    end

    # Alias-style setter for #pagenum=.
    def page=(pagenum)
      self.pagenum = pagenum
    end

    # Advances to the following result page and returns self.
    def next
      self.pagenum = @pagenum + 1
      self
    end

    # Steps back to the preceding result page and returns self.
    def prev
      self.pagenum = @pagenum - 1
      self
    end

    protected

    # Fills @total_size, @has_prev, @has_next and @videos from the fetched
    # page. Raises NotFound (and remembers it) when the page reports that no
    # video contains the keyword.
    def parse(page)
      if page.body =~ /<\/strong> を含む動画はありません。/
        @not_found = true
        raise NotFound
      end

      # delete removes every thousands separator; sub only removed the first,
      # truncating counts of one million or more.
      total = page.search('form[@name="sort"]//td[@class="TXT12"]//strong').first.inner_html
      @total_size = total.delete(',').to_i

      pager_links = page/'//div[@class="mb16p4"]//p[@class="TXT12"]//a'
      puts_info pager_links.size
      labels = pager_links.map { |link| link.inner_html }
      labels.each { |label| puts_info label }
      @has_prev = labels.any? { |label| label =~ /前のページ/ }
      @has_next = labels.any? { |label| label =~ /次のページ/ }

      links = page/'//div[@class="cmn_thumb_R"]//p[@class="TXT12"]/a[@class="video"]'

      puts_info links.size.to_s
      @videos = links.map do |link|
        vp = VideoPage.new(@agent, link.attributes['href'].sub(/watch\/(\w+)$/, '\1'))
        vp.title = link.inner_html
        vp
      end
    end

  end
end
@@ -0,0 +1,13 @@
1
+ require 'cgi'
2
+
3
module Nicovideo
  # Same as Search, but queries the "tag" endpoint instead of "search".
  class TagSearch < Search

    # Takes the same arguments as Search#initialize, then switches the
    # search type and recomputes @url for it.
    def initialize(agent, keyword, sort = nil, order = nil, pagenum = 1)
      super
      @search_type = 'tag'
      @url = url
    end

  end
end
@@ -0,0 +1,68 @@
1
+ require 'open-uri'
2
+ require 'timeout'
3
+ require 'rexml/document'
4
+
5
module Nicovideo
  # Client for the getthumbinfo XML API on ext.nicovideo.jp.
  class Thumbnail
    # proxy_url:: optional proxy passed to open-uri through the :proxy option.
    def initialize(proxy_url = nil)
      @proxy_url = proxy_url
    end

    # Fetches the thumbnail information of +video_id+ and returns it as a
    # ThumbInfo.
    #
    # wait_sec::  timeout, in seconds, of a single request.
    # retry_max:: number of retries after a timeout.
    def get(video_id, wait_sec = 10, retry_max = 2)
      root = get_response(video_id, wait_sec, retry_max)

      get_elements(root.elements['thumb'])
    end

    # Requests the API and returns the root element of the XML response.
    #
    # Raises Errno::ENOENT when the response has no root element or its
    # "status" attribute is not "ok", and Timeout::Error when the request
    # still times out after +retry_max+ retries.
    def get_response(video_id, wait_sec, retry_max)
      retry_count = 0
      begin
        # Timeout.timeout / Timeout::Error replace Kernel#timeout and the
        # TimeoutError alias, which current Ruby versions no longer provide.
        body = Timeout.timeout(wait_sec) { fetch_thumbinfo(video_id) }

        root = REXML::Document.new(body).root
        raise ::Errno::ENOENT.new(video_id) unless root && root.attributes['status'] == 'ok'
        root
      rescue Timeout::Error
        raise if retry_count >= retry_max
        retry_count += 1
        retry
      end
    end

    # Converts the children of +parent+ into a ThumbInfo: every <tags>
    # element becomes a list of tag texts keyed by its "domain" attribute,
    # every other child is stored as name => text.
    def get_elements(parent)
      thumbnail_info = ThumbInfo.new

      parent.each_element do |element|
        if element.name == 'tags'
          domain_tags = []
          element.each_element { |child| domain_tags << child.text }
          thumbnail_info.tags[element.attributes['domain']] = domain_tags
        else
          thumbnail_info[element.name] = element.text
        end
      end
      thumbnail_info
    end

    private

    # Downloads the raw XML for +video_id+. URI.open is used where open-uri
    # provides it, because Kernel#open no longer opens URLs on Ruby 3;
    # older rubies fall back to Kernel#open.
    def fetch_thumbinfo(video_id)
      url = "http://ext.nicovideo.jp/api/getthumbinfo/#{video_id}"
      opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
      opener.call(url, :proxy => @proxy_url) { |f| f.read }
    end
  end

  # Hash of thumbnail fields (element name => text) plus the tags, which are
  # kept separately in #tags as domain => list of tag strings.
  class ThumbInfo < Hash
    attr_accessor :tags

    def initialize
      super()
      @tags = {}
    end

    # Returns true when +tag+ is present in any domain.
    def has_tag?(tag)
      tag_flatten.include?(tag)
    end

    # Returns the tags of all domains as one flat array. It is computed on
    # every call so tags added later are not missed.
    def tag_flatten
      @tags.values.flatten
    end
  end
end
@@ -0,0 +1,9 @@
1
module Nicovideo #:nodoc:
  module VERSION #:nodoc:
    # Version components, most significant first.
    MAJOR, MINOR, TINY = 0, 1, 7

    # Dotted version string built from the three components.
    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,113 @@
1
+ require 'kconv'
2
+ require 'cgi'
3
+
4
module Nicovideo
  # A single "/watch/<video_id>" page together with the getflv API
  # parameters that belong to it.
  class VideoPage < Page
    attr_reader :video_id, :url

    # agent::    the HTTP agent handed to Page.
    # video_id:: id string appended to the watch URL.
    def initialize(agent, video_id)
      super(agent)
      @video_id = video_id
      @params = nil
      @url = BASE_URL + '/watch/' + @video_id
      register_getter ["title", "tags", "published_at", "csrf_token"]
    end

    # Same value as #video_id.
    def id
      @video_id
    end

    # Returns 'mp4', 'swf' or 'flv', decided by the name of the first query
    # parameter of the unescaped 'url' entry of the getflv parameters.
    def type
      @params ||= get_params
      matched = %r!^http://.*\.nicovideo\.jp/smile\?(.*?)=.*$!.match(CGI.unescape(@params['url']))
      case matched && matched[1]
      when 'm' then 'mp4'
      when 's' then 'swf'
      else 'flv'
      end
    end

    # Posts a thread request for the last +num+ comments to the address in
    # the 'ms' parameter and wraps the XML answer in a Comments object.
    # Raises ArgError when the parameters contain no 'ms' entry.
    def comments(num=500)
      puts_info 'getting comment xml : id = ' + @video_id
      @params ||= get_params
      message_server = @params['ms']
      raise ArgError unless message_server

      request = %!<thread res_from="-#{num}" version="20061206" thread="#{@params['thread_id']}" />!
      response_xml = @agent.post_data(CGI.unescape(message_server), request).body
      puts_debug response_xml
      Comments.new(@video_id, response_xml)
    end

    # Same as #video.
    def flv
      video
    end

    # Downloads the file at the unescaped 'url' parameter through the agent
    # and returns what the agent's get_file returns.
    def video
      @params ||= get_params
      @agent.get_file(CGI.unescape(@params['url']))
    end

    # Presets the title so it can be read without fetching the page.
    def title=(title)
      @title = title
    end

    # Returns an OpenList for this video.
    # NOTE(review): +page+ is accepted but not forwarded — confirm intent.
    def openlist(page=1)
      OpenList.new(@agent, @video_id)
    end

    # True when the unescaped video URL ends with "low".
    def low?
      @params ||= get_params
      CGI.unescape(@params['url']) =~ /low$/ ? true : false
    end

    private

    # Fills @title, @tags, @published_at and @csrf_token from the watch page.
    def parse(page)
      # title, with the site suffix removed
      @title = page.title.toutf8.sub(/#{BASE_TITLE1}$/ou, '')

      # tags: text of every tag link inside div#video_tags
      tag_html = page.parser.search("div#video_tags").to_html
      @tags = tag_html.scan(/<a href=\"tag\/[\w\%]+?\">(.+?)<\/a>/ou).map do |captures|
        puts_debug captures[0]
        captures[0]
      end

      # published_at: the digit groups of the header text, fed to Time.mktime
      header_text = page.search("div[@id='WATCHHEADER']//p[@class='TXT12']/strong")[0].inner_text
      @published_at = Time.mktime(*header_text.scan(/\d+/))

      # csrf_token: hidden input of the mylist form
      @csrf_token = page.search("form[@name='mylist_form']//input[@name='csrf_token']")[0]['value']
    end

    # Returns the getflv parameters as a Hash of raw (still escaped) strings,
    # fetching the watch page and the API once and caching the result.
    #
    # NOTE(review): every StandardError raised while fetching is reported as
    # NotFound and marks the page as not found — confirm that this is wanted
    # for errors other than a missing video.
    def get_params
      raise NotFound if @not_found
      begin
        if @params
          puts_info 'params have already gotten : id = ' + @video_id
        else
          puts_info 'getting params : id = ' + @video_id
          @page ||= get_page(@url)
          content = @agent.get_file(BASE_URL + '/api/getflv?v=' + @video_id)
          puts_debug content
          @params = Hash[content.scan(/([^&]+)=([^&]*)/)]
        end
        @params
      rescue
        @not_found = true
        raise NotFound
      end
    end
  end
end
data/lib/nicovideo.rb ADDED
@@ -0,0 +1,20 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'rubygems'
4
+ require 'nicovideo/mechanize-ext'
5
+ require 'nicovideo/base'
6
+ require 'nicovideo/page'
7
+ require 'nicovideo/videopage'
8
+ require 'nicovideo/comments'
9
+ require 'nicovideo/mylist'
10
+ require 'nicovideo/openlist'
11
+ require 'nicovideo/search'
12
+ require 'nicovideo/tagsearch'
13
+ require 'nicovideo/ranking'
14
+ require 'nicovideo/random'
15
+ require 'nicovideo/newarrival'
16
+ require 'nicovideo/thumbnail'
17
+ #require 'nicovideo/tags'
18
+ #require 'nicovideo/ichiba'
19
+ #require 'nicovideo/feed'
20
+
@@ -0,0 +1,42 @@
1
require 'rubygems'
require 'nicovideo'
require 'yaml'

# Sample: for every video id given on the command line, print its id, title
# and tags, then save its comments to "<id>.xml" and its video to "<id>.flv".

# set account
account = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
nv = Nicovideo.new(account['mail'], account['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
nv.login

# get videos and comments
ARGV.each do |video_id|
  nv.watch(video_id) do |v|
    # 'id' and 'video_id' return the video ID (string).
    puts 'video id = ' + v.id

    # 'title' returns a string.
    puts 'title = ' + v.title

    # 'tags' returns an array of strings.
    puts 'tags = ' + v.tags.join(' ')

    # 'comments' returns a Comments instance, which has 'to_xml' and
    # 'to_s' (same).
    puts 'getting comments xml'
    File.open("#{video_id}.xml", "wb") { |f| f.write v.comments(100).to_xml }

    # 'flv' and 'video' return the raw flv data (binary).
    puts 'getting flv file'
    File.open("#{video_id}.flv", "wb") { |f| f.write v.flv }
  end

  sleep 3
end
@@ -0,0 +1,34 @@
1
require 'rubygems'
require 'nicovideo'
require 'yaml'

# Sample: the same download as the nv_download sample, written with the
# get_* convenience methods of the Nicovideo instance.

# set account
account = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
nv = Nicovideo.new(account['mail'], account['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
nv.login

# get videos and comments
ARGV.each do |video_id|
  # another way of doing nv_download
  puts nv.get_title(video_id)
  puts nv.get_tags(video_id).join(' ')

  puts 'getting comments xml'
  File.open("#{video_id}.xml", "wb") { |f| f.write nv.get_comments(video_id, 100).to_xml }

  puts 'getting flv file'
  File.open("#{video_id}.flv", "wb") { |f| f.write nv.get_flv(video_id) }

  sleep 1
end
@@ -0,0 +1,37 @@
1
require 'rubygems'
require 'nicovideo'
require 'yaml'

# Sample: print id, title, user, description and video ids of every mylist
# id given on the command line.

# set account
account = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
nv = Nicovideo.new(account['mail'], account['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
nv.login

# get mylist
ARGV.each do |mylist_id|
  ml = nv.mylist(mylist_id)

  # 'id' and 'mylist_id' return the mylist ID (string).
  puts 'mylist id = ' + ml.id

  # 'title', 'user' and 'description' return strings.
  puts 'title = ' + ml.title
  puts 'user = ' + ml.user
  puts 'description = ' + ml.description

  # 'videos' returns an array of VideoPage.
  ml.videos.each { |v| puts v.id }

  sleep 3
end
@@ -0,0 +1,35 @@
1
require 'rubygems'
require 'nicovideo'
require 'yaml'

# Sample: for every video id given on the command line, walk through all
# pages of its open list and print each mylist as "<id>:<title>".

# set account
account = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
nv = Nicovideo.new(account['mail'], account['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
nv.login

# get openlist
ARGV.each do |video_id|
  ol = nv.openlist(video_id)

  # 'id' and 'video_id' return the video ID (string).
  puts 'video id = ' + ol.id

  # 'total_size' returns a Fixnum.
  puts 'total_size = ' + ol.total_size.to_s

  # 'mylists' returns an array of MyList; the current page is printed first,
  # then the loop continues while a following page exists.
  begin
    ol.mylists.each { |ml| puts ml.id + ':' + ml.title }
    sleep 1
  end while (ol.has_next? && ol.next)
end
@@ -0,0 +1,21 @@
1
#!/usr/bin/ruby -Ku

require 'rubygems'
require 'nicovideo'
# YAML is used below, so it is required here explicitly, as in the other samples.
require 'yaml'

# Sample: print the default ranking as "<rank> <video id> <title>" lines.

conf = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')
nv = Nicovideo.new(conf['mail'], conf['password'])

cnt = 0
nv.ranking.each do |vp|
  cnt += 1
  printf "%3d位 %-12s %s \n", cnt, vp.video_id, vp.title
end

# When using more options, pass type, span, category and page number
# positionally:
#
#   nv.ranking('mylist', 'daily', 'all', '1').each do |vp|
#     cnt += 1
#     printf "%3d位 %-12s %s \n", cnt, vp.video_id, vp.title
#   end
data/test/runner.rb ADDED
@@ -0,0 +1,3 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ Test::Unit::AutoRunner.run(true, File.dirname(__FILE__))
@@ -0,0 +1,2 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/../lib/nicovideo'