emergent-nicovideo 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ require 'kconv'
2
+
3
+ module Nicovideo
4
+
5
# Base class for every page object of the library.
#
# A Page lazily downloads its HTML through the agent handed to the
# constructor, caches the fetched page in @page and lets subclasses pull
# their attributes out of it by overriding #parse. Subclasses are expected
# to assign @url (this class never sets it).
class Page
  # Verbosity of the puts_* helpers: 0 = silent, 1 = errors, 2 = info, 3 = debug.
  NV_DEBUG_LEVEL = 0

  BASE_URL    = 'http://www.nicovideo.jp'
  # Site-name affixes of page titles, written as regexp source
  # (the parentheses are escaped on purpose).
  BASE_TITLE1 = '‐ニコニコ動画\(秋\)'.toutf8
  BASE_TITLE2 = 'ニコニコ動画\(秋\)‐'.toutf8

  # agent:: object used for the HTTP requests; only #get is called on it here.
  def initialize agent
    @agent = agent
    @page  = nil
    @title = nil

    @not_found = false
  end

  public

  # Returns true when the page is already cached or can be fetched,
  # false when fetching raises any StandardError.
  #
  # The URL is passed explicitly: get_page has a mandatory url parameter,
  # so the former bare `get_page` call always raised ArgumentError, which
  # the rescue below silently turned into `false`.
  def exists?()
    begin
      @page = @page || get_page(@url)
      return true
    rescue
      return false
    end
  end

  # Returns the HTML of the page as a string (fetching it on first use),
  # or nil when no page object is available.
  def html()
    page = @page || get_page(@url)
    return nil unless page
    page.parser.to_html
  end

  # Overrides the title kept in @title.
  def title=(title)
    @title = title
  end

  protected

  # Defines one lazy reader per entry of +params+ (an array of method
  # names, optionally ending in "?").
  #
  # Each reader returns the instance variable named after the method
  # (without the trailing "?"); when that variable is still nil the page is
  # fetched first, which runs #parse. The variable is also reset to nil on
  # the calling instance.
  #
  # NOTE(review): the names are interpolated into eval'd source, so they
  # must only ever come from library code, never from external input.
  def register_getter(params)
    params.each do |name|
      ivar = name.sub(/\?$/, '')
      eval <<-E
        @#{ivar} = nil
        def #{name}
          if @#{ivar}.nil?
            @page ||= get_page(@url)
          end
          @#{ivar}
        end
      E
    end
  end

  # Hook called with every freshly fetched page; subclasses override it to
  # fill their instance variables. The base implementation does nothing.
  def parse page
    # to be extended
  end

  # Fetches +url+ through the agent, runs #parse on the result and caches it.
  #
  # url::   address to fetch.
  # force:: when true the cached page is ignored and the URL is fetched again.
  #
  # Returns the cached or freshly fetched page.
  # Raises NotFound for a 404/410 response (and on every later call once
  # that happened), Forbidden for a 403 response, and re-raises any other
  # response-code error unchanged.
  def get_page url, force=false
    return @page if @page && !force
    raise NotFound if @not_found

    puts_info 'getting html page : url = ' + url.to_s
    begin
      page = @agent.get(url)
      puts_debug page.header
      puts_debug page.body

      # parse before caching, so a failing parse does not leave a page cached
      parse(page)
      @page = page
    rescue WWW::Mechanize::ResponseCodeError => e
      rc = e.response_code
      puts_info rc
      if rc == "404" || rc == "410"
        @not_found = true
        raise NotFound
      elsif rc == "403"
        raise Forbidden
      else
        raise e
      end
    end
    @page
  end

  # Console output helpers gated by NV_DEBUG_LEVEL.
  def puts_error str ; puts str if (NV_DEBUG_LEVEL >= 1) ; end
  def puts_info  str ; puts str if (NV_DEBUG_LEVEL >= 2) ; end
  def puts_debug str ; puts str if (NV_DEBUG_LEVEL >= 3) ; end
end
91
+
92
+ end
@@ -0,0 +1,38 @@
1
+ module Nicovideo
2
# The "random videos" page: an enumerable list of VideoPage objects
# scraped from BASE_URL/random.
class Random < Page
  include Enumerable

  def initialize(agent)
    super(agent)
    @url = url()
    self.register_getter ["videos"]
  end

  # Address of the random page.
  def url
    "#{BASE_URL}/random"
  end

  # Yields every video of the page (fetching the page on first use).
  def each
    self.videos.each do |video|
      yield video
    end
  end

  # The videos as a plain array.
  def to_a
    videos()
  end

  # Intentionally empty: nothing is reloaded.
  def reload
  end

  protected

  # Builds @videos from the video links found on the fetched page; the
  # video id is the last path component of each link and the link text
  # becomes the title.
  def parse(page)
    links = page/'//td[@class="random_td"]//p[@class="TXT12"]/a[@class="video"]'
    found = []
    links.each do |link|
      video_id = link.attributes['href'].sub(/watch\/(\w+)$/,'\1')
      video = VideoPage.new(@agent, video_id)
      video.title = link.inner_html
      found << video
    end
    @videos = found
  end
end
38
+ end
@@ -0,0 +1,36 @@
1
+ module Nicovideo
2
# A ranking page: the list of VideoPage objects scraped from
# BASE_URL/ranking/<type>/<span>/<category>.
class Ranking < Page
  include Enumerable

  # agent::    object used for the HTTP requests.
  # type::     first path segment after /ranking (default 'mylist').
  # span::     second path segment (default 'daily').
  # category:: third path segment (default 'all').
  # pagenum::  optional page number, appended as ?page=N when given.
  def initialize agent, type='mylist', span='daily', category='all', pagenum=nil
    super(agent)
    @type     = type
    # span was accepted but never stored, which left an empty path segment
    # ("/ranking/mylist//all") in the generated URL.
    @span     = span
    @category = category
    @pagenum  = pagenum
    @url      = url()
    self.register_getter ["videos"]
  end

  # Address of the ranking page described by the constructor arguments.
  def url
    url = "#{BASE_URL}/ranking/#{@type}/#{@span}/#{@category}"
    url += '?page=' + @pagenum.to_s if @pagenum
    url
  end

  # Yields every ranked video in page order (fetching the page on first use).
  def each
    videos.each { |video| yield video }
  end

  # The ranked videos as a plain array.
  def to_a
    videos()
  end

  protected

  # Builds @videos from the video links of the fetched page; the link text
  # becomes the title of each VideoPage.
  def parse(page)
    ranking = page/'h3/a[@class=video]'
    # BASE_URL is escaped so that its dots only match literal dots.
    watch_url = /#{Regexp.escape(BASE_URL)}\/watch\/(\w+)$/
    @videos = ranking.inject([]) {|arr, v|
      vp = VideoPage.new(@agent, v.attributes['href'].sub(watch_url, '\1'))
      vp.title = v.inner_html
      arr << vp
    }
  end
end
36
+ end
@@ -0,0 +1,100 @@
1
+ require 'cgi'
2
+
3
+ module Nicovideo
4
# A keyword search result page. Enumerates the VideoPage objects of the
# current result page and can move to the next / previous page.
class Search < Page
  include Enumerable

  # agent::   object used for the HTTP requests.
  # keyword:: search phrase; it is CGI-escaped before being put in the URL.
  # sort::    optional value of the "sort" query parameter.
  # order::   optional value of the "order" query parameter.
  # pagenum:: result page number (default 1).
  def initialize agent, keyword, sort=nil, order=nil, pagenum=1
    super(agent)
    @search_type = 'search'
    #@keyword = CGI.escape(CGI.escape(keyword))
    @keyword = CGI.escape(keyword)
    @sort    = sort
    @order   = order
    @pagenum = pagenum

    params = ["videos", "total_size", "has_next?", "has_prev?"]
    self.register_getter params

    @url = url()

    puts_info "url = #{@url}"
    puts_info "sort=#{@sort},order=#{@order},pagenum=#{@pagenum}"
  end

  # Address of the current result page. Query parameters are joined with
  # "&" only between them (the URL used to start its query with "?&").
  def url
    base  = "#{BASE_URL}/#{@search_type}/#{@keyword}"
    query = []
    query << "sort=#{@sort}"    if @sort
    query << "order=#{@order}"  if @order
    query << "page=#{@pagenum}" if @pagenum
    query.empty? ? base : "#{base}?#{query.join('&')}"
  end

  # Yields every video of the current result page.
  def each
    self.videos.each {|v|
      yield v
    }
  end

  def to_a() self.videos end

  # Switches to result page +pagenum+ and fetches it immediately when the
  # number differs from the current one. Returns the page number.
  def pagenum=(pagenum)
    if @pagenum != pagenum
      @pagenum = pagenum
      # keep @url in step, otherwise the lazy getters would still point at
      # the previous page's address
      @url = url()
      get_page(@url, true)
    end
    @pagenum
  end

  # Alias-style writer for #pagenum=.
  def page=(pagenum)
    self.pagenum = pagenum
  end

  # Moves to the following result page and returns self.
  def next
    self.pagenum = (@pagenum || 1) + 1   # a nil page number counts as page 1
    self
  end

  # Moves to the preceding result page and returns self.
  def prev
    self.pagenum = (@pagenum || 1) - 1
    self
  end

  protected

  # Extracts @total_size, @has_prev, @has_next and @videos from a fetched
  # result page. Raises NotFound when the page reports no matching video.
  def parse(page)
    if page.body =~ /<\/strong> を含む動画はありません。/
      @not_found = true
      raise NotFound
    end

    # every thousands separator has to go ("1,234,567" -> 1234567); removing
    # only the first comma truncated the count at the second one.
    total = page.search('form[@name="sort"]//td[@class="TXT12"]//strong').first.inner_html
    @total_size = total.delete(',').to_i

    respages = page/'//div[@class="mb16p4"]//p[@class="TXT12"]//a'
    puts_info respages.size
    respages.each {|r| puts_info r.inner_html }
    # paging links are recognised by their label text
    @has_prev = respages.any? {|link| link.inner_html =~ /前のページ/ }
    @has_next = respages.any? {|link| link.inner_html =~ /次のページ/ }

    result_xpath = page/'//div[@class="cmn_thumb_R"]//p[@class="TXT12"]/a[@class="video"]'

    puts_info result_xpath.size.to_s
    @videos = result_xpath.inject([]) {|arr, v|
      vp = VideoPage.new(@agent, v.attributes['href'].sub(/watch\/(\w+)$/,'\1'))
      vp.title = v.inner_html
      arr << vp
    }
  end
end
100
+ end
@@ -0,0 +1,13 @@
1
+ require 'cgi'
2
+
3
+ module Nicovideo
4
# Same as Search, but queries the "tag" section of the site instead of
# the "search" section.
class TagSearch < Search
  def initialize(agent, keyword, sort = nil, order = nil, pagenum = 1)
    super(agent, keyword, sort, order, pagenum)
    # the parent built @url for a keyword search; switch the section and
    # rebuild it
    @search_type = 'tag'
    @url = self.url
  end
end
13
+ end
@@ -0,0 +1,68 @@
1
+ require 'open-uri'
2
+ require 'timeout'
3
+ require 'rexml/document'
4
+
5
+ module Nicovideo
6
# Client for the getthumbinfo API on ext.nicovideo.jp, which describes a
# video as an XML document.
class Thumbnail
  # proxy_url:: optional proxy passed to open-uri as its :proxy option.
  def initialize(proxy_url = nil)
    @proxy_url = proxy_url
  end

  # Returns a ThumbInfo built from the <thumb> element of the response.
  #
  # video_id::  id of the video to look up.
  # wait_sec::  timeout of a single request, in seconds.
  # retry_max:: number of additional attempts after a timeout.
  def get(video_id, wait_sec = 10, retry_max = 2)
    root = get_response(video_id, wait_sec, retry_max)

    get_elements(root.elements["thumb"])
  end

  # Downloads and parses the API response and returns its root element.
  #
  # Raises Errno::ENOENT when the response is empty or its status is not
  # "ok", and Timeout::Error once retry_max retries have timed out as well.
  def get_response(video_id, wait_sec, retry_max)
    retry_count = 0
    begin
      # Timeout.timeout / Timeout::Error replace the top-level `timeout`
      # and `TimeoutError`, which current Ruby versions no longer define.
      body = Timeout.timeout(wait_sec) { fetch_body(video_id) }

      root = REXML::Document.new(body).root
      # a missing root or status attribute counts as "not ok" instead of
      # failing with NoMethodError
      raise ::Errno::ENOENT::new(video_id) unless root && root.attributes['status'] == 'ok'
      root
    rescue Timeout::Error => e
      raise e if retry_count >= retry_max
      retry_count += 1
      retry
    end
  end

  # Converts the children of +parent+ into a ThumbInfo: every <tags>
  # element becomes an array of tag texts stored under its "domain"
  # attribute, every other element is stored as name => text.
  def get_elements(parent)
    thumbnail_info = ThumbInfo.new

    parent.each_element do |element|
      if element.name == 'tags'
        domain = element.attributes['domain']
        thumbnail_info.tags[domain] = []
        element.each_element do |child|
          thumbnail_info.tags[domain] << child.text
        end
        next
      end
      thumbnail_info[element.name] = element.text
    end
    thumbnail_info
  end

  private

  # Reads the raw API response for +video_id+.
  # URI.open is used where open-uri provides it, because Kernel#open does
  # not open URLs on Ruby 3; older versions fall back to Kernel#open.
  def fetch_body(video_id)
    url = "http://ext.nicovideo.jp/api/getthumbinfo/#{video_id}"
    if URI.respond_to?(:open)
      URI.open(url, :proxy => @proxy_url) { |f| f.read }
    else
      open(url, :proxy => @proxy_url) { |f| f.read }
    end
  end
end
52
+
53
# Result of a thumbnail lookup: a Hash of element name => text, plus the
# tags grouped by domain in #tags (domain => array of tag strings).
class ThumbInfo < Hash
  attr_accessor :tags

  def initialize
    super()
    @tags = {}
  end

  # Returns true when +tag+ is present in any domain.
  #
  # The former implementation folded the tags into a hash with a block
  # that returned the tag string instead of the accumulator, so it raised
  # as soon as at least one tag was present. It also cached its result,
  # which went stale whenever #tags changed afterwards.
  def has_tag?(tag)
    tag_flatten.include?(tag)
  end

  # All tags of all domains as one flat array, computed from the current
  # content of #tags.
  def tag_flatten
    @tags.values.flatten
  end
end
68
+ end
@@ -0,0 +1,9 @@
1
module Nicovideo #:nodoc:
  # Version number of the library, as separate components and as a
  # dotted string.
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 1, 7

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,113 @@
1
+ require 'kconv'
2
+ require 'cgi'
3
+
4
+ module Nicovideo
5
# The watch page of a single video. Gives access to the scraped
# attributes (title, tags, published_at, csrf_token) as well as to the
# comments and the video file itself.
class VideoPage < Page
  attr_reader :video_id, :url

  def initialize(agent, video_id)
    super(agent)
    @video_id = video_id
    @params   = nil
    @url      = BASE_URL + '/watch/' + @video_id
    register_getter ["title", "tags", "published_at", "csrf_token"]
  end

  # Same value as #video_id.
  def id() @video_id end

  # Container format of the video, derived from the first query key of
  # the video URL: 'm' => 'mp4', 's' => 'swf', anything else => 'flv'.
  def type
    @params ||= get_params
    video_url = CGI.unescape(@params['url'])
    matched   = %r!^http://.*\.nicovideo\.jp/smile\?(.*?)=.*$!.match(video_url)
    key       = matched && matched[1]
    case key
    when 'm' then 'mp4'
    when 's' then 'swf'
    else          'flv'
    end
  end

  # Posts a thread request for the latest +num+ comments to the message
  # server named in the video parameters and wraps the answer in a
  # Comments object. Raises ArgError when no message server is known.
  def comments(num=500)
    puts_info 'getting comment xml : id = ' + @video_id
    @params ||= get_params
    message_server = @params['ms']
    raise ArgError unless message_server

    thread_id = @params['thread_id']
    request   = %!<thread res_from="-#{num}" version="20061206" thread="#{thread_id}" />!
    comment_xml = @agent.post_data(CGI.unescape(message_server), request).body
    puts_debug comment_xml
    Comments.new(@video_id, comment_xml)
  end

  # Same as #video.
  def flv() return video() end

  # Downloads the video file and returns what the agent's get_file returns.
  def video()
    @params ||= get_params
    @agent.get_file(CGI.unescape(@params['url']))
  end

  def title=(title)
    @title = title
  end

  # OpenList of this video.
  # NOTE(review): the page argument is accepted but not used here.
  def openlist(page=1)
    OpenList.new(@agent, @video_id)
  end

  # True when the video URL ends in "low".
  def low?
    @params ||= get_params
    if CGI.unescape(@params['url']) =~ /low$/
      true
    else
      false
    end
  end

  private

  # Fills @title, @tags, @published_at and @csrf_token from the fetched
  # watch page.
  def parse(page)
    # title
    @title = page.title.toutf8.sub(/#{BASE_TITLE1}$/ou, '')

    # tags
    tag_html = page.parser.search("div#video_tags").to_html
    @tags = tag_html.scan(/<a href=\"tag\/[\w\%]+?\">(.+?)<\/a>/ou).map do |captures|
      puts_debug captures[0]
      captures[0]
    end

    # published_at
    date_text = page.search("div[@id='WATCHHEADER']//p[@class='TXT12']/strong")[0].inner_text
    @published_at = Time.mktime(*date_text.scan(/\d+/))

    # csrf_token
    @csrf_token = page.search("form[@name='mylist_form']//input[@name='csrf_token']")[0]['value']
  end

  # Returns the key/value pairs of the getflv API answer as a hash,
  # downloading them on first use. Any failure marks the video as not
  # found and raises NotFound.
  def get_params
    raise NotFound if @not_found
    begin
      if @params
        puts_info 'params have already gotten : id = ' + @video_id
      else
        puts_info 'getting params : id = ' + @video_id
        @page ||= get_page(@url)
        content = @agent.get_file(BASE_URL + '/api/getflv?v=' + @video_id)
        puts_debug content
        pairs = {}
        content.scan(/([^&]+)=([^&]*)/).each do |pair|
          pairs[pair[0]] = pair[1]
        end
        @params = pairs
      end
      @params
    rescue
      @not_found = true
      raise NotFound
    end
  end
end
113
+ end
data/lib/nicovideo.rb ADDED
@@ -0,0 +1,20 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'rubygems'
4
+ require 'nicovideo/mechanize-ext'
5
+ require 'nicovideo/base'
6
+ require 'nicovideo/page'
7
+ require 'nicovideo/videopage'
8
+ require 'nicovideo/comments'
9
+ require 'nicovideo/mylist'
10
+ require 'nicovideo/openlist'
11
+ require 'nicovideo/search'
12
+ require 'nicovideo/tagsearch'
13
+ require 'nicovideo/ranking'
14
+ require 'nicovideo/random'
15
+ require 'nicovideo/newarrival'
16
+ require 'nicovideo/thumbnail'
17
+ #require 'nicovideo/tags'
18
+ #require 'nicovideo/ichiba'
19
+ #require 'nicovideo/feed'
20
+
@@ -0,0 +1,42 @@
1
+ require 'rubygems'
2
+ require 'nicovideo'
3
+ require 'yaml'
4
+
5
# Sample: download the comments and the video file of every video id
# given on the command line.

# account settings (mail / password) are read from the user's home directory
credentials = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
client = Nicovideo.new(credentials['mail'], credentials['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
client.login

# get videos and comments
ARGV.each do |video_id|
  client.watch(video_id) do |video|
    # method 'id' and 'video_id' return video ID(string).
    puts 'video id = ' + video.id

    # method 'title' returns string.
    puts 'title = ' + video.title

    # method 'tags' returns array of string.
    puts 'tags = ' + video.tags.join(' ')

    # method 'comments' returns instance of class Comments
    # which has methods 'to_xml', 'to_s'(same).
    puts 'getting comments xml'
    File.open("#{video_id}.xml", "wb") do |out|
      out.write video.comments(100).to_xml
    end

    # method 'flv' and 'video' return raw flv data(binary).
    puts 'getting flv file'
    File.open("#{video_id}.flv", "wb") do |out|
      out.write video.flv
    end
  end

  # pause between two videos
  sleep 3
end
@@ -0,0 +1,34 @@
1
+ require 'rubygems'
2
+ require 'nicovideo'
3
+ require 'yaml'
4
+
5
# Sample: the same download as the watch-based sample, written with the
# get_* convenience methods of the Nicovideo object.

# account settings (mail / password) are read from the user's home directory
credentials = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
client = Nicovideo.new(credentials['mail'], credentials['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
client.login

# get videos and comments
ARGV.each do |video_id|
  # the another way of nv_download
  puts client.get_title(video_id)
  puts client.get_tags(video_id).join(' ')

  puts 'getting comments xml'
  File.open("#{video_id}.xml", "wb") do |out|
    out.write client.get_comments(video_id, 100).to_xml
  end

  puts 'getting flv file'
  File.open("#{video_id}.flv", "wb") do |out|
    out.write client.get_flv(video_id)
  end

  # pause between two videos
  sleep 1
end
@@ -0,0 +1,37 @@
1
+ require 'rubygems'
2
+ require 'nicovideo'
3
+ require 'yaml'
4
+
5
# Sample: print the details and the video ids of every mylist id given on
# the command line.

# account settings (mail / password) are read from the user's home directory
credentials = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
client = Nicovideo.new(credentials['mail'], credentials['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
client.login

# get mylist
ARGV.each do |mylist_id|
  mylist = client.mylist(mylist_id)

  # method 'id' and 'mylist_id' return mylist ID(string).
  puts 'mylist id = ' + mylist.id

  # method 'title', 'user' and 'description' return string.
  puts 'title = ' + mylist.title
  puts 'user = ' + mylist.user
  puts 'description = ' + mylist.description

  # method 'videos' returns array of VideoPage.
  mylist.videos.each do |video|
    puts video.id
  end

  # pause between two mylists
  sleep 3
end
@@ -0,0 +1,35 @@
1
+ require 'rubygems'
2
+ require 'nicovideo'
3
+ require 'yaml'
4
+
5
# Sample: list the public mylists that contain each video id given on
# the command line, walking through all result pages.

# account settings (mail / password) are read from the user's home directory
credentials = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')

# create instance
client = Nicovideo.new(credentials['mail'], credentials['password'])

# login to Nicovideo (you don't need to login explicitly at v 0.0.4 or later)
client.login

# get openlist
ARGV.each do |video_id|
  open_list = client.openlist(video_id)

  # method 'id' and 'video_id' return video ID(string).
  puts 'video id = ' + open_list.id

  # method 'total_size' returns Fixnum.
  puts 'total_size = ' + open_list.total_size.to_s

  # method 'mylists' returns array of MyList.
  # The first page is always printed; further pages follow as long as
  # there is a next one and moving to it succeeds.
  loop do
    open_list.mylists.each do |mylist|
      puts mylist.id + ':' + mylist.title
    end
    sleep 1
    break unless open_list.has_next? && open_list.next
  end
end
@@ -0,0 +1,21 @@
1
#!/usr/bin/ruby -Ku

# Sample: print the current ranking as "<position>位 <video id> <title>".

require 'rubygems'
require 'nicovideo'
# YAML is used below; it was not required explicitly, so the script
# depended on another library having loaded it already.
require 'yaml'

conf = YAML.load_file(ENV['HOME'] + '/.nicovideo/account.yml')
nv = Nicovideo.new(conf['mail'], conf['password'])

cnt = 0
nv.ranking.each do |vp|
  cnt += 1
  printf "%3d位 %-12s %s \n", cnt, vp.video_id, vp.title
end

=begin
# when using more options
nv.ranking(type='mylist', span='daily', category='all', pagenum='1').each do |vp|
  cnt += 1
  printf "%3d位 %-12s %s \n", cnt, vp.video_id, vp.title
end
=end
data/test/runner.rb ADDED
@@ -0,0 +1,3 @@
1
# Runs every test found in the directory of this file.
test_dir = File.dirname(__FILE__)
require test_dir + '/test_helper.rb'

Test::Unit::AutoRunner.run(true, test_dir)
@@ -0,0 +1,2 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/../lib/nicovideo'