emergent-nicovideo 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog ADDED
@@ -0,0 +1,53 @@
1
+ 2008-10-09 version 0.1.7
2
+
3
+ * fixed subtitle of Nicovideo (summer -> autumn)
4
+ * fixed mechanize-ext for mechanize 0.8.4
5
+
6
+ 2008-07-23 version 0.1.6
7
+
8
+ * added a file that had been left out of the previous release
9
+
10
+ 2008-07-17 version 0.1.5
11
+
12
+ * fixed subtitle of Nicovideo (SP1 -> summer)
13
+
14
+ 2008-03-20 version 0.1.4
15
+
16
+ * added function Nicovideo#newarrival and Nicovideo::VideoPage#type
17
+
18
+ 2008-03-16 version 0.1.3
19
+
20
+ * fixed inability to download with mechanize 0.7.1 or later
21
+
22
+ 2008-03-15 version 0.1.2
23
+
24
+ * removed outputs for debug
25
+ * fixed internal search parameters
26
+ * fixed xpath for sp1
27
+
28
+ 2008-02-25 version 0.1.1
29
+
30
+ * fixed typo in Nicovideo::Ranking#url
31
+ * added sample using Nicovideo#ranking method
32
+ (ref: http://d.hatena.ne.jp/hayori/20080225/1203911215)
33
+
34
+ 2008-02-24 version 0.1.0
35
+
36
+ * added functions to search, tagsearch, ranking
37
+ * added low?() method(ref: http://d.hatena.ne.jp/hayori/20080218/1203312604)
38
+
39
+ 2008-02-16 version 0.0.6
40
+
41
+ * fixed bug of downloading video via mylist
42
+
43
+ 2008-02-03 version 0.0.5
44
+
45
+ * added the getter method to Nicovideo::Base#agent(WWW::Mechanize object)
46
+ * fixed typo of agent's get method
47
+
48
+ 2008-02-02 version 0.0.4
49
+
50
+ * added functions to get mylist and openlist
51
+ * added auto login (ref: http://d.hatena.ne.jp/zorio/20080122/1201018583)
52
+ * added published_at method to VideoPage
53
+ * did unit tests
data/README.txt ADDED
@@ -0,0 +1,37 @@
1
+ = Nicovideo
2
+
3
+ Nicovideo is a Ruby utility for scraping NICONICO DOUGA
4
+ (NICONICO DOUGA is a Japanese video sharing service)
5
+
6
+ - NICONICO DOUGA : http://www.nicovideo.jp/
7
+
8
+ == Installation
9
+
10
+ $ sudo gem install nicovideo
11
+
12
+ == Usage
13
+
14
+ See sample/nv_download.rb
15
+
16
+ == Author
17
+
18
+ - Satoshi Yoshikawa / emergent ( http://d.hatena.ne.jp/emergent )
19
+ - thanks to the many bloggers who wrote scraping scripts
20
+
21
+ == License
22
+
23
+ - 2-clause BSD License
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
data/Rakefile ADDED
@@ -0,0 +1,30 @@
# Rakefile for the nicovideo gem.
#
# Loads the gem configuration from config/, pulls in every task file under
# tasks/, and defines the :rdoc and :rubyforge tasks.

# Makes nil answer #empty? with true, so callers can use #empty? on a value
# that may be nil.
class NilClass
  def empty?
    true
  end
end

require 'config/requirements'
require 'config/hoe' # setup Hoe + all gem configuration

require 'rake/contrib/rubyforgepublisher'

# Load every additional task definition shipped with the project.
Dir['tasks/**/*.rake'].each do |task_file|
  load task_file
end

# Builds the HTML documentation into html/.
# ENV['template']  - optional rdoc template name (".rb" is appended).
# ENV['DOC_FILES'] - optional comma-separated list of files to document;
#                    defaults to README.txt, ChangeLog and lib/**/*.rb.
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'html'
  rdoc.options += RDOC_OPTS
  rdoc.template = "#{ENV['template']}.rb" if ENV['template']

  requested_files = ENV['DOC_FILES']
  if requested_files
    rdoc.rdoc_files.include(requested_files.split(/,\s*/))
  else
    rdoc.rdoc_files.include('README.txt', 'ChangeLog')
    rdoc.rdoc_files.include('lib/**/*.rb')
  end
end

desc "Publish to RubyForge"
task :rubyforge => [:rdoc, :package] do
  publisher = Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'emergent')
  publisher.upload
end
data/config/hoe.rb ADDED
@@ -0,0 +1,73 @@
# Hoe configuration for the nicovideo gem: project metadata constants,
# the RubyForge user lookup, rdoc options, and the Hoe task definition.
require 'nicovideo/version'

AUTHOR = 'emergent' # can also be an array of Authors
EMAIL = "emergent22 (at) livedoor.com"
DESCRIPTION = "utils for nicovideo"
GEM_NAME = 'nicovideo' # what ppl will type to install your gem
RUBYFORGE_PROJECT = 'nicovideo' # The unix name for your project
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"

@config_file = "~/.rubyforge/user-config.yml"
@config = nil
RUBYFORGE_USERNAME = "unknown"

# Reads the RubyForge user config (once) and copies its "username" entry into
# RUBYFORGE_USERNAME in place. Prints an error and terminates the process when
# the config file cannot be read or parsed.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
 - See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      # Non-zero status: this is a failure, so calling scripts must see it.
      exit 1
    end
  end
  RUBYFORGE_USERNAME.replace @config["username"]
end


REV = nil
# UNCOMMENT IF REQUIRED:
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
VERS = Nicovideo::VERSION::STRING + (REV ? ".#{REV}" : "")
# --main must name a documented file; the Rakefile documents README.txt.
RDOC_OPTS = ['--quiet', '--title', 'nicovideo documentation',
             "--opname", "index.html",
             "--line-numbers",
             "--main", "README.txt",
             "--inline-source"]

# Drops 'hoe' itself from the gem's runtime dependency list.
class Hoe
  def extra_deps
    @extra_deps.reject! { |x| Array(x).first == 'hoe' }
    @extra_deps
  end
end

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
hoe = Hoe.new(GEM_NAME, VERS) do |p|
  p.author = AUTHOR
  p.description = DESCRIPTION
  p.email = EMAIL
  p.summary = DESCRIPTION
  p.url = HOMEPATH
  p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  p.test_globs = ["test/**/test_*.rb"]
  p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'] # An array of file patterns to delete on clean.

  # == Optional
  # Paragraphs are rejoined with real blank lines (not literal "\n" text).
  p.changes = p.paragraphs_of("ChangeLog", 0..1).join("\n\n")
  # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
  p.extra_deps = [
    ['mechanize', '>=0.6.0']
  ]
  #p.spec_extras = {} # A hash of extra values to set in the gemspec.
end

CHANGES = hoe.paragraphs_of('ChangeLog', 0..1).join("\n\n")
PATH = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
@@ -0,0 +1,17 @@
# Loads everything the Rakefile depends on and puts the gem's lib/ directory
# on the load path before requiring the library itself.
require 'fileutils'
include FileUtils

require 'rubygems'

# Each of these gems is mandatory for the build tasks. When one is missing,
# tell the user how to install it and stop with a failure status.
%w[rake hoe newgem rubigen].each do |req_gem|
  begin
    require req_gem
  rescue LoadError
    puts "This Rakefile requires the '#{req_gem}' RubyGem."
    puts "Installation: gem install #{req_gem} -y"
    exit 1
  end
end

# Make ../lib (relative to this config directory) take precedence on $LOAD_PATH.
$:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))

require 'nicovideo'
@@ -0,0 +1,154 @@
module Nicovideo

  class ArgError   < StandardError ; end
  class LoginError < StandardError ; end
  class NotFound   < StandardError ; end
  class Forbidden  < StandardError ; end

  # Main entry point of the library. Owns a WWW::Mechanize agent that is
  # extended with login support and hands that agent to the page objects
  # (VideoPage, MyList, OpenList, ...) it creates.
  class Base

    # The underlying WWW::Mechanize object.
    attr_reader :agent

    # mail, password - account credentials (may be supplied later via #login).
    # auto_login     - when true, the agent's get/post log in automatically
    #                  whenever a response is not authenticated.
    def initialize(mail = nil, password = nil, auto_login = true)
      @mail = mail
      @password = password
      @agent = WWW::Mechanize.new
      agent_init(auto_login)
      @agent.set_account(@mail, @password)

      # most recently fetched video page, reused by get_videopage
      @vp = nil
    end

    # Adds account handling to the agent as singleton methods:
    # raw_get/raw_post (the untouched originals), set_account, authenticated?
    # and login. With auto_login, get/post are replaced by versions that log
    # in and retry once when the response is not authenticated.
    def agent_init(auto_login = true)
      @agent.instance_eval do
        # keep the original methods reachable before get/post are redefined
        alias raw_get get
        alias raw_post post

        def set_account(mail, password)
          @mail = mail
          @password = password
        end

        # The site reports an anonymous session with the header value '0'.
        def authenticated?(page)
          page.header['x-niconico-authflag'] != '0'
        end

        # Raises ArgError without credentials, LoginError when rejected.
        def login
          raise ArgError unless @mail && @password
          account = { 'mail' => @mail, 'password' => @password }
          res = raw_post('https://secure.nicovideo.jp/secure/login?site=niconico', account)
          raise LoginError unless authenticated?(res)
        end
      end

      if auto_login
        @agent.instance_eval do
          # seconds to wait between logging in and repeating the request
          @wait_time = 3

          def get(*args)
            try(:raw_get, *args)
          end

          def post(*args)
            try(:raw_post, *args)
          end

          # Performs the named raw request; if the answer is unauthenticated,
          # logs in, waits, and repeats it once. Raises LoginError if the
          # second answer is still unauthenticated.
          def try(name, *args)
            page = method(name).call(*args)
            unless authenticated?(page)
              self.login
              sleep @wait_time
              page = method(name).call(*args)
              raise LoginError unless authenticated?(page)
            end
            page
          end
        end
      end
    end

    # Logs in and returns self. Credentials given here replace the stored
    # ones; omitted (nil) arguments fall back to the stored values.
    def login(mail = nil, password = nil)
      @mail = mail if mail
      @password = password if password
      @agent.set_account(@mail, @password)
      @agent.login
      self
    end

    # Returns the VideoPage for video_id, yielding it first when a block is given.
    def watch(video_id)
      videopage = get_videopage(video_id)
      yield videopage if block_given?
      videopage
    end

    def get_tags(video_id)
      get_videopage(video_id).tags
    end

    def get_title(video_id)
      get_videopage(video_id).title
    end

    # Alias-style wrapper around #get_flv.
    def get_video(video_id)
      get_flv(video_id)
    end

    def get_flv(video_id)
      get_videopage(video_id).flv
    end

    def get_comments(video_id, num = 500)
      get_videopage(video_id).comments(num)
    end

    def mylist(mylist_id)
      MyList.new(@agent, mylist_id)
    end

    def openlist(video_id)
      OpenList.new(@agent, video_id)
    end

    def random
      Random.new(@agent)
    end

    def newarrival(pagenum = 1)
      Newarrival.new(@agent, pagenum)
    end

    # type     : 'mylist', 'view' or 'res'
    # span     : 'daily', 'newarrival', 'weekly', 'monthly', 'total'
    # category : 'all', 'music' ... and more
    def ranking(type = 'mylist', span = 'daily', category = 'all', pagenum = nil)
      Ranking.new(@agent, type, span, category, pagenum).to_a
    end

    # keyword : search keyword
    # sort    : nil -> published date
    #           'v' -> playback times
    #           'n' -> commented date
    #           'r' -> comment number
    #           'm' -> mylist number
    def search(keyword, sort = nil, order = nil, pagenum = 1)
      Search.new(@agent, keyword, sort, order, pagenum)
    end

    def tagsearch(keyword, sort = nil, order = nil, pagenum = 1)
      TagSearch.new(@agent, keyword, sort, order, pagenum)
    end

    private

    # Returns the cached VideoPage when it is for video_id, otherwise builds
    # (and caches) a new one.
    def get_videopage(video_id)
      if @vp.nil? || video_id != @vp.video_id
        @vp = VideoPage.new(@agent, video_id)
      end
      @vp
    end

  end

  # Builds a Base without logging in explicitly.
  def Nicovideo.new(mail, password)
    Base.new(mail, password)
  end

  # Builds a Base and logs in immediately.
  def Nicovideo.login(mail, password)
    Base.new(mail, password).login
  end
end
@@ -0,0 +1,9 @@
module Nicovideo
  # Holds the comment data fetched for one video.
  # Defined inside a nested `module Nicovideo` so this file loads regardless
  # of whether the module has already been defined elsewhere.
  class Comments
    # The id of the video these comments belong to.
    attr_reader :video_id

    # video_id - id of the video
    # xml      - the comment data; only #to_s is ever called on it
    def initialize(video_id, xml)
      @video_id = video_id
      @xml = xml
    end

    # Returns the comment data as a string.
    def to_s
      @xml.to_s
    end
    alias to_xml to_s
  end
end
@@ -0,0 +1,33 @@
1
+ # file: mechanize-ext.rb
2
+ require 'mechanize'
3
+
module WWW
  class Mechanize
    # POSTs +data+ as the raw request body to +url+ and returns the fetched
    # page, which is also added to the agent's history.
    #
    # url     - target URI
    # data    - request body, sent as-is
    # enctype - accepted for interface compatibility; not used here
    def post_data(url, data = '', enctype = nil)
      cur_page = current_page || Page.new(nil, { 'content-type' => 'text/html' })

      request_data = data

      log.debug("query: #{ request_data.inspect }") if log

      # Content-Length is a byte count. String#size counts characters on
      # Ruby 1.9+, so prefer #bytesize where the String provides it.
      body_length =
        if request_data.respond_to?(:bytesize)
          request_data.bytesize
        else
          request_data.size
        end

      # fetch the page
      page = fetch_page(:uri     => url,
                        :referer => cur_page,
                        :verb    => :post,
                        :params  => [request_data],
                        :headers => {
                          'Content-Length' => body_length.to_s,
                        })
      add_to_history(page)
      page
    end

    class File
      # Path component of the URI this file was fetched from.
      def path
        @uri.path
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'kconv'
2
+
3
+
4
+ require 'rss/2.0'
5
+ require 'rss/1.0'
6
+ require 'json'
7
+
module Nicovideo
  # A mylist, read through its RSS 2.0 feed.
  # title, user, description, videos and rss are registered as getters with
  # the Page superclass (defined outside this file).
  class MyList < Page
    NICO_MYLIST = 'マイリスト'

    def initialize(agent, mylist_id)
      super(agent)
      @mylist_id = mylist_id
      @raw_url = BASE_URL + '/mylist/' + @mylist_id
      @url = BASE_URL + '/mylist/' + @mylist_id + '?rss=2.0'

      params = ["title", "user", "description", "videos", "rss"]
      self.register_getter params
    end

    attr_reader :mylist_id
    # Misspelled reader kept so existing callers do not break; it used to
    # return nil unconditionally and now returns the mylist id.
    alias myliset_id mylist_id

    def id() @mylist_id end

    # The human-facing mylist URL (not the RSS URL used for fetching).
    def url() @raw_url end

    # Adds the video to this mylist and returns self.
    #
    # Raises ArgError when the site reports the video is already listed
    # ("duperror"), StandardError for any other unsuccessful result,
    # NotFound for HTTP 404/410 and Forbidden for HTTP 403.
    def add(video_id)
      video_page = VideoPage.new(@agent, video_id)

      begin
        add_result = @agent.post(video_page.url, {
          :mylist           => "add",
          :mylistgroup_name => "",
          :csrf_token       => video_page.csrf_token,
          :group_id         => @mylist_id,
          :ajax             => "1"
        })

        # the response body is JSON, optionally wrapped in parentheses
        result_code = JSON.parse(add_result.body.sub(/^\(?(.*?)\)?$/, '\1'))

        case result_code["result"]
        when "success"
          # added video isn't applied to rss immediately, so add video into
          # list by hand (loading the list first if it was never fetched).
          get_page(@url) unless @page
          @videos << video_page
          return self
        when "duperror"
          raise ArgError
        else
          raise StandardError,
                "failed to add #{video_id} to mylist #{@mylist_id}: #{result_code["result"].inspect}"
        end
      rescue WWW::Mechanize::ResponseCodeError => e
        rc = e.response_code
        puts_info rc
        if rc == "404" || rc == "410"
          @not_found = true
          raise NotFound
        elsif rc == "403"
          raise Forbidden
        else
          raise
        end
      end
    end

    private

    # Fills the getter-backed instance variables from the RSS feed in page.body.
    def parse(page)
      @rss = RSS::Parser.parse(page.body)
      channel = @rss.channel

      @title = channel.title.sub(/#{BASE_TITLE2+NICO_MYLIST} /, '')
      @user = channel.managingEditor
      @description = channel.description

      @videos = @rss.items.map do |item|
        # the video id is the last path segment of the item's watch URL
        vp = VideoPage.new(@agent, item.link.sub(/^.*watch\/(\w+)$/, '\1'))
        vp.title = item.title
        vp
      end
    end
  end
end
@@ -0,0 +1,34 @@
module Nicovideo
  # One page of the "newarrival" video listing; enumerable over its videos.
  class Newarrival < Page
    include Enumerable

    # pagenum - page to fetch; values above 10 are capped at 10. nil is
    #           allowed and requests the listing without a page parameter.
    def initialize(agent, pagenum)
      super(agent)

      # Guard nil before comparing: #url already treats a nil page as valid.
      @pagenum = pagenum && [pagenum, 10].min

      params = ["videos"]
      self.register_getter params

      @url = url()
    end

    # Collects a VideoPage (with its title pre-set) for every video link.
    def parse(page)
      result_xpath = page/'//div[@class="cmn_thumb_R"]//p[@class="TXT12"]/a[@class="video"]'
      @videos = result_xpath.map do |link|
        # Strip everything up to and including "watch/" so both relative and
        # absolute hrefs yield the bare video id (same rule as MyList#parse).
        vp = VideoPage.new(@agent, link.attributes['href'].sub(/^.*watch\/(\w+)$/, '\1'))
        vp.title = link.inner_html
        vp
      end
    end

    def each
      self.videos.each { |v| yield v }
    end

    def url
      opt = '?page=' + @pagenum.to_s if @pagenum
      "#{BASE_URL}/newarrival#{opt}"
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require 'kconv'
2
+
module Nicovideo

  # The public mylists that contain a given video, one result page at a time.
  #
  # Creating an instance does not access NICONICO DOUGA; presumably the Page
  # superclass fetches the page the first time a registered getter is used
  # (TODO confirm against Page, which is defined outside this file).
  class OpenList < Page
    include Enumerable

    def initialize(agent, video_id, sort = 'c', order = 'd', pagenum = 1)
      super(agent)
      @video_id = video_id
      @pagenum = pagenum
      @sort = sort
      @order = order
      @url = url()

      params = ["mylists", "total_size", "has_next?", "has_prev?"]
      self.register_getter params
    end

    attr_reader :pagenum

    def id() @video_id end

    # Rebuilds (and stores) the URL for the current page number; call
    # whenever pagenum changed.
    def url
      @url = BASE_URL + '/openlist/' + @video_id + "?page=#{@pagenum}&sort=#{@sort}&order=#{@order}"
    end

    def each
      self.mylists.each { |ml| yield ml }
    end

    def to_a() self.mylists end

    # Switches to another result page, re-fetching only when the number
    # actually changes. Returns the current page number.
    def pagenum=(pagenum)
      if @pagenum != pagenum
        @pagenum = pagenum
        get_page(self.url, true)
      end
      @pagenum
    end

    def page=(pagenum)
      self.pagenum = pagenum
      self
    end

    def next
      self.pagenum = @pagenum + 1
      self
    end

    def prev
      self.pagenum = @pagenum - 1
      self
    end

    protected

    # Extracts total_size, the has_next/has_prev flags and the mylists from
    # the fetched page. Raises NotFound when the page says no public mylist
    # contains the video.
    def parse(page)
      # the id is data, not a pattern: escape it before interpolating
      if page.body =~ /<strong>#{Regexp.escape(@video_id)}<\/strong>を含む公開マイリストはありません。/
        @not_found = true
        raise NotFound
      end

      # Remove every thousands separator; removing only the first one would
      # truncate counts of a million or more (e.g. "1,234,567").
      count_text = page.search('//form[@name="sort"]//td[@class="TXT12"]//strong').first.inner_html
      @total_size = count_text.delete(',').to_i

      @has_next = false
      @has_prev = false
      respages = page/'//div[@class="mb16p4"]//p[@class="TXT12"]//a'
      puts_info respages.size
      respages.each { |r| puts_info r.inner_html }
      respages.each do |link|
        label = link.inner_html
        @has_prev = true if label =~ /前のページ/
        @has_next = true if label =~ /次のページ/
      end

      # BASE_URL contains regex metacharacters ('.'), so match it literally.
      scanpattern = /<a href=\"#{Regexp.escape(BASE_URL)}\/mylist\/(\d+)\">(.+?)<\/a>/ou
      listrefs = page.parser.to_html.scan(scanpattern)
      @mylists = listrefs.map do |list_id, list_title|
        ml = MyList.new(@agent, list_id)
        ml.title = list_title
        ml
      end
    end
  end
end