emergent-nicovideo 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog ADDED
@@ -0,0 +1,53 @@
1
+ 2008-10-09 version 0.1.7
2
+
3
+ * fixed subtitle of Nicovideo (summer -> autumn)
4
+ * fixed mechanize-ext for mechanize 0.8.4
5
+
6
+ 2008-07-23 version 0.1.6
7
+
8
+ * added a file that had been left out by mistake
9
+
10
+ 2008-07-17 version 0.1.5
11
+
12
+ * fixed subtitle of Nicovideo (SP1 -> summer)
13
+
14
+ 2008-03-20 version 0.1.4
15
+
16
+ * added function Nicovideo#newarrival and Nicovideo::VideoPage#type
17
+
18
+ 2008-03-16 version 0.1.3
19
+
20
+ * fixed being unable to download with mechanize 0.7.1 or later
21
+
22
+ 2008-03-15 version 0.1.2
23
+
24
+ * removed outputs for debug
25
+ * fixed internal search parameters
26
+ * fixed xpath for sp1
27
+
28
+ 2008-02-25 version 0.1.1
29
+
30
+ * fixed typo in Nicovideo::Ranking#url
31
+ * added sample using Nicovideo#ranking method
32
+ (ref: http://d.hatena.ne.jp/hayori/20080225/1203911215)
33
+
34
+ 2008-02-24 version 0.1.0
35
+
36
+ * added functions to search, tagsearch, ranking
37
+ * added low?() method(ref: http://d.hatena.ne.jp/hayori/20080218/1203312604)
38
+
39
+ 2008-02-16 version 0.0.6
40
+
41
+ * fixed bug of downloading video via mylist
42
+
43
+ 2008-02-03 version 0.0.5
44
+
45
+ * added the getter method to Nicovideo::Base#agent(WWW::Mechanize object)
46
+ * fixed typo of agent's get method
47
+
48
+ 2008-02-02 version 0.0.4
49
+
50
+ * added functions to get mylist and openlist
51
+ * added auto login (ref: http://d.hatena.ne.jp/zorio/20080122/1201018583)
52
+ * added published_at method to VideoPage
53
+ * did unit tests
data/README.txt ADDED
@@ -0,0 +1,37 @@
1
+ = Nicovideo
2
+
3
+ Nicovideo is a Ruby utility for scraping NICONICO DOUGA
4
+ (NICONICO DOUGA is a Japanese video sharing service)
5
+
6
+ - NICONICO DOUGA : http://www.nicovideo.jp/
7
+
8
+ == Installation
9
+
10
+ $ sudo gem install nicovideo
11
+
12
+ == Usage
13
+
14
+ See sample/nv_download.rb
15
+
16
+ == Author
17
+
18
+ - Satoshi Yoshikawa / emergent ( http://d.hatena.ne.jp/emergent )
19
+ - thanks to the many bloggers who wrote scraping scripts
20
+
21
+ == License
22
+
23
+ - 2-clause BSD License
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
data/Rakefile ADDED
@@ -0,0 +1,30 @@
# Core-class patch applied before the build configuration is loaded:
# nil answers +empty?+ with true.
# NOTE(review): presumably a workaround for build tooling that calls
# +empty?+ on unset values -- confirm before removing.
class NilClass
  def empty?() true end
end
6
+
7
+ require 'config/requirements'
8
+ require 'config/hoe' # setup Hoe + all gem configuration
9
+
10
+ require 'rake/contrib/rubyforgepublisher'
11
+
12
+ Dir['tasks/**/*.rake'].each { |rake| load rake }
13
+
# Configure the rdoc task. Output goes to ./html with the options from
# RDOC_OPTS. An optional `template` environment variable selects the RDoc
# template, and an optional comma-separated `DOC_FILES` variable replaces
# the default list of documented files.
Rake::RDocTask.new do |rd|
  rd.rdoc_dir = 'html'
  rd.options += RDOC_OPTS
  rd.template = "#{ENV['template']}.rb" if ENV['template']

  doc_files = ENV['DOC_FILES']
  if doc_files
    rd.rdoc_files.include(doc_files.split(/,\s*/))
  else
    rd.rdoc_files.include('README.txt', 'ChangeLog')
    rd.rdoc_files.include('lib/**/*.rb')
  end
end
25
+
# Build the docs and the package, then upload through
# Rake::RubyForgePublisher for the 'emergent' account.
desc "Publish to RubyForge"
task :rubyforge => [:rdoc, :package] do
  publisher = Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'emergent')
  publisher.upload
end
30
+
data/config/hoe.rb ADDED
@@ -0,0 +1,73 @@
1
+ require 'nicovideo/version'
2
+
AUTHOR = 'emergent' # can also be an array of Authors
EMAIL = "emergent22 (at) livedoor.com"
DESCRIPTION = "utils for nicovideo"
GEM_NAME = 'nicovideo' # what ppl will type to install your gem
RUBYFORGE_PROJECT = 'nicovideo' # The unix name for your project
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"

# YAML is used below; require it here instead of relying on another
# library having loaded it already.
require 'yaml'

@config_file = "~/.rubyforge/user-config.yml"
@config = nil
# Updated in place by rubyforge_username, so it must be a mutable string.
RUBYFORGE_USERNAME = "unknown".dup

# Loads the RubyForge user configuration (once) and copies its "username"
# entry into RUBYFORGE_USERNAME.
#
# Returns RUBYFORGE_USERNAME. When the configuration has no "username"
# entry the previous value is left untouched.
# When the configuration file cannot be read or parsed, an explanatory
# message is printed and the process exits with a non-zero status.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
- See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      # Signal failure to the calling shell; a bare `exit` reports success.
      exit 1
    end
  end

  username = @config.is_a?(Hash) ? @config["username"] : nil
  RUBYFORGE_USERNAME.replace(username.to_s) if username
  RUBYFORGE_USERNAME
end
29
+
30
+
# Optional revision suffix for the gem version; nil means "no suffix".
REV = nil
# UNCOMMENT IF REQUIRED:
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil

# Gem version string: the library version, plus ".<REV>" when REV is set.
VERS = REV ? "#{Nicovideo::VERSION::STRING}.#{REV}" : Nicovideo::VERSION::STRING.dup

# Command-line options handed to RDoc.
RDOC_OPTS = [
  '--quiet',
  '--title', 'nicovideo documentation',
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source"
]
40
+
class Hoe
  # Returns the gem's extra dependencies with every entry named 'hoe'
  # removed. The stored @extra_deps array itself is pruned and returned.
  def extra_deps
    @extra_deps.delete_if { |dep| Array(dep).first == 'hoe' }
  end
end
47
+
# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
hoe = Hoe.new(GEM_NAME, VERS) do |p|
  p.author = AUTHOR
  p.description = DESCRIPTION
  p.email = EMAIL
  p.summary = DESCRIPTION
  p.url = HOMEPATH
  p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  p.test_globs = ["test/**/test_*.rb"]
  p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'] #An array of file patterns to delete on clean.

  # == Optional
  #p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
  # Join the two newest ChangeLog paragraphs with a real blank line.
  # (The previous "\\n\\n" inserted the literal characters backslash-n.)
  p.changes = p.paragraphs_of("ChangeLog", 0..1).join("\n\n")
  #p.extra_deps = [] # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
  p.extra_deps = [
    ['mechanize', '>=0.6.0']
  ]
  #p.spec_extras = {} # A hash of extra values to set in the gemspec.
end

#CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
# Same two ChangeLog paragraphs, exposed as a constant.
CHANGES = hoe.paragraphs_of('ChangeLog', 0..1).join("\n\n")
# RubyForge path of the gem: just the project name when project and gem
# share a name, otherwise "<project>/<gem>".
PATH = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
@@ -0,0 +1,17 @@
1
+ require 'fileutils'
2
+ include FileUtils
3
+
4
+ require 'rubygems'
# Require each build-time gem in order. On the first one that cannot be
# loaded, print install instructions and terminate.
missing_gem = %w[rake hoe newgem rubigen].find do |gem_name|
  begin
    require gem_name
    false
  rescue LoadError
    true
  end
end

if missing_gem
  puts "This Rakefile requires the '#{missing_gem}' RubyGem."
  puts "Installation: gem install #{missing_gem} -y"
  exit
end
14
+
15
+ $:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))
16
+
17
+ require 'nicovideo'
@@ -0,0 +1,154 @@
module Nicovideo

  # Raised when required arguments are missing or rejected
  # (e.g. logging in without mail/password).
  class ArgError < StandardError ; end
  # Raised when the response still reports "not authenticated" after a login attempt.
  class LoginError < StandardError ; end
  # Raised by page classes when the requested resource does not exist.
  class NotFound < StandardError ; end
  # Raised by page classes when access to the resource is refused.
  class Forbidden < StandardError ; end

  # Entry point of the library: owns the Mechanize agent and hands out
  # page objects (video pages, mylists, rankings, searches, ...).
  class Base

    # The underlying WWW::Mechanize object, extended by #agent_init.
    attr_reader :agent

    # mail, password : account credentials (may be given later via #login)
    # auto_login     : when true, the agent's get/post log in and retry once
    #                  whenever a response is not authenticated
    def initialize(mail=nil, password=nil, auto_login=true)
      @mail = mail
      @password = password
      @agent = WWW::Mechanize.new()
      agent_init(auto_login)
      @agent.set_account(@mail, @password)

      # video page cached for the video currently being handled
      @vp = nil
    end

    # Extends @agent with singleton methods:
    #   raw_get / raw_post : aliases of the agent's own get / post
    #   set_account        : stores the credentials on the agent
    #   authenticated?     : false only when the 'x-niconico-authflag' header is '0'
    #   login              : posts the credentials to the login URL
    # With auto_login, get / post are replaced by wrappers that log in and
    # retry once when the first response is not authenticated.
    def agent_init(auto_login=true)
      @agent.instance_eval do
        alias raw_get get
        alias raw_post post

        def set_account(mail, password) @mail=mail; @password=password end
        def authenticated?(page)
          page.header['x-niconico-authflag'] != '0'
        end

        def login
          raise ArgError unless (@mail && @password)
          account = {'mail' => @mail, 'password' => @password }
          res = raw_post('https://secure.nicovideo.jp/secure/login?site=niconico', account)
          raise LoginError unless authenticated?(res)
        end
      end

      if auto_login
        @agent.instance_eval do
          # seconds to wait between logging in and retrying the request
          @wait_time = 3
          def get(*args) try(:raw_get, *args) end
          def post(*args) try(:raw_post, *args) end

          # Performs the request; if the response is not authenticated,
          # logs in, waits @wait_time seconds and retries exactly once.
          def try(name, *args)
            page = method(name).call(*args)
            unless authenticated?(page)
              self.login
              sleep @wait_time
              page = method(name).call(*args)
              raise LoginError unless authenticated?(page)
            end
            page
          end
        end
      end

    end

    # Logs in explicitly and returns self.
    # Credentials passed here take precedence over the ones given to the
    # constructor; omitted (nil) arguments keep the stored values.
    # Raises ArgError when no credentials are available and LoginError
    # when the login response is not authenticated.
    def login(mail=nil, password=nil)
      @mail = mail if mail
      @password = password if password
      @agent.set_account(@mail, @password)
      @agent.login
      self
    end

    # Returns the VideoPage for video_id (also yielded to the block, if given).
    def watch(video_id)
      videopage = get_videopage(video_id)
      @vp = videopage
      if block_given?
        yield videopage
      end
      videopage
    end

    def get_tags(video_id)
      get_videopage(video_id).tags
    end

    def get_title(video_id)
      get_videopage(video_id).title
    end

    # Alias-style wrapper around #get_flv.
    def get_video(video_id)
      self.get_flv(video_id)
    end

    def get_flv(video_id)
      get_videopage(video_id).flv
    end

    def get_comments(video_id, num=500)
      get_videopage(video_id).comments(num)
    end

    def mylist(mylist_id)
      MyList.new(@agent, mylist_id)
    end

    def openlist(video_id)
      OpenList.new(@agent, video_id)
    end

    def random()
      Random.new(@agent)
    end

    def newarrival(pagenum=1)
      Newarrival.new(@agent,pagenum)
    end

    # type     : 'mylist', 'view' or 'res'
    # span     : 'daily', 'newarrival', 'weekly', 'monthly', 'total'
    # category : 'all', 'music' ... and more
    def ranking(type='mylist', span='daily', category='all', pagenum=nil)
      Ranking.new(@agent, type, span, category, pagenum).to_a
    end

    # keyword : search keyword
    # sort    : nil -> published date
    #           'v' -> playback times
    #           'n' -> commented date
    #           'r' -> comment number
    #           'm' -> mylist number
    def search(keyword, sort=nil, order=nil, pagenum=1)
      Search.new(@agent, keyword, sort, order, pagenum)
    end

    def tagsearch(keyword, sort=nil, order=nil, pagenum=1)
      TagSearch.new(@agent, keyword, sort, order, pagenum)
    end

    private
    # Returns the cached VideoPage when it belongs to video_id,
    # otherwise creates (and caches) a new one.
    def get_videopage(video_id)
      if @vp.nil? || video_id != @vp.video_id
        @vp = VideoPage.new(@agent, video_id)
      end
      @vp
    end

  end

  # Shortcut for Base.new. All arguments are optional and forwarded as-is.
  def Nicovideo.new(mail=nil, password=nil, auto_login=true)
    Base.new(mail, password, auto_login)
  end

  # Creates a Base and logs in immediately; returns the Base.
  def Nicovideo.login(mail, password)
    Base.new(mail, password).login
  end
end
@@ -0,0 +1,9 @@
# Holds the comment data fetched for one video.
class Nicovideo::Comments
  # video_id : id of the video the comments belong to
  # xml      : comment payload; only ever rendered through +to_s+
  def initialize(video_id, xml)
    @video_id, @xml = video_id, xml
  end

  # String form of the stored payload.
  def to_xml
    @xml.to_s
  end

  # Same output as #to_xml.
  alias to_s to_xml
end
@@ -0,0 +1,33 @@
1
+ # file: mechanize-ext.rb
2
+ require 'mechanize'
3
+
module WWW
  class Mechanize
    # POSTs +data+ as a single pre-built request parameter to +url+, using
    # the current page (or an empty HTML page) as the referer, and records
    # the result in the history.
    #
    # url     : target of the request
    # data    : request body, passed through unchanged
    # enctype : accepted for interface compatibility; not used
    #
    # Returns the page produced by fetch_page.
    def post_data(url, data='', enctype=nil)
      cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

      request_data = data

      log.debug("query: #{ request_data.inspect }") if log

      # Content-Length is a byte count. String#size counts characters on
      # Ruby 1.9+, which is too small for multibyte bodies.
      body_length = if request_data.respond_to?(:bytesize)
                      request_data.bytesize
                    else
                      request_data.size
                    end

      # fetch the page
      page = fetch_page(:uri     => url,
                        :referer => cur_page,
                        :verb    => :post,
                        :params  => [request_data],
                        :headers => {
                          'Content-Length' => body_length.to_s,
                        })
      add_to_history(page)
      page
    end

    class File
      # Path component of the URI stored in @uri.
      def path
        @uri.path
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'kconv'
2
+
3
+
4
+ require 'rss/2.0'
5
+ require 'rss/1.0'
6
+ require 'json'
7
+
module Nicovideo
  # A mylist, read from the mylist's RSS 2.0 feed.
  class MyList < Page
    NICO_MYLIST = 'マイリスト'

    # agent     : the agent used for all requests
    # mylist_id : id of the mylist (String; concatenated into the URLs)
    def initialize agent, mylist_id
      super(agent)
      @mylist_id = mylist_id
      @raw_url = BASE_URL + '/mylist/' + @mylist_id
      @url     = BASE_URL + '/mylist/' + @mylist_id + '?rss=2.0'

      params = ["title", "user", "description", "videos", "rss"]
      self.register_getter params
    end

    attr_reader :mylist_id
    # The reader was originally published under this misspelled name (and
    # always returned nil). Kept so existing callers do not break.
    alias myliset_id mylist_id

    def id()  @mylist_id end
    def url() @raw_url   end

    # Adds the video to this mylist by posting to the video's page.
    #
    # Returns self on success.
    # Raises ArgError when the service answers "duperror", StandardError
    # for any other unsuccessful result, NotFound on HTTP 404/410 and
    # Forbidden on HTTP 403. Other HTTP errors are re-raised.
    def add(video_id)
      video_page = VideoPage.new @agent, video_id

      begin
        add_result = @agent.post(video_page.url, {
          :mylist           => "add",
          :mylistgroup_name => "",
          :csrf_token       => video_page.csrf_token,
          :group_id         => @mylist_id,
          :ajax             => "1"})

        # the response body may be wrapped in parentheses; strip them before parsing
        result_code = JSON.parse(add_result.body.sub(/^\(?(.*?)\)?$/, '\1'))

        if result_code["result"] == "success"
          # added video isn't applied to rss immediately, so add video into list by hand.
          # Make sure the list has been loaded before appending to it.
          get_page(@url) unless @page
          @videos << video_page
          return self
        end
        raise ArgError if result_code["result"] == "duperror"
        raise StandardError
      rescue WWW::Mechanize::ResponseCodeError => e
        rc = e.response_code
        puts_info rc
        if rc == "404" || rc == "410"
          @not_found = true
          raise NotFound
        elsif rc == "403"
          raise Forbidden
        else
          raise e
        end
      end
    end

    private
    # Fills title, user, description and videos from the RSS feed in page.body.
    def parse(page)
      @rss = RSS::Parser.parse(page.body)
      @title = rss.channel.title.sub(/#{BASE_TITLE2+NICO_MYLIST} /, '')
      @user = rss.channel.managingEditor
      @description = rss.channel.description

      @videos = rss.items.map do |item|
        # the video id is the last path segment of the item's watch URL
        vp = VideoPage.new(@agent, item.link.sub(/^.*watch\/(\w+)$/, '\1'))
        vp.title = item.title
        vp
      end
    end
  end
end
@@ -0,0 +1,34 @@
module Nicovideo
  # One page of the "new arrival" video list.
  class Newarrival < Page
    include Enumerable

    # agent   : the agent used for all requests
    # pagenum : page to read; values above 10 are clamped to 10.
    #           nil is accepted and produces a URL without a page parameter.
    def initialize(agent, pagenum)
      super(agent)

      # Guard against nil here: #url already supports a missing page
      # number, so construction must not fail on the comparison.
      @pagenum = (pagenum && pagenum > 10) ? 10 : pagenum

      params = ["videos"]
      self.register_getter params

      @url = url()
    end

    # Builds @videos from the video links found in the page; each entry
    # is a VideoPage whose title is taken from the link text.
    def parse(page)
      result_xpath = page/'//div[@class="cmn_thumb_R"]//p[@class="TXT12"]/a[@class="video"]'
      @videos = result_xpath.map do |link|
        vp = VideoPage.new(@agent, link.attributes['href'].sub(/watch\/(\w+)$/,'\1'))
        vp.title = link.inner_html
        vp
      end
    end

    # Yields every video of the page.
    def each
      self.videos.each {|v| yield v }
    end

    # URL of the list page, with "?page=N" appended when a page number is set.
    def url
      query = @pagenum ? "?page=#{@pagenum}" : ''
      "#{BASE_URL}/newarrival#{query}"
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require 'kconv'
2
+
module Nicovideo

  # The public mylists that contain a given video, one result page at a time.
  #
  # Creating an instance does not access NICONICO DOUGA.
  # NOTE(review): the original comment says the site is accessed the first
  # time an instance method is called -- presumably via the getters
  # registered with register_getter; confirm against Page.
  class OpenList < Page
    include Enumerable

    # agent    : the agent used for all requests
    # video_id : id of the video (String; concatenated into the URL)
    # sort, order, pagenum : passed through as the sort, order and page
    #                        query parameters of the list URL
    def initialize agent, video_id, sort='c', order='d', pagenum=1
      super(agent)
      @video_id = video_id
      @pagenum  = pagenum
      @sort     = sort
      @order    = order
      @url      = url()

      params = ["mylists", "total_size", "has_next?", "has_prev?"]
      self.register_getter params
    end

    attr_reader :pagenum

    def id() @video_id end

    # call whenever pagenum changed
    def url
      @url = BASE_URL + '/openlist/' + @video_id + "?page=#{@pagenum}&sort=#{@sort}&order=#{@order}"
    end

    # Yields every mylist of the current page.
    def each
      self.mylists.each {|ml|
        yield ml
      }
    end

    def to_a() self.mylists end

    # Switches to another result page and reloads it; does nothing when
    # the page number is unchanged.
    def pagenum=(pagenum)
      if @pagenum != pagenum
        @pagenum = pagenum
        get_page(self.url, true)
      end
      @pagenum
    end

    def page=(pagenum)
      self.pagenum = pagenum
      self
    end

    # Moves to the following page and returns self.
    def next
      self.pagenum = @pagenum + 1
      self
    end

    # Moves to the preceding page and returns self.
    def prev
      self.pagenum = @pagenum - 1
      self
    end

    protected
    # Extracts the total count, the prev/next flags and the mylists from
    # the page. Raises NotFound when the page states that no public
    # mylist contains the video.
    def parse(page)
      if page.body =~ /<strong>#{@video_id}<\/strong>を含む公開マイリストはありません。/
        @not_found = true
        raise NotFound
      end

      # Remove every thousands separator, not just the first one;
      # otherwise "1,234,567" would be read as 1234.
      @total_size = page.search('//form[@name="sort"]//td[@class="TXT12"]//strong').first.inner_html.delete(',').to_i

      @has_next = false
      @has_prev = false
      respages = page/'//div[@class="mb16p4"]//p[@class="TXT12"]//a'
      puts_info respages.size
      respages.each {|r| puts_info r.inner_html }
      if respages.size > 0
        respages.each {|text|
          if text.inner_html =~ /前のページ/
            @has_prev = true
          end
          if text.inner_html =~ /次のページ/
            @has_next = true
          end
        }
      end

      scanpattern = /<a href=\"#{BASE_URL}\/mylist\/(\d+)\">(.+?)<\/a>/ou
      listrefs = page.parser.to_html.scan(scanpattern)
      @mylists = listrefs.inject([]) {|arr, v| # v[0]: mylist id, v[1]: mylist title
        ml = MyList.new(@agent, v[0])
        ml.title = v[1]
        arr << ml
      }
    end
  end
end