RubyGems - emergent-nicovideo - Versions diffs - 0.1.7 - Mend

emergent-nicovideo 0.1.7

Files changed (39) hide show

data/ChangeLog +53 -0
data/README.txt +37 -0
data/Rakefile +30 -0
data/config/hoe.rb +73 -0
data/config/requirements.rb +17 -0
data/lib/nicovideo/base.rb +154 -0
data/lib/nicovideo/comments.rb +9 -0
data/lib/nicovideo/mechanize-ext.rb +33 -0
data/lib/nicovideo/mylist.rb +77 -0
data/lib/nicovideo/newarrival.rb +34 -0
data/lib/nicovideo/openlist.rb +96 -0
data/lib/nicovideo/page.rb +92 -0
data/lib/nicovideo/random.rb +38 -0
data/lib/nicovideo/ranking.rb +36 -0
data/lib/nicovideo/search.rb +100 -0
data/lib/nicovideo/tagsearch.rb +13 -0
data/lib/nicovideo/thumbnail.rb +68 -0
data/lib/nicovideo/version.rb +9 -0
data/lib/nicovideo/videopage.rb +113 -0
data/lib/nicovideo.rb +20 -0
data/sample/nv_download.rb +42 -0
data/sample/nv_download2.rb +34 -0
data/sample/nv_mylist.rb +37 -0
data/sample/nv_openlist.rb +35 -0
data/sample/nv_ranking.rb +21 -0
data/test/runner.rb +3 -0
data/test/test_helper.rb +2 -0
data/test/test_login.rb +38 -0
data/test/test_mylist.rb +92 -0
data/test/test_newarrival.rb +31 -0
data/test/test_nicovideo.rb +11 -0
data/test/test_openlist.rb +108 -0
data/test/test_random.rb +31 -0
data/test/test_ranking.rb +31 -0
data/test/test_search.rb +96 -0
data/test/test_tagsearch.rb +65 -0
data/test/test_thumbnail.rb +31 -0
data/test/test_videopage.rb +108 -0
metadata +121 -0

data/ChangeLog ADDED Viewed

@@ -0,0 +1,53 @@
+2008-10-09  version 0.1.7
+	* fixed subtitle of Nicovideo (summer -> autumn)
+	* fixed mechanize-ext for mechanize 0.8.4
+2008-07-23  version 0.1.6
+	* added forgotten file to be added
+2008-07-17  version 0.1.5
+	* fixed subtitle of Nicovideo (SP1 -> summer)
+2008-03-20  version 0.1.4
+	* added function Nicovideo#newarrival and Nicovideo::VideoPage#type
+2008-03-16  version 0.1.3
+	* fixed unable to download with mechanize 0.7.1 or later
+2008-03-15  version 0.1.2
+	* removed outputs for debug
+	* fixed internal search parameters
+	* fixed xpath for sp1
+2008-02-25  version 0.1.1
+	* fixed typo in Nicovideo::Ranking#url
+	* added sample using Nicovideo#ranking method
+	        (ref: http://d.hatena.ne.jp/hayori/20080225/1203911215)
+2008-02-24  version 0.1.0
+	* added functions to search, tagsearch, ranking
+	* added low?() method(ref: http://d.hatena.ne.jp/hayori/20080218/1203312604)
+2008-02-16  version 0.0.6
+	* fixed bug of downloading video via mylist
+2008-02-03  version 0.0.5
+	* added the getter method to Nicovideo::Base#agent(WWW::Mechanize object)
+	* fixed typo of agent's get method
+2008-02-02  version 0.0.4
+	* added functions to get mylist and openlist
+	* added auto login (ref: http://d.hatena.ne.jp/zorio/20080122/1201018583)
+	* added published_at method to VideoPage
+	* did unit tests

data/README.txt ADDED Viewed

@@ -0,0 +1,37 @@
+= Nicovideo
+Nicovideo is a Ruby utility for scraping NICONICO DOUGA
+(NICONICO DOUGA is a Japanese video sharing service)
+- NICONICO DOUGA : http://www.nicovideo.jp/
+== Installation
+ $ sudo gem install nicovideo
+== Usage
+See sample/nv_download.rb
+== Author
+- Satoshi Yoshikawa / emergent ( http://d.hatena.ne.jp/emergent )
+- thanks to the many bloggers who wrote scraping scripts
+== License
+- 2-clause BSD License

data/Rakefile ADDED Viewed

@@ -0,0 +1,30 @@
+# Rakefile: loads the gem configuration and defines the rdoc and rubyforge tasks.
+# Reopens NilClass so that nil.empty? returns true instead of raising NoMethodError.
+# NOTE(review): this patch is global and also affects everything loaded afterwards;
+# presumably it works around a nil.empty? call in the build tooling -- confirm.
+class NilClass
+  def empty?
+    true
+  end
+end
+require 'config/requirements'
+require 'config/hoe' # setup Hoe + all gem configuration
+require 'rake/contrib/rubyforgepublisher'
+# Load every additional task file found under tasks/.
+Dir['tasks/**/*.rake'].each { |rake| load rake }
+# RDoc task: output goes to html/. RDOC_OPTS is appended to the options.
+# The 'template' and 'DOC_FILES' environment variables override the template
+# and the (comma-separated) list of documented files.
+Rake::RDocTask.new do |rdoc|
+  rdoc.rdoc_dir = 'html'
+  rdoc.options += RDOC_OPTS
+  rdoc.template = "#{ENV['template']}.rb" if ENV['template']
+  if ENV['DOC_FILES']
+    rdoc.rdoc_files.include(ENV['DOC_FILES'].split(/,\s*/))
+  else
+    rdoc.rdoc_files.include('README.txt', 'ChangeLog')
+    rdoc.rdoc_files.include('lib/**/*.rb')
+  end
+end
+# Runs the rdoc and package tasks first, then uploads through
+# Rake::RubyForgePublisher using the RubyForge user name 'emergent'.
+desc "Publish to RubyForge"
+task :rubyforge => [:rdoc, :package] do
+  Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'emergent').upload
+end

data/config/hoe.rb ADDED Viewed

@@ -0,0 +1,73 @@
+# Gem configuration: defines the project constants and builds the Hoe
+# specification from which the Rake tasks are generated.
+require 'nicovideo/version'
+AUTHOR = 'emergent'  # can also be an array of Authors
+EMAIL = "emergent22 (at) livedoor.com"
+DESCRIPTION = "utils for nicovideo"
+GEM_NAME = 'nicovideo' # what ppl will type to install your gem
+RUBYFORGE_PROJECT = 'nicovideo' # The unix name for your project
+HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
+DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
+@config_file = "~/.rubyforge/user-config.yml"
+@config = nil
+# Placeholder value; rubyforge_username overwrites the string's contents in place.
+RUBYFORGE_USERNAME = "unknown"
+# Loads the RubyForge user configuration (only once; the parsed result is kept
+# in @config) and copies its "username" entry into RUBYFORGE_USERNAME.
+# If reading or parsing the file raises, a hint is printed and the process exits.
+# NOTE(review): the bare `exit` reports success (status 0) even though this is
+# an error path -- confirm whether a non-zero status is wanted.
+def rubyforge_username
+  unless @config
+    begin
+      @config = YAML.load(File.read(File.expand_path(@config_file)))
+    rescue
+      puts <<-EOS
+ERROR: No rubyforge config file found: #{@config_file}
+Run 'rubyforge setup' to prepare your env for access to Rubyforge
+ - See http://newgem.rubyforge.org/rubyforge.html for more details
+      EOS
+      exit
+    end
+  end
+  # String#replace mutates the constant's string object instead of reassigning the constant.
+  RUBYFORGE_USERNAME.replace @config["username"]
+end
+REV = nil
+# UNCOMMENT IF REQUIRED:
+# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
+# Gem version string; ".REV" is appended only when REV is set.
+VERS = Nicovideo::VERSION::STRING + (REV ? ".#{REV}" : "")
+RDOC_OPTS = ['--quiet', '--title', 'nicovideo documentation',
+    "--opname", "index.html",
+    "--line-numbers",
+    "--main", "README",
+    "--inline-source"]
+# Reopens Hoe so that extra_deps never lists 'hoe' itself: matching entries
+# are removed from @extra_deps in place before the array is returned.
+class Hoe
+  def extra_deps
+    @extra_deps.reject! { |x| Array(x).first == 'hoe' }
+    @extra_deps
+  end
+end
+# Generate all the Rake tasks
+# Run 'rake -T' to see list of generated tasks (from gem root directory)
+hoe = Hoe.new(GEM_NAME, VERS) do |p|
+  p.author = AUTHOR
+  p.description = DESCRIPTION
+  p.email = EMAIL
+  p.summary = DESCRIPTION
+  p.url = HOMEPATH
+  p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
+  p.test_globs = ["test/**/test_*.rb"]
+  p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']  #An array of file patterns to delete on clean.
+  # == Optional
+  #p.changes = p.paragraphs_of("History.txt", 0..1).join("\\n\\n")
+  # NOTE(review): "\\n\\n" is the four characters backslash-n-backslash-n, not
+  # two newlines -- confirm that this separator is intended.
+  p.changes = p.paragraphs_of("ChangeLog", 0..1).join("\\n\\n")
+  #p.extra_deps = []     # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
+  p.extra_deps = [
+                  ['mechanize', '>=0.6.0']
+                 ]
+  #p.spec_extras = {}    # A hash of extra values to set in the gemspec.
+end
+#CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\\n\\n")
+CHANGES = hoe.paragraphs_of('ChangeLog', 0..1).join("\\n\\n")
+# PATH is the project name alone when it equals the gem name, otherwise "project/gem".
+PATH    = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
+# The leading project name is stripped from PATH before 'rdoc' is appended.
+hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')

data/config/requirements.rb ADDED Viewed

@@ -0,0 +1,17 @@
+require 'fileutils'
+include FileUtils
+require 'rubygems'
+%w[rake hoe newgem rubigen].each do |req_gem|
+  begin
+    require req_gem
+  rescue LoadError
+    puts "This Rakefile requires the '#{req_gem}' RubyGem."
+    puts "Installation: gem install #{req_gem} -y"
+    exit
+  end
+end
+$:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))
+require 'nicovideo'

data/lib/nicovideo/base.rb ADDED Viewed

@@ -0,0 +1,154 @@
+module Nicovideo
+  class ArgError         < StandardError ; end
+  class LoginError       < StandardError ; end
+  class NotFound         < StandardError ; end
+  class Forbidden        < StandardError ; end
+  class Base
+    def initialize mail=nil, password=nil, auto_login=true
+      @mail = mail
+      @password = password
+      @agent = WWW::Mechanize.new()
+      agent_init(auto_login)
+      @agent.set_account(@mail, @password)
+      # for parameters current video
+      @vp = nil
+      self
+    end
+    attr_reader :agent
+    def agent_init auto_login=true
+      @agent.instance_eval do
+        alias raw_get get
+        alias raw_post post
+        def set_account(mail, password) @mail=mail; @password=password end
+        def authenticated?(page)
+          page.header['x-niconico-authflag'] != '0'
+        end
+        def login
+          raise ArgError unless (@mail && @password)
+          account = {'mail' => @mail, 'password' => @password }
+          res = raw_post('https://secure.nicovideo.jp/secure/login?site=niconico', account)
+          raise LoginError unless authenticated?(res)
+        end
+      end
+      if auto_login
+        @agent.instance_eval do
+          @wait_time = 3
+          def get(*args) try(:raw_get, *args) end
+          def post(*args) try(:raw_post, *args) end
+          def try(name, *args)
+            page = method(name).call(*args)
+            unless authenticated?(page)
+              self.login
+              sleep @wait_time
+              page = method(name).call(*args)
+              raise LoginError unless authenticated?(page)
+            end
+            page
+          end
+        end
+      end
+    end
+    def login mail=nil, password=nil
+      @mail     ||= mail
+      @password ||= password
+      @agent.set_account(@mail, @password)
+      @agent.login
+      self
+    end
+    def watch(video_id)
+      videopage = get_videopage(video_id)
+      @vp = videopage
+      if block_given?
+        yield videopage
+      end
+      videopage
+    end
+    def get_tags(video_id)
+      get_videopage(video_id).tags
+    end
+    def get_title(video_id)
+      get_videopage(video_id).title
+    end
+    def get_video(video_id)
+      self.get_flv(video_id)
+    end
+    def get_flv(video_id)
+      get_videopage(video_id).flv
+    end
+    def get_comments video_id, num=500
+      get_videopage(video_id).comments(num)
+    end
+    def mylist(mylist_id)
+      MyList.new(@agent, mylist_id)
+    end
+    def openlist(video_id)
+      OpenList.new(@agent, video_id)
+    end
+    def random()
+      Random.new(@agent)
+    end
+    def newarrival(pagenum=1)
+      Newarrival.new(@agent,pagenum)
+    end
+    # type : 'mylist', 'view' or 'res'
+    # span : 'daily', 'newarrival', 'weekly', 'monthly', 'total'
+    # category : 'all', 'music' ... and more
+    def ranking(type='mylist', span='daily', category='all', pagenum=nil)
+      Ranking.new(@agent, type, span, category, pagenum).to_a
+    end
+    # keyword : search keyword
+    # sort    : nil -> published date
+    #           'v' -> playback times
+    #           'n' -> commented date
+    #           'r' -> comment number
+    #           'm' -> mylist number
+    def search(keyword, sort=nil, order=nil, pagenum=1)
+      Search.new(@agent, keyword, sort, order, pagenum)
+    end
+    def tagsearch(keyword, sort=nil, order=nil, pagenum=1)
+      TagSearch.new(@agent, keyword, sort, order, pagenum)
+    end
+    private
+    def get_videopage(video_id)
+      if @vp.nil? || video_id != @vp.video_id
+        @vp = VideoPage.new(@agent, video_id)
+      end
+      @vp
+    end
+  end
+  def Nicovideo.new(mail, password)
+    Base.new(mail, password)
+  end
+  def Nicovideo.login(mail, password)
+    Base.new(mail, password).login
+  end
+end

data/lib/nicovideo/comments.rb ADDED Viewed

@@ -0,0 +1,9 @@
+class Nicovideo::Comments
+  def initialize video_id, xml
+    @video_id = video_id
+    @xml = xml
+  end
+  def to_s() @xml.to_s end
+  def to_xml() @xml.to_s end
+end

data/lib/nicovideo/mechanize-ext.rb ADDED Viewed

@@ -0,0 +1,33 @@
+# file: mechanize-ext.rb
+require 'mechanize'
+# Extensions to WWW::Mechanize: a raw-body POST helper and a path reader for
+# downloaded files.
+module WWW
+  class Mechanize
+    self.class_eval {
+      # POSTs `data` to `url` as the request body and returns the resulting page,
+      # which is also appended to the agent's history.
+      # url     : target of the request
+      # data    : request body; it is passed unchanged as the only element of :params
+      # enctype : accepted but not used in this method
+      def post_data(url, data='', enctype=nil)
+        # Referer: the current page, or an empty HTML page when nothing has been fetched yet.
+        cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})
+        request_data = data
+        log.debug("query: #{ request_data.inspect }") if log
+        # fetch the page
+        # NOTE(review): String#size counts characters on Ruby 1.9+, so
+        # Content-Length would be wrong for multibyte bodies there -- confirm
+        # which Ruby versions this has to support.
+        page = fetch_page(  :uri      => url,
+                            :referer  => cur_page,
+                            :verb     => :post,
+                            :params   => [request_data],
+                            :headers  => {
+                              'Content-Length'  => request_data.size.to_s,
+                            })
+        add_to_history(page)
+        page
+      end
+      # Adds File#path, returning the path component of the file's @uri.
+      class File
+        def path
+          return @uri.path
+        end
+      end
+    }
+  end
+end

data/lib/nicovideo/mylist.rb ADDED Viewed

@@ -0,0 +1,77 @@
+require 'kconv'
+require 'rss/2.0'
+require 'rss/1.0'
+require 'json'
+module Nicovideo
+  class MyList < Page
+    NICO_MYLIST = 'マイリスト'
+    def initialize agent, mylist_id
+      super(agent)
+      @mylist_id = mylist_id
+      @raw_url = BASE_URL + '/mylist/' + @mylist_id
+      @url     = BASE_URL + '/mylist/' + @mylist_id + '?rss=2.0'
+      params = ["title", "user", "description", "videos", "rss"]
+      self.register_getter params
+    end
+    attr_reader :myliset_id
+    def id()  @mylist_id end
+    def url() @raw_url   end
+    def add(video_id)
+      video_page = VideoPage.new @agent, video_id
+      begin
+        add_result = @agent.post(video_page.url, {
+            :mylist => "add",
+            :mylistgroup_name => "",
+            :csrf_token => video_page.csrf_token,
+            :group_id => @mylist_id,
+            :ajax => "1"})
+        result_code = JSON.parse(add_result.body.sub(/^\(?(.*?)\)?$/, '\1'))
+        if result_code["result"] == "success" then
+          # added video isn't applied to rss immediately, so add video into list by hand.
+          page = @page || get_page(@url)
+          @videos << video_page
+          return self
+        end
+        raise ArgError if result_code["result"] == "duperror"
+        raise StandardError
+      rescue WWW::Mechanize::ResponseCodeError => e
+        rc = e.response_code
+        puts_info rc
+        if rc == "404" || rc == "410"
+          @not_found = true
+          raise NotFound
+        elsif rc == "403"
+          raise Forbidden
+        else
+          raise e
+        end
+      end
+    end
+    private
+    def parse(page)
+      @rss = RSS::Parser.parse(page.body)
+      @title = rss.channel.title.sub(/#{BASE_TITLE2+NICO_MYLIST} /, '')
+      @user  = rss.channel.managingEditor
+      @description = rss.channel.description
+      @videos = rss.items.collect {|i|
+        vp = VideoPage.new(@agent, i.link.sub(/^.*watch\/(\w+)$/, '\1'))
+        vp.title = i.title
+        vp
+      }
+    end
+  end
+end

data/lib/nicovideo/newarrival.rb ADDED Viewed

@@ -0,0 +1,34 @@
+module Nicovideo
+  class Newarrival < Page
+    include Enumerable
+    def initialize(agent, pagenum)
+      super(agent)
+      @pagenum = pagenum > 10 ? 10 : pagenum
+      params = ["videos"]
+      self.register_getter params
+      @url = url()
+    end
+    def parse(page)
+      result_xpath = page/'//div[@class="cmn_thumb_R"]//p[@class="TXT12"]/a[@class="video"]'
+      @videos = result_xpath.inject([]) do |arr, v|
+        vp = VideoPage.new(@agent, v.attributes['href'].sub(/watch\/(\w+)$/,'\1'))
+        vp.title = v.inner_html
+        arr << vp
+      end
+    end
+    def each
+      self.videos.each {|v| yield v }
+    end
+    def url
+      opt = '?page=' + @pagenum.to_s if @pagenum
+      "#{BASE_URL}/newarrival#{opt}"
+    end
+  end
+end

data/lib/nicovideo/openlist.rb ADDED Viewed

@@ -0,0 +1,96 @@
+require 'kconv'
+module Nicovideo
+  # This class doesn't access NICONICO DOUGA when an instance created.
+  # At the first time you call this instance method, this accesses NICONICO
+  class OpenList < Page
+    include Enumerable
+    def initialize agent, video_id, sort='c', order='d', pagenum=1
+      super(agent)
+      @video_id = video_id
+      @pagenum  = pagenum
+      @sort     = sort
+      @order    = order
+      @url      = url()
+      params = ["mylists", "total_size", "has_next?", "has_prev?"]
+      self.register_getter params
+    end
+    attr_reader :pagenum
+    def id() @video_id end
+    # call whenever pagenum changed
+    def url
+      @url = BASE_URL + '/openlist/' + @video_id + "?page=#{@pagenum}&sort=#{@sort}&order=#{@order}"
+    end
+    def each
+      self.mylists.each {|ml|
+        yield ml
+      }
+    end
+    def to_a() self.mylists end
+    def pagenum=(pagenum)
+      if @pagenum != pagenum
+        @pagenum = pagenum
+        get_page(self.url, true)
+      end
+      @pagenum
+    end
+    def page=(pagenum)
+      self.pagenum = pagenum
+      self
+    end
+    def next
+      self.pagenum = @pagenum + 1
+      self
+    end
+    def prev
+      self.pagenum = @pagenum - 1
+      self
+    end
+    protected
+    def parse(page)
+      if page.body =~ /<strong>#{@video_id}<\/strong>を含む公開マイリストはありません。/
+        @not_found = true
+        raise NotFound
+      end
+      @total_size = page.search('//form[@name="sort"]//td[@class="TXT12"]//strong').first.inner_html.sub(/,/,'').to_i
+      @has_next = false
+      @has_prev = false
+      respages = page/'//div[@class="mb16p4"]//p[@class="TXT12"]//a'
+      puts_info respages.size
+      respages.each {|r| puts_info r.inner_html }
+      if respages.size > 0
+        respages.each {|text|
+          if text.inner_html =~ /前のページ/
+            @has_prev = true
+          end
+          if text.inner_html =~ /次のページ/
+            @has_next = true
+          end
+        }
+      end
+      scanpattern = /<a href=\"#{BASE_URL}\/mylist\/(\d+)\">(.+?)<\/a>/ou
+      listrefs = page.parser.to_html.scan(scanpattern)
+      @mylists = listrefs.inject([]) {|arr, v| # v[0]: mylist id, v[1]: mylist title
+        ml = MyList.new(@agent, v[0])
+        ml.title = v[1]
+        arr << ml
+      }
+    end
+  end
+end