RubyGems - grab_epg - Versions diffs - 0.2.3 → 0.2.4 - Mend

grab_epg 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml CHANGED Viewed

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    MTI5MTMwMTY1NTRmZjk5NGIwZGM4MTk3NTljNGFjMzAwOWY4NTdiNA==
+    ZWMyZTQzOThkZWI3YTUxYjIwZGU5ZGRkZmQ2ZTQzNTBjYjAxYWE5ZA==
   data.tar.gz: !binary |-
-    NzNjZTc3YzY1OWZkYTZjOGUzNTVjNzVmZjgzYjg3NjQyZWUzNGFmMg==
+    YjM0MDViYzQzZGQ3OWNiYjk3ZmVjMDA0Mzk2OTA0M2UzNjdlNTdkNg==
 !binary "U0hBNTEy":
   metadata.gz: !binary |-
-    ZGMwNThmMzZlY2FmZmI1ZmQzNjY5ODdkYTI4MTk5MWI2NWZiODBlZjQ1YzNk
-    MzllOGE1YmNkZjRiNjc3MDlhY2FjZjMyNjFiNTcxYjFlZTlmYzgwNmVlMmQx
-    NzUxODIwMGE1MjgxZWM0NWY3ZDlmNWE0YmYyN2U0NTY1YjU3NmQ=
+    YjE5ZTgxMzg5OTIwYTNjYTNlMDkzYWVlMWMwODczZDk3ZTZkNzIzYmQzZTI0
+    YWUwYTNhOWVjNjQyNTVhNjAwODYxY2RhOTc4YmM4YWI2ZmMzNzI3ZjRhMmZj
+    MTg2MjJmOWIzNmRhODU1N2MwNDAwZmJhYmQwZTk2ZDU3MTU1YzU=
   data.tar.gz: !binary |-
-    MTA3MmE5MGZkMzU5YzliYjljNTQ1NzljNWViYTQ5YWU5ZmNlZGQ5OWJmZTUz
-    YmZiN2QyNjUzOTk0OGQwMzM0NmZjOTEwOTI2MzJkZjAxMDg5YzdlNzUxNjM3
-    Y2VkNmQzMGUyMjQ0Nzc5MTZkMGE5NjY4Y2IwZTY2ZGI5Y2MyOTA=
+    MGM5MmEzZWU0MjcwZTgyNzliOTRkYjhkOGQ2Njk4OTZlOTI1ZjFkMjI5MGJi
+    MzE4Mjg0MmJjNmE2ZmJkY2YwNTQzM2QzZDcwNDQxNzM4MWE4NTI4ZjU2ZmMz
+    YzMwZTcyMTA1NjJiMTlhY2NhZGE0MTU4NzJiMzBkZTVjNWVkZGM=

data/.grabepg.gemspec CHANGED Viewed

@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
   gem.files         = `git ls-files`.split($\)
   gem.name          = "grab_epg"
   gem.require_paths = ["lib"]
-  gem.version       = "0.2.3"
+  gem.version       = "0.2.4"
   gem.homepage      = "https://github.com/hahazql/grab_epg"
 end

data/lib/debug.rb CHANGED Viewed

@@ -3,10 +3,12 @@
 require 'nokogiri'
 require 'open-uri'
 require File.expand_path("../test/test_grab_tvsou.rb", __FILE__)
+require File.expand_path("../test/test_grab_tvmao.rb", __FILE__)
 #require 'test/test_grab_tvsou'
 class Debug
   # To change this template use File | Settings | File Templates.
   #proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
- p TestGrabTvsou.start
+# p TestGrabTvsou.new.get_channel_logo
+  p TestGrabTvmao.new.test_get_show_type_by_batch
 end

data/lib/grab_tvmao.rb CHANGED Viewed

@@ -3,7 +3,11 @@
 require 'nokogiri'
 require 'open-uri'
-module GrabTvmao
+require File.expand_path("../grabepg/grab_base.rb", __FILE__)
+require File.expand_path("../grabepg/grab_tvsou.rb", __FILE__)
+module Grabepg
+  class GrabTvmao
   # To change this template use File | Settings | File Templates.
@@ -22,10 +26,115 @@ module GrabTvmao
+  def initialize
+    @grabbase = GrabBase.new
+  end
+  #批量从tvmao获取节目类型
+  #channel 节目表属于的屏道
+  #url 节目表获取的网络地址
+  #date 日期
+  #schedule 需要批量修改的时间表
+  #proxylist 代理列表
+  def get_show_type_by_batch(channel,url,date,schedule,proxylist)
+    _schedule =  {}
+    schedule.each do |s|
+     time = s["schedule_start"].gsub(":","").to_i
+     _schedule.merge!(time=>s)
+    end
+    url = get_show_type_url(url,date)
+    schedules = get_schedulelist_atday(channel,url,proxylist)
+    type = nil
+    schedules.each do |schedule|
+      schedule_time_num = schedule["schedule_start"].gsub(":","").to_i
+      if _schedule.has_key?(schedule_time_num)
+        _schedule[schedule_time_num]["type"]=_schedule[schedule_time_num]["type"]|schedule["type"]
+        p "*****************************************************************************************"
+        p "Schedule: #{_schedule[schedule_time_num]}"
+        p "schedule_logo_1: #{_schedule[schedule_time_num]["schedule_logo"]}"
+        p "schedule_logo_2: #{_schedule[schedule_time_num][:schedule_logo]}"
+        if _schedule[schedule_time_num]["schedule_logo"]==""
+          unless schedule["img"]==""
+            _schedule[schedule_time_num]["schedule_logo"]=schedule["img"]
+          end
+        end
+      end
+    end
+    ret = []
+    _schedule.each do |key,value|
+      ret << value
+    end
+    ret
+  end
+  #批量从tvmao获取节目类型
+  #channel 节目表属于的屏道
+  #url 节目表获取的网络地址
+  #date 日期
+  #time 节目开始时间
+  #proxylist 代理列表
+  def get_show_type(channel,url,date,time,proxylist)
+    url = get_show_type_url(url,date)
+    schedules = get_schedulelist_atday(channel,url,proxylist)
+    _time_num = time.gsub(":","").to_i
+    type = nil
+    schedules.each do |schedule|
+      schedule_time_num = schedule["schedule_start"].gsub(":","").to_i
+      if _time_num==schedule_time_num
+        type = schedule["type"]
+      end
+    end
+    if type
+      return type
+    else
+      return []
+    end
+  end
+  def get_show_type_url(url,date)
+    whatday = 0
+    _date = date.split("(")[0]
+    case _date
+      when "星期一"
+        whatday=1
+      when "星期二"
+        whatday=2
+      when "星期三"
+        whatday=3
+      when "星期四"
+        whatday=4
+      when "星期五"
+        whatday=5
+      when "星期六"
+        whatday=6
+      when "星期日"
+        whatday=7
+    end
+    get_week_url = lambda {|url,whatday|
+      _url = "http://www.tvmao.com"
+      urls = []
+      _urls = url.split("-")
+      0.upto(1).each do |i|
+        _url = _url+"#{_urls[i]}"+"-"
+      end
+        url = _url+"w#{whatday}.html"
+      return url
+    }
+    return get_week_url.call(url,whatday)
+  end
 #将星期的wday获取值转化为中文名
 #conversion wady to chinese
-  def self.conversion_what_day(whatday)
+  def conversion_what_day(whatday)
     ret = "星期"
     case whatday.to_i
       when 1
@@ -47,7 +156,7 @@ module GrabTvmao
   end
   #如果时间为1~9的一位则为其在数字前加0补齐二位
-  def self.dispose_time(num)
+  def dispose_time(num)
     num = num.to_s
     if num.length < 2
       num = "0"+num
@@ -56,7 +165,7 @@ module GrabTvmao
   end
   #转化当前时间的格式
-  def self.get_week_date_time(time)
+  def get_week_date_time(time)
     month = time.month
     day = time.day
     whatday = time.wday
@@ -65,26 +174,26 @@ module GrabTvmao
   end
   #前几天需要减去的num
-  def self.del_day_num(day_num)
+  def del_day_num(day_num)
     ret = day_num*60*60*24
     ret
   end
   #获取距离当前多少天的之前的日期
-  def self.get_time_day_prior(num)
+  def get_time_day_prior(num)
     time = Time.now - del_day_num(num)
     ret = get_week_date_time(time)
     ret
   end
   #前面一周要删除的日期的列表
-  def self.del_time_list
+  def del_time_list
     ret = []
     time = Time.now
     wday = time.wday
     if(wday==1)
       for i in 0..7
-        ret<<self.get_time_day_prior(i)
+        ret<<get_time_day_prior(i)
       end
     end
     ret
@@ -94,7 +203,7 @@ module GrabTvmao
   #调用此方法的例子
-  def self.start
+  def start
     #作用是获取俩个字符串的相似度
     #get str1 and str2 similarity
     get_similarity_string = lambda { |str1,str2|
@@ -201,14 +310,14 @@ module GrabTvmao
       end
     end
-  def self.img_down_path
+  def img_down_path
     @img_down_path
   end
   #获取网站的频道表
   #img_path 图片存放路径
-  def self.getchannels(img_dir_path)
+  def getchannels(img_dir_path)
     @channel = []
     @site=DEFAULT_SITE
     @proxyindex = 0
@@ -252,39 +361,76 @@ module GrabTvmao
     {"channel_info"=>channel_info,"channel_urls"=>channel_urls}
   end
-  #使用代理获取url的html的doc值
-  def self.get_doc_with_proxy(proxylist,url)
-    unless @proxyindex
-      @proxyindex = 0
-    end
-    @proxyindex=@proxyindex%proxylist.size
-    if(proxylist[@proxyindex])
-      proxy = proxylist[@proxyindex]
-    else
-      proxy = proxylist[@proxyindex+1]
-    end
-    begin
-      doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
-      doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
-      @no_firest = 0
-    rescue => err
+    def err_doc_proxy(proxy,proxylist,url="",err="")
+      if proxy.empty?||proxy.nil?
+        proxylist.delete_at[@proxyindex]
+      end
       unless @no_firest
         @no_firest = 0
       end
       @no_firest += 1
-      p "*************************Proxy:#{proxy}, url:#{url} Error:#{err.to_s}"
+      p "*************************Proxy:#{proxy}, url:#{url} Error:#{err}"
       #proxylist.delete(proxy)    #删除出错的代理  但如果是此网页错误则会引起BUG待修复
-      get_doc_with_proxy(proxylist,url) if @no_firest<4
-      raise RuntimeError,"Error: #{err.to_s}" unless @no_firest<4
+      @proxyindex += 1
+      @proxyindex=@proxyindex%@size
+      doc=get_doc_with_proxy(proxylist,url) if @no_firest<4
+      unless @no_firest<4
+        @no_firest=0
+        raise RuntimeError,"Error: #{err}"
+      end
+      doc
+    end
+    #使用代理获取url的html的doc值
+    def get_doc_with_proxy(proxylist,url)
+      unless proxylist.nil?||proxylist.empty?
+        unless @proxyindex
+          @proxyindex = 0
+        end
+        @size = proxylist.size
+        @proxyindex=@proxyindex%proxylist.size
+        if(proxylist[@proxyindex])
+          proxy = proxylist[@proxyindex]
+        else
+          proxy = proxylist[@proxyindex+1]
+        end
+        begin
+          doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}").read) unless proxy.nil?||proxy.empty?
+          if doc.nil?
+            p "DOC is nil"
+            doc=err_doc_proxy(proxy,proxylist,url,"doc nil")
+            @no_firest=0
+          end
+          @no_firest = 0
+        rescue => err
+          p "IN Rescue"
+          doc=err_doc_proxy(proxy,proxylist,url,err.to_s)
+          @no_firest=0
+          p "Get DOC"
+          @proxyindex += 1
+          @proxyindex=@proxyindex%@size
+          return doc
+        end
+        @proxyindex += 1
+        @proxyindex=@proxyindex%@size
+      else
+        begin
+          doc = Nokogiri::HTML(open(url).read) if proxy.nil?||proxy.empty?
+        rescue => err
+          p "Error : Proxy:#{proxy}, url:#{url}"
+          raise RuntimeError,"Error: #{err.to_s} Method:get_doc_with_proxy"
+        end
+      end
+      doc
     end
-    @proxyindex += 1
-    doc
-  end
   #获取某天的节目表
-  def self.get_schedulelist_atday(channel,url,proxylist)
+  def get_schedulelist_atday(channel,url,proxylist)
     p "Grab: #{url}"
     doc = get_doc_with_proxy(proxylist,url)
     show_type = []
@@ -322,6 +468,7 @@ module GrabTvmao
         schedule = schedule.content.split(" ")[1]
         show_name = ""
         unless schedule_herf.nil?||schedule_herf.empty?
+          p "Show_infomation:#{schedule_herf} Time:#{time}"
           show_infomation=get_show_infomation(proxylist,schedule_herf)
           show_type=show_infomation["type"]
           show_name = show_infomation["name"]
@@ -337,7 +484,7 @@ module GrabTvmao
   #获取制定时间和长度url
   #start_time 为int型 开始时间和今天的差值 正数代表之后的第几天 负数代表之前的第几天
   #day_num 为int型 代表抓取的时间从开始时间计算的多少天
-  def self.get_assign_date_url(url,start_time,day_num)
+  def get_assign_date_url(url,start_time,day_num)
     site="http://www.tvmao.com"
     if(@site)
       site=@site
@@ -373,7 +520,7 @@ module GrabTvmao
   #获取指定时间段的节目表
-  def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
+  def getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
     begin
       day_num = 1 if day_num<1
     rescue
@@ -394,7 +541,7 @@ module GrabTvmao
     channel_schedule = {}
     get_assign_date_url(herf,start_num,day_num).each do |url|
       @date = ""
-      schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
+      schedule_list = get_schedulelist_atday(channel,url,proxylist)
       channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
     end
     @img_down_file.close
@@ -407,7 +554,7 @@ module GrabTvmao
   #因原已调用所以保留
   #获取一周节目表
-  def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
+  def getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
     p "Day Num is #{day_num}"
     begin
       day_num = 1 if day_num<1
@@ -442,7 +589,7 @@ module GrabTvmao
     channel_schedule = {}
     get_week_url.call(herf,day_num).each do |url|
       @date = ""
-      schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
+      schedule_list = get_schedulelist_atday(channel,url,proxylist)
       channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
     end
     @img_down_file.close
@@ -451,16 +598,14 @@ module GrabTvmao
   #获取节目详细信息
-  def self.get_show_infomation(proxy_list,schedule_herf)
+  def get_show_infomation(proxy_list,schedule_herf)
     begin
     @proxyindex = 0
     unless @site
       @site = "http://www.tvmao.com"
     end
     schedule_herf = @site + schedule_herf
-    doc=get_doc_with_proxy(proxy_list,schedule_herf)
-    #title = doc.css("a[herf='#{schedule_herf}+/detail']")[0]['title']
-   # p "title: %s" % title
+    doc = get_doc_with_proxy(proxy_list,schedule_herf)
     type = []
     name = doc.css('span[itemprop="name"]')[0].content
@@ -479,39 +624,42 @@ module GrabTvmao
     end
     url = "#{schedule_herf}/detail"
     doc = get_doc_with_proxy(proxy_list,url)
-    doc.css('span[itemprop="genre"]').each do |_type|
-      type << _type.content
-    end
-    doc.css('a[itemprop="genre"]').each do |_type|
-      type<<_type.content
+    if doc
+      doc.css('span[itemprop="genre"]').each do |_type|
+        type << _type.content
+      end
     end
     type.uniq!
-    @img_down_file.puts("#{name}:#{schedule_img_down_path}")
+    unless @show_schedule
+      @show_schedule={}
+    end
     @show_schedule.merge!(name=>get_show_schedule(proxy_list,schedule_herf)) unless @show_schedule.has_key?(name)
     {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
-    rescue => e
-      p "Error In get_show_infomation msg : #{e.to_s}"
+    #rescue => e
+    #  p "Error In get_show_infomation msg : #{e.to_s}"
     end
   end
   #获取节目的时间表
-  def self.get_show_schedule(proxylist,herf)
+  def get_show_schedule(proxylist,herf)
     url = herf + "/playingtime"
     doc = get_doc_with_proxy(proxylist,url)
     i = 0
     schedule = []
-    doc.css('div[id="epg"]')[0].css("div[class='c1 col']").each do |epg|
-      unless(i==0)
-        time = epg.css('div[class="f1 fld"]')[0].content
-        channel_name = epg.css('div[class="f2 fld"]')[0].content
-        show_name = epg.css('div[class="f3 fld"]')[0].content
-        times = time.split(" ")
-        week = times[0]
-        date = times[1]
-        _time = times[2]
-        schedule << {"week"=>week,"date"=>date,"time"=>_time,"channel_name"=>channel_name,"show_name"=>show_name}
+    if  doc.css('div[id="epg"]')[0]
+      doc.css('div[id="epg"]')[0].css("div[class='c1 col']").each do |epg|
+        unless(i==0)
+          time = epg.css('div[class="f1 fld"]')[0].content
+          channel_name = epg.css('div[class="f2 fld"]')[0].content
+          show_name = epg.css('div[class="f3 fld"]')[0].content
+          times = time.split(" ")
+          week = times[0]
+          date = times[1]
+          _time = times[2]
+          schedule << {"week"=>week,"date"=>date,"time"=>_time,"channel_name"=>channel_name,"show_name"=>show_name}
+        end
+        i += 1
       end
-      i += 1
     end
     schedule
   end
@@ -521,7 +669,7 @@ module GrabTvmao
   #获取指定访问速度的代理服务器
   #time为最慢速度的时间 int型 代表秒
-  def self.get_topfast_list(use_time)
+  def get_topfast_list(use_time)
     fast_list = []
     time_use = 0
     ips_ports = get_proxy_list()
@@ -555,7 +703,7 @@ module GrabTvmao
   end
   #获取代理列表
-  def self.get_proxy_list()
+  def get_proxy_list()
     list = gg('http://www.proxycn.cn/html_proxy/30fastproxy-1.html')
     if list.count ==0
       list = gg('http://www.proxycn.cn/html_proxy/http-1.html')
@@ -575,7 +723,7 @@ module GrabTvmao
     ips_ports
   end
-  def self.gg(url)
+  def gg(url)
     regex_list = /<TD class="list">.*<\/TD>/
     href =URI.parse(url)
     contxt = ""
@@ -588,5 +736,5 @@ module GrabTvmao
   def save_img
   end
+ end
 end

data/lib/grabepg/grab_base.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 #encoding:utf-8
 require 'nokogiri'
+require 'iconv'
 module Grabepg
@@ -22,13 +23,48 @@ module Grabepg
           ret += "五"
         when 6
           ret += "六"
-        when 7
-          ret += "七"
+        when 0
+          ret += "日"
       end
       ret
     end
+    def self.proxy_list(path)
+      proxy_list = []
+      crt_date = DateTime.now.strftime('%F')
+      proxy_path = "%s/proxy/%s.txt" % [File.dirname(path),crt_date]
+      p "Proxy_Path: #{proxy_path}"
+      if File.exist?(proxy_path)
+        file_proxy = File.open(proxy_path,"r")
+        file_proxy.each_line {|line|
+          proxy_list << line.chomp.to_s
+        }
+        p "Get Proxy_list:#{proxy_list}"
+        file_proxy.flush
+        file_proxy.close
+      else
+        proxy_list=GetProxyList.get_list(ENV["proxy_limit"].to_i,ENV["proxy_page"].to_i)
+        dirpath = "#{File.dirname(path)}/proxy/"
+        Dir.open(dirpath)  {|fna|
+          fna.each do |fn|
+            if(fn.to_s != ".." && fn.to_s != ".")
+              File.delete("#{dirpath + fn.to_s}")
+            end
+          end
+        }
+        file_proxy = File.new(proxy_path,"a")
+        proxy_list.each do |proxy|
+          p "Proxy:#{proxy}"
+          file_proxy.puts proxy
+        end
+        file_proxy.flush
+        file_proxy.close
+      end
+      return proxy_list
+    end
     #获取指定访问速度的代理服务器
     #time为最慢速度的时间 int型 代表秒
     def self.get_topfast_list(use_time)
@@ -157,8 +193,10 @@ module Grabepg
           proxy = proxylist[@proxyindex+1]
         end
         begin
-          doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
+          ic = Iconv.new("UTF-8//IGNORE","GB2312")
+          doc = Nokogiri::HTML(ic.iconv(open(url,:proxy=>"#{proxy}").read)) unless proxy.nil?||proxy.empty?
           if doc.nil?
+          	p "DOC is nil"
             doc=err_doc_proxy(proxy,proxylist,url,"doc nil")
             @no_firest=0
           end
@@ -176,7 +214,8 @@ module Grabepg
         @proxyindex=@proxyindex%@size
       else
         begin
-          doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
+          ic = Iconv.new("GB2312//IGNORE","GB2312")
+          doc = Nokogiri::HTML(ic.iconv(open(url).read)) if proxy.nil?||proxy.empty?
         rescue => err
           p "Error : Proxy:#{proxy}, url:#{url}"
           raise RuntimeError,"Error: #{err.to_s} Method:get_doc_with_proxy"

data/lib/grabepg/grab_tvsou.rb CHANGED Viewed

@@ -36,6 +36,10 @@ module Grabepg
       @site="http://m.tvsou.com"
     end
+    def get_proxy_list
+      @proxy_list
+    end
     #获取从tvsou的什么网站上获取
     #type: mobile,webpage
     def get_url(type)
@@ -44,11 +48,15 @@ module Grabepg
     def get_data_year_month_day(time)
-      month = time.month.to_s
+      month=time.month.to_s
       if month.length<2
-        month = "0"+month
+        month="0"+month
       end
-      return {time:"#{time.year}-#{time.month}-#{time.day}",date:"#{@grabbase.conversion_what_day(time.wday)}(#{month}-#{time.day})"}
+      day = time.day.to_s
+      if day.length<2
+      	day = "0"+day
+      end
+      return {time:"#{time.year}-#{time.month}-#{day}",date:"#{@grabbase.conversion_what_day(time.wday)}(#{month}-#{day})"}
     end
     #获取时间
@@ -111,6 +119,28 @@ module Grabepg
     end
+    #获取频道图标地址
+    # url 手机表的URL值
+    # channel_type 频道类型
+    # no_dis 直接使用URL 不处理
+    def get_channel_logo(_url,channel_type,no_dis=false)
+      if no_dis
+        url = _url
+      else
+        tvs = _url.split("TVid=")
+        tvid = tvs[1].split("&")[0]
+        channelids = _url.split("Channelid=")
+        channelid = channelids[1].split("&")[0]
+        if channel_type=="CCTV"
+          url = "http://epg.tvsou.com/programys/TV_#{tvid}/Channel_#{channelid}/W1.htm"
+        elsif channel_type=="WTV"
+          url = "http://epg.tvsou.com/programws/TV_#{tvid}/Channel_#{channelid}/W1.htm"
+        end
+      end
+      doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
+      logo_network_path=doc.css("div[id='epg_m1']").css("img")[0].get_attribute("src")
+      return logo_network_path
+    end
     #获取频道时间表URL
@@ -131,11 +161,12 @@ module Grabepg
    #根据URL解析时间表页面
    def dispose_schedule_page(url,start_time,use_time)
-     url = @site +"/"+url
+     url = url
      urls = url.split("?")
      begin
      doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
      @error_num = 0
+     _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
      rescue => err
        unless @error_num
          @error_num = 0
@@ -144,7 +175,6 @@ module Grabepg
        raise err.to_s  if @error_num==5
        dispose_schedule_page(url,start_time,use_time)
      end
-     _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
      _url = urls[0]+_url
      urls = dispose_href_schedule_data(_url,start_time,use_time)
      ret = {}
@@ -162,7 +192,8 @@ module Grabepg
              _dispose = schedule.content
              _dispose_show =schedule.css("span")[0].text
              time = _dispose.gsub(_dispose_show,"")
-             _url = @site+"/" + schedule.css('a')[0].get_attribute("href") if schedule.css('a')[0]
+             href =schedule.css('a')[schedule.css('a').count-1].get_attribute("href")
+             _url = @site+"/" + href if schedule.css('a')[0]
              schedules << {time:time,schedule_name:_dispose_show.delete(" 剧情"),url:_url}
              now = time.gsub(":","").to_i
              if((now-last_time)<5)
@@ -190,6 +221,14 @@ module Grabepg
     #解析节目详情页面
     def dispose_show_info(url)
       doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
+      if doc.nil?
+        unless @error_num
+          @error_num = 0
+        end
+        @error_num+=1
+        raise err.to_s  if @error_num==5
+        dispose_show_info(url)
+      end
       begin
       show_name = doc.css('div[class="tv_info_top"]')[0].content
       _doc=doc.css("div[class='tv_info']")
@@ -210,4 +249,4 @@ module Grabepg
   end
-end
+end

data/lib/test/test_grab_tvsou.rb CHANGED Viewed

@@ -11,20 +11,19 @@ class TestGrabTvsou
   end
   def get_data(start_time,use_time)
-    @grabtvsou.get_data(0,5)
+    @grabtvsou.get_data(start_time,use_time)
   end
   def dispose_href_schedule_data(href,start_time,use_time)
     @grabtvsou.dispose_href_schedule_data(href,start_time,use_time)
   end
-  def dispose_schedule_page()
-    href = "http://m.tvsou.com/epg.asp?TVid=1&Channelid=1&pro=ys"
+  def dispose_schedule_page(href="http://m.tvsou.com/epg.asp?TVid=1&Channelid=1&pro=ys")
    @grabtvsou.dispose_schedule_page(href,0,1)
   end
   def dispose_show_info
-    hrefs = ["http://m.tvsou.com/jq3.asp?id=81300&tid=3","http://m.tvsou.com/intro.asp?id=145"]
+    hrefs = ["http://msou.com//jq3.asp?id=75928&tid=3","http://m.tvsou.com//jq3.asp?id=89450&tid=3"]
     ret = []
     hrefs.each do |href|
       ret<<@grabtvsou.dispose_show_info(href)
@@ -36,6 +35,11 @@ class TestGrabTvsou
     @grabtvsou.dispose_home_page
   end
+  def get_channel_logo(url="epg.asp?TVid=1&Channelid=1&pro=ys")
+    @grabtvsou.get_channel_logo(url)
+  end
   def self.start
     _grabtvsou = GrabTvsou.new("mobile",[])
     p channels = _grabtvsou.dispose_home_page

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: grab_epg
 version: !ruby/object:Gem::Version
-  version: 0.2.3
+  version: 0.2.4
 platform: ruby
 authors:
 - hahazql
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-06-04 00:00:00.000000000 Z
+date: 2013-06-07 00:00:00.000000000 Z
 dependencies: []
 description: ! '"用于抓取EPG信息"'
 email: