RubyGems - grab_epg - Versions diffs - 0.0.1 → 0.0.2 - Mend

grab_epg 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED Viewed

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    Mjg3MTAwMjIxM2FlNWQwMzc5MjkzZWEzNzQ4MGMzZjkzZWNiZDgwOA==
+    Njk2ZmY1MjVlMjE5MjQwMDFiOTVmODliNDg0Zjc5OWE5MDVlMzExNA==
   data.tar.gz: !binary |-
-    YmFlYTA3ZWNjNTRlN2FmNmM4NjI5MWFlZTlhNTI3YjRiYWQ4ZGQ1Mg==
+    NTVjMzA3MGQxMDE4ODhmOGZjNmJkM2I4ZTI4ODVhYTBiYzgwZWEwZA==
 !binary "U0hBNTEy":
   metadata.gz: !binary |-
-    ZGExZWJiN2Q4NjUzNTdiODcwMzI5ZjQ3ODAzYzM5YzU0MDI1OGI2ZGI2Yjk3
-    NjQ3YTNjNTg5YzBkYWM0ZjQzNThkODM4Njk1MDI5YWJhZjQwODkxYjFlZmQw
-    NDAyN2VlM2NmODI1M2Y4OGYxMThiMmM5MzI5NGI2Y2UzYzFlZDA=
+    ZmRmOTEwNmM2M2FhOTU5YWUwNzUyNGVjMWVmZjRjMjU3NDAyZTY4YWY1ZmQz
+    N2I5OGE4MzJkMzZkMDg3Mjk1NDM5YmIwZWZmNzRkNTA3NTE4YTA5ZjFiZmM4
+    Mjc5MmZmZDI2NTYwN2M5NDFkN2Y1NGZkNzU1NWI1OTI1ODA5NDY=
   data.tar.gz: !binary |-
-    OGY5ODVkMzk0MjY4NDc1YjgzMTVkZTY3OThkZjZmZmFkZTZkNDI1NTIxZTcw
-    NmQxZjYxODg0YTkzMTE0YzNiNzFiNmE4ZmZiMGMwY2M3OGY0ZDZlYWYwZGMz
-    MzRlMzgzZGFkYTZjYTcyYWIyNGU1MTQ4ZTczZDY5NzBiZDkzMmQ=
+    MDY2YWE0YTM0OTlmNGYxZjMxOGY2YTliZWFhMmExZjhmZTY4OGEyZWZhM2Jl
+    ZWIzYWE4MThlOTY1NzIyMWE0MGU5NGE4NTA1ZTAzZjZlMWIxMjBjNjkyNjRi
+    YTI3MTA3Y2NhMGJlMjcxYjg5NjBmYmI5NmVjNzE3MTdhMTUyYTI=

data/.grabepg.gemspec CHANGED Viewed

@@ -10,5 +10,5 @@ Gem::Specification.new do |gem|
   gem.files         = `git ls-files`.split($\)
   gem.name          = "grab_epg"
   gem.require_paths = ["lib"]
-  gem.version       = "0.0.1"
+  gem.version       = "0.0.2"
 end

data/lib/debug.rb CHANGED Viewed

@@ -5,5 +5,32 @@ require 'open-uri'
 require File.expand_path("../grabepg.rb", __FILE__)
 class Debug
   # To change this template use File | Settings | File Templates.
-  p Grabepg.start
+  proxylist = ["123.125.116.243:6256", "123.125.116.243:28832", "123.125.116.243:29952", "123.125.116.243:9386", "219.234.82.73:7806", "123.125.116.243:38205", "123.125.116.243:11229", "123.125.116.243:12978", "219.234.82.89:8090", "120.197.85.173:20368", "123.125.116.243:8089", "123.125.116.243:8160", "219.234.82.78:31565", "123.125.116.243:21457", "123.125.116.241:17421", "123.125.116.243:14191", "219.234.82.88:29037", "123.125.116.242:13669", "123.125.116.243:19009", "123.125.116.243:6193", "123.125.116.242:15692", "123.125.116.241:20307", "123.125.116.242:18725", "219.234.82.82:29082", "123.125.116.243:5195", "123.125.116.242:21725", "123.125.116.241:32793", "219.234.82.60:8000", "123.125.116.242:17403", "123.125.116.243:6938", "123.125.116.242:16348", "219.234.82.54:8726", "120.197.85.173:20371", "123.125.116.241:9286", "219.234.82.88:19279", "219.234.82.89:13374", "123.125.116.242:5976"]
+  def self.test_get_doc_with_proxy(proxylist)
+    herf = "http://www.tvmao.com/drama/HS5oLCs="
+    Grabepg.get_doc_with_proxy(proxylist,herf)
+  end
+  def self.test_get_show_infomation(proxylist)
+    herf = "http://www.tvmao.com/tvcolumn/cVhPLQ=="
+    Grabepg.get_show_infomation(proxylist,herf)
+  end
+  def self.test_getschedule(proxylist)
+    channel = "HUNANTV"
+    herf = "/program/HUNANTV-HUNANTV-w1.html"
+    Grabepg.getschedule(channel,herf,proxylist,site="http://www.tvmao.com")
+  end
+  def self.test_get_show_schedule(proxylist)
+    herf = "http://www.tvmao.com/tvcolumn/cVhPLQ=="
+    Grabepg.get_show_schedule(proxylist,herf)
+  end
+ Grabepg.start
+ #p test_get_show_schedule(proxylist)
+ #p test_getschedule(proxylist)
+ # p test_get_show_infomation(proxylist)
 end

data/lib/grabepg.rb CHANGED Viewed

@@ -9,21 +9,28 @@ module Grabepg
   attr_reader :channel  #频道列表
   attr_reader :site #网站地址
+  attr_reader :proxyindex #代理的索引
+  attr_reader :show_schedule #根据节目的时间表
   DEFAULT_GrabtvType=["cctv","satellite","digital",]
   DEFAULT_SITE = "http://www.tvmao.com"
+  #调用此方法的例子
   def self.start
     @channel = []
     @site = DEFAULT_SITE
     channel_urls = self.getchannels
-    getSchudle(channel_urls)
+    proxy_list=get_topfast_list
+    channel_urls.each do |channel,url|
+      p "****************************************GetSchedule : #{getschedule(channel,url,proxy_list)}"
+    end
   end
   #获取网站的频道表
   def self.getchannels
+    @proxyindex = 0
     channel_urls = {}
     get_url =lambda { |type|
@@ -55,12 +62,45 @@ module Grabepg
     channel_urls
   end
-  def self.getSchudle(channel,url)
-    _img_url = "http://static.haotv.me/channel/logo/"
+  #使用代理获取url的html的doc值
+  def self.get_doc_with_proxy(proxylist,url)
+    unless @proxyindex
+      @proxyindex = 0
+    end
+    @proxyindex=@proxyindex%proxylist.size
+    if(proxylist[@proxyindex]!="123.125.116.243:6256"||proxylist[@proxyindex]!="http://123.125.116.243:28832")
+      proxy = proxylist[@proxyindex]
+    else
+      proxy = proxylist[@proxyindex+1]
+    end
+    begin
+      doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
+      @no_firest = false
+    rescue => err
+      @no_firest = true
+      p "*************************Proxy:#{proxy}, url:#{url}"
+      get_doc_with_proxy(proxylist,url) unless @no_firest
+      raise RuntimeError,"Error: #{err.to_s}" if @no_firest
+    end
+    @proxyindex += 1
+    unless doc
+      p "*************************Proxy:#{proxy}, url:#{url}"
+    end
+    doc
+  end
+  #获取节目表
+  def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com")
+    if(@site)
+      site=@site
+    end
+    _img_url = "http://static.haotv.me/channel/logo/"
+    @show_schedule = {}
     get_week_url = lambda {|url|
-       _url = @site
+       _url = site
        urls = []
        _urls = url.split("-")
        0.upto(1).each do |i|
@@ -71,26 +111,173 @@ module Grabepg
       end
       urls
     }
+    channel_schedule = {}
     get_week_url.call(herf).each do |url|
-      p url
-      doc = Nokogiri::HTML(open(url))
+      p "Grab: #{url}"
+      #if(proxylist[proxyidex]!="219.234.82.89:33948")
+      #  proxy = proxylist[@proxyidex]
+      #else
+      #  proxy = proxylist[@proxyidex+1]
+      #end
+      #p "Proxy: http://#{proxy}"
+      #doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
+      #@proxyidex += 1
+      doc = get_doc_with_proxy(proxylist,url)
+      show_type = []
       img_url = _img_url + channel+".jpg"
       data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
       date = data[0]
       week = data[1]
       p "Channel: #{channel}  Date: #{date} Week: #{week}"
-      doc.css('ul[id="pgrow"]')[0].css("li").each do |schudel|
-        if schudel.content.split(" ").size>1
-          time = schudel.content.split(" ")[0]
-          schudel = schudel.content.split(" ")[1]
-          p "Time: #{time} Schudel: #{schudel}"
+      schedule_list = []
+      doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
+        _herf= schedule.xpath('a[@href]')[0]
+        schedule_herf=_herf.get_attribute("href") if _herf
+        unless _herf
+          drama =schedule.css('a[class="drama"]')[0]
+          if drama
+            _herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
+            schedule_herf = _herfs.split("#%#")[0]
+          end
+        end
+        if schedule.content.split(" ").size>1
+          time = schedule.content.split(" ")[0]
+          schedule = schedule.content.split(" ")[1]
+          show_name = ""
+          unless schedule_herf.nil?||schedule_herf.empty?
+            show_infomation=get_show_infomation(proxylist,schedule_herf)
+            show_type=show_infomation["type"]
+            show_name = show_infomation["name"]
+          end
+          p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf}  type: #{show_type} name: #{show_name}"
+          schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
         end
       end
+      channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
+    end
+    {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
+  end
+  #获取节目详细信息
+  def self.get_show_infomation(proxy_list,schedule_herf)
+    @proxyindex = 0
+    unless @site
+      @site = "http://www.tvmao.com"
+    end
+    schedule_herf = @site + schedule_herf
+    doc=get_doc_with_proxy(proxy_list,schedule_herf)
+    #title = doc.css("a[herf='#{schedule_herf}+/detail']")[0]['title']
+   # p "title: %s" % title
+    type = []
+    name = doc.css('span[itemprop="name"]')[0].content
+    doc.css('span[itemprop="genre"]').each do |_type|
+      type << _type.content
+    end
+    doc.css('a[itemprop="genre"]').each do |_type|
+      type<<_type.content
+    end
+    url = "#{schedule_herf}/detail"
+    doc = get_doc_with_proxy(proxy_list,url)
+    doc.css('span[itemprop="genre"]').each do |_type|
+      type << _type.content
+    end
+    doc.css('a[itemprop="genre"]').each do |_type|
+      type<<_type.content
+    end
+    type.uniq!
+    @show_schedule.merge!(name=>get_show_schedule(proxy_list,schedule_herf)) unless @show_schedule.has_key?(name)
+    {"type"=>type,"name"=>name}
+  end
+  #获取节目的时间表
+  def self.get_show_schedule(proxylist,herf)
+    url = herf + "/playingtime"
+    doc = get_doc_with_proxy(proxylist,url)
+    i = 0
+    schedule = []
+    doc.css('div[id="epg"]')[0].css("div[class='c1 col']").each do |epg|
+      unless(i==0)
+        time = epg.css('div[class="f1 fld"]')[0].content
+        channel_name = epg.css('div[class="f2 fld"]')[0].content
+        show_name = epg.css('div[class="f3 fld"]')[0].content
+        times = time.split(" ")
+        week = times[0]
+        date = times[1]
+        _time = times[2]
+        schedule << {"week"=>week,"date"=>date,"time"=>_time,"channel_name"=>channel_name,"show_name"=>show_name}
+      end
+      i += 1
     end
+    schedule
   end
+  #获取指定访问速度的代理服务器
+  def self.get_topfast_list()
+    fast_list = []
+    time_use = 0
+    ips_ports = get_proxy_list()
+    ips_ports.each do |ip_port|
+      time_start = Time.now.to_i
+      begin
+        timeout(5) do
+          doc = Nokogiri::HTML(open("http://www.tvmao.com/program",:proxy=> "http://#{ip_port}"))
+        end
+        time_end = Time.now.to_i
+        time_use = time_end - time_start
+        p  "http://#{ip_port}   use_time:#{time_use}"
+      rescue Exception =>e
+        case e
+          when Errno::ETIMEDOUT
+            p "Use http://#{ip_port} timeout"
+          when Timeout::Error
+            p "Use http://#{ip_port} timeout"
+          when Errno::ECONNREFUSED
+            p "Use http://#{ip_port} Error connection"
+          else
+            p "Use http://#{ip_port} Error:#{e.to_s}"
+        end
+        time_use = -1
+      end
+      if(time_use > 0 &&time_use < 8)
+        fast_list << ip_port
+      end
+    end
+    fast_list
+  end
+  #获取代理列表
+  def self.get_proxy_list()
+    list = gg('http://www.proxycn.cn/html_proxy/30fastproxy-1.html')
+    if list.count ==0
+      list = gg('http://www.proxycn.cn/html_proxy/http-1.html')
+    end
+    ips_ports = []
+    regex_port = /(?<=<TD class="list">)[0-9]*?(?=<\/TD>)/
+    regex_ip   = /(?<=a href\=whois.php\?whois\=)[0-9,.]*/
+    list.each do |proxy_txt|
+      port = proxy_txt[regex_port]
+      ip = proxy_txt[regex_ip]
+      if(ip != ""&& !port.to_s.eql?('3128'))
+        port_ip = ip.to_s + ":" + port.to_s
+        ips_ports << port_ip
+      end
+    end
+    p "Count: #{ips_ports.count}"
+    ips_ports
+  end
+  def self.gg(url)
+    regex_list = /<TD class="list">.*<\/TD>/
+    href =URI.parse(url)
+    contxt = ""
+    href.open{ |f|
+      f.each_line {|line| contxt =contxt + line + "\n"}
+    }
+    list = contxt.scan(regex_list)
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: grab_epg
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 platform: ruby
 authors:
 - hahazql
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-04-25 00:00:00.000000000 Z
+date: 2013-04-26 00:00:00.000000000 Z
 dependencies: []
 description: ! '"用于从TVMAO抓取EPG信息"'
 email: