RubyGems - grab_epg - Versions diffs - 0.0.1 - Mend

grab_epg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml +15 -0
data/.grabepg.gemspec +14 -0
data/Gemfile +5 -0
data/lib/debug.rb +9 -0
data/lib/grabepg.rb +96 -0
data/lib/grabepg.rb~ +115 -0
data/projectFilesBackup/.idea/grabepg.iml +9 -0
metadata +50 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    Mjg3MTAwMjIxM2FlNWQwMzc5MjkzZWEzNzQ4MGMzZjkzZWNiZDgwOA==
+  data.tar.gz: !binary |-
+    YmFlYTA3ZWNjNTRlN2FmNmM4NjI5MWFlZTlhNTI3YjRiYWQ4ZGQ1Mg==
+!binary "U0hBNTEy":
+  metadata.gz: !binary |-
+    ZGExZWJiN2Q4NjUzNTdiODcwMzI5ZjQ3ODAzYzM5YzU0MDI1OGI2ZGI2Yjk3
+    NjQ3YTNjNTg5YzBkYWM0ZjQzNThkODM4Njk1MDI5YWJhZjQwODkxYjFlZmQw
+    NDAyN2VlM2NmODI1M2Y4OGYxMThiMmM5MzI5NGI2Y2UzYzFlZDA=
+  data.tar.gz: !binary |-
+    OGY5ODVkMzk0MjY4NDc1YjgzMTVkZTY3OThkZjZmZmFkZTZkNDI1NTIxZTcw
+    NmQxZjYxODg0YTkzMTE0YzNiNzFiNmE4ZmZiMGMwY2M3OGY0ZDZlYWYwZGMz
+    MzRlMzgzZGFkYTZjYTcyYWIyNGU1MTQ4ZTczZDY5NzBiZDkzMmQ=

data/.grabepg.gemspec ADDED Viewed

@@ -0,0 +1,14 @@
+# encoding: utf-8
+Gem::Specification.new do |gem|
+  gem.authors       = ["hahazql"]
+  gem.email         = ["hahazhouqunli@gmail.com"]
+  gem.description   = %q{"用于从TVMAO抓取EPG信息"}
+  gem.summary       = %q{"Grab EPG"}
+  gem.homepage      = ""
+  gem.files         = `git ls-files`.split($\)
+  gem.name          = "grab_epg"
+  gem.require_paths = ["lib"]
+  gem.version       = "0.0.1"
+end

data/Gemfile ADDED Viewed

@@ -0,0 +1,5 @@
+#encoding:utf-8
+#source 'https://rubygems.org'
+source 'http://ruby.taobao.org'
+gem "nokogiri"

data/lib/debug.rb ADDED Viewed

@@ -0,0 +1,9 @@
+#encoding:utf-8
+require 'nokogiri'
+require 'open-uri'
+require File.expand_path("../grabepg.rb", __FILE__)
+class Debug
+  # To change this template use File | Settings | File Templates.
+  p Grabepg.start
+end

data/lib/grabepg.rb ADDED Viewed

@@ -0,0 +1,96 @@
+#encoding:utf-8
+require 'nokogiri'
+require 'open-uri'
+module Grabepg
+  # To change this template use File | Settings | File Templates.
+  attr_reader :channel  #频道列表
+  attr_reader :site #网站地址
+  DEFAULT_GrabtvType=["cctv","satellite","digital",]
+  DEFAULT_SITE = "http://www.tvmao.com"
+  def self.start
+    @channel = []
+    @site = DEFAULT_SITE
+    channel_urls = self.getchannels
+    getSchudle(channel_urls)
+  end
+  #获取网站的频道表
+  def self.getchannels
+    channel_urls = {}
+    get_url =lambda { |type|
+      @site + "/program/duration/#{type}/w1.html" unless (type.nil?||type.empty?)
+        }
+    get_channel_id = lambda {|url|
+      channel_id = url.split("/")[2].split("-")[1] unless (url.nil?||url.empty?)
+    }
+    DEFAULT_GrabtvType.each do |type|
+      url = get_url.call(type)
+      p url
+      doc = Nokogiri::HTML(open(url))
+      p doc.content
+      p "*************************************************************"
+      doc.css('td[class="tdchn"]').each do |td|
+       channel_name=td.content
+       herf = ""
+       td.css('a').each do |a|
+        herf=a['href']
+       end
+        channel_id = get_channel_id.call(herf)
+        @channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
+        channel_urls.merge!({channel_id=>herf})
+      end
+    end
+    p "Channel: #{@channel}"
+    channel_urls
+  end
+  def self.getSchudle(channel,url)
+    _img_url = "http://static.haotv.me/channel/logo/"
+    get_week_url = lambda {|url|
+       _url = @site
+       urls = []
+       _urls = url.split("-")
+       0.upto(1).each do |i|
+        _url = _url+"#{_urls[i]}"+"-"
+       end
+      1.upto(7).each do |i|
+        urls << _url+"w#{i}.html"
+      end
+      urls
+    }
+    get_week_url.call(herf).each do |url|
+      p url
+      doc = Nokogiri::HTML(open(url))
+      img_url = _img_url + channel+".jpg"
+      data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
+      date = data[0]
+      week = data[1]
+      p "Channel: #{channel}  Date: #{date} Week: #{week}"
+      doc.css('ul[id="pgrow"]')[0].css("li").each do |schudel|
+        if schudel.content.split(" ").size>1
+          time = schudel.content.split(" ")[0]
+          schudel = schudel.content.split(" ")[1]
+          p "Time: #{time} Schudel: #{schudel}"
+        end
+      end
+    end
+  end
+end

data/lib/grabepg.rb~ ADDED Viewed

@@ -0,0 +1,115 @@
+# NOTE(review): the `~` suffix suggests this is an editor backup of
+# lib/grabepg.rb rather than a file meant to be loaded - confirm whether it
+# should ship with the gem at all.
+# NOTE(review): `model` is not a Ruby keyword; `module` or `class` was
+# presumably intended (the `initialize` and instance methods below suggest a
+# class) - confirm.
+model Grabepg
+  # Accessors for the channel and for a `data` attribute; neither instance
+  # variable is assigned anywhere in the code visible here.
+  attr_accessor :channel
+  attr_accessor :data
+  # Channel categories; not referenced by the methods below.
+  DEFAULT_GrabtvType=["cctv","satellite","digital",]
+  # Empty placeholder.
+  def self.start
+  end
+  # Fetch the site's channel table (empty placeholder).
+  def self.getchannels
+  end
+  # Works out the schedule page URL for `channel` on `date` and stores it in
+  # @request_url (nil when no enabled channel record is found).
+  #
+  # channel - object responding to `_id` and `name`.
+  # date    - object responding to `strftime` and `wday`, comparable with
+  #           Date.today.end_of_week (presumably ActiveSupport - confirm).
+  def initialize(channel, date)
+    p 'Tvmao grab %s - %s - %s' % [channel._id,channel.name, date.strftime('%F')]
+    # When no channel records exist yet, run the loader first
+    # (presumably this seeds the table - confirm against the loader).
+    count = ::Kfd::TvmaoChannel.count
+    if count == 0
+      kk = Kanke::Kfd::Grab::TvmaoChannel.new()
+      kk.load_in_database()
+    end
+    tmp_ch = ::Kfd::TvmaoChannel.where(zid:channel._id,enable: true).first
+    if tmp_ch.nil?
+      p 'can not find the %s | %s' % [channel._id,tmp_ch]
+      @request_url = nil
+    else
+      # request_url must be processed: w1 ~ w14
+      t_url = tmp_ch.url
+      # Keeps the URL up to and including "-w"; the lookahead only requires
+      # that ".html" follows somewhere later.
+      regex_url = /http:\/\/www\.tvmao\.com\/program\/.*-w(?=.*\.html)/
+      _url = regex_url.match(t_url).to_s
+      if date > Date.today.end_of_week
+        # Dates after the current week map to 8..14: wday + 7, with Sunday
+        # (wday 0, i.e. 7 after the first addition) pushed on to 14.
+        _wd = date.wday
+        _wd	+= 7
+        _wd += 7 if _wd == 7
+      else
+        # Dates within the current week map to 1..7, with Sunday as 7.
+        _wd = date.wday
+        _wd = 7 if date.wday == 0
+      end
+      @request_url = '%s%s.html' % [_url,_wd]
+    end
+  end
+  # Return data: yields the schedule entries one per call.
+  #
+  # The first call that sees a non-nil @request_url downloads the whole
+  # day, keeps an enumerator over it in @enum and clears @request_url.
+  # Returns nil when nothing was ever fetched. Once the entries are
+  # exhausted, `@enum.next` raises StopIteration, because the rescue below
+  # is commented out.
+  def next_schedule
+    #need proxy
+    unless @request_url.nil?
+      all_schedule = get_channel_the_date_show(@request_url)
+      p 'we get %d schedule' % all_schedule.count
+      @enum = all_schedule.each
+      @request_url = nil
+    end
+    #begin
+    if @enum.nil?
+      nil
+    else
+      sch = @enum.next
+      p '%s %s' % [sch['begin_at'].strftime('%F %R'),sch['name']]
+      # NOTE(review): `initialize` never assigns @channel, so the `channel`
+      # reader looks like it returns nil here - confirm.
+      Kanke::Kfd::Grab::Schedule.new(channel,sch['name'],sch['begin_at'], Kfd::Source::TVMAO)
+    end
+    #rescue StopIteration
+    #  nil
+    # end
+  end
+  # Parse a channel's programme schedule for a given day.
+  #
+  # url - schedule page to download.
+  #
+  # Returns an Array of { 'name' => String, 'begin_at' => DateTime } hashes,
+  # or [] when the page could not be loaded by either attempt.
+  def get_channel_the_date_show(url)
+    p '    request url %s ' % url
+    doc = nil
+    doc = Kanke::Kfd::Grab::Util.get_parse_doc(url)
+    if(doc.nil?)
+      p "doc nil"
+      # Second attempt with `false` as the extra argument; its meaning is
+      # defined by Util.get_parse_doc, which is not visible here.
+      doc = Kanke::Kfd::Grab::Util.get_parse_doc(url,false)
+    end
+    return [] if(doc.nil?)
+    all_schedule = []
+    # Not referenced again in this method.
+    regex_schedule = /<span.*/
+    doc.css('ul#pgrow li').each{|li|
+      # The first <span> of a row supplies the start time; rows without one
+      # are skipped.
+      time = li.search('span').first
+      next unless time
+      # NOTE(review): `time` is a node rather than a String here, so this
+      # comparison looks like it can never be true - confirm.
+      next if time.eql?('')
+      tvgd = li.search('div').first
+      # The title is the row text with the first <div>'s text, the fixed
+      # link labels and the time text removed.
+      name = li.content.gsub("剧照","")
+      if(!tvgd.nil?)
+        name = name.gsub(tvgd.content,"")
+      end
+      name = name.gsub("剧照","")
+      name = name.gsub("剧情","")
+      name = name.gsub("演员表","")
+      name = name.gsub(time.content,"")
+      name = name.lstrip
+      name = name.chomp
+      begin
+        # NOTE(review): no `date` local, parameter or accessor is defined in
+        # the visible code (the accessor above is `data`); if it is
+        # undefined, the error is caught and printed below and the row is
+        # dropped - confirm.
+        the_show_time = DateTime.strptime("#{date.strftime('%F')} #{time.content}",'%F %R')
+        all_schedule << {'name'=>name.strip,'begin_at' => the_show_time}
+      # Catches every Exception subclass, prints it and continues with the
+      # next row.
+      rescue Exception =>e
+        p e
+      end
+    }
+    all_schedule
+  end
+end

data/projectFilesBackup/.idea/grabepg.iml ADDED Viewed

@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="RUBY_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

metadata ADDED Viewed

@@ -0,0 +1,50 @@
+--- !ruby/object:Gem::Specification
+name: grab_epg
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+platform: ruby
+authors:
+- hahazql
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-04-25 00:00:00.000000000 Z
+dependencies: []
+description: ! '"用于从TVMAO抓取EPG信息"'
+email:
+- hahazhouqunli@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .grabepg.gemspec
+- Gemfile
+- lib/debug.rb
+- lib/grabepg.rb
+- lib/grabepg.rb~
+- projectFilesBackup/.idea/grabepg.iml
+homepage: ''
+licenses: []
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.0
+signing_key:
+specification_version: 4
+summary: ! '"Grab EPG"'
+test_files: []
+has_rdoc: