grab_epg 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Mjg3MTAwMjIxM2FlNWQwMzc5MjkzZWEzNzQ4MGMzZjkzZWNiZDgwOA==
5
+ data.tar.gz: !binary |-
6
+ YmFlYTA3ZWNjNTRlN2FmNmM4NjI5MWFlZTlhNTI3YjRiYWQ4ZGQ1Mg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZGExZWJiN2Q4NjUzNTdiODcwMzI5ZjQ3ODAzYzM5YzU0MDI1OGI2ZGI2Yjk3
10
+ NjQ3YTNjNTg5YzBkYWM0ZjQzNThkODM4Njk1MDI5YWJhZjQwODkxYjFlZmQw
11
+ NDAyN2VlM2NmODI1M2Y4OGYxMThiMmM5MzI5NGI2Y2UzYzFlZDA=
12
+ data.tar.gz: !binary |-
13
+ OGY5ODVkMzk0MjY4NDc1YjgzMTVkZTY3OThkZjZmZmFkZTZkNDI1NTIxZTcw
14
+ NmQxZjYxODg0YTkzMTE0YzNiNzFiNmE4ZmZiMGMwY2M3OGY0ZDZlYWYwZGMz
15
+ MzRlMzgzZGFkYTZjYTcyYWIyNGU1MTQ4ZTczZDY5NzBiZDkzMmQ=
data/.grabepg.gemspec ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
# Gem specification for grab_epg, a scraper that pulls EPG (electronic
# programme guide) listings from TVMAO.
Gem::Specification.new do |gem|
  gem.authors = ["hahazql"]
  gem.email = ["hahazhouqunli@gmail.com"]
  gem.description = %q{"用于从TVMAO抓取EPG信息"}
  gem.summary = %q{"Grab EPG"}
  gem.homepage = ""

  # Split on the input record separator ($/, a newline). The previous `$\`
  # is the *output* separator, which is nil by default and therefore made
  # `split` break paths apart on any whitespace.
  # Editor backup files ("foo.rb~") are never meant to be shipped.
  gem.files = `git ls-files`.split($/).reject { |path| path.end_with?("~") }
  gem.name = "grab_epg"
  gem.require_paths = ["lib"]
  gem.version = "0.0.1"

  # The library requires nokogiri when it is loaded, so installing the gem
  # must pull it in.
  gem.add_dependency "nokogiri"
end
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
#encoding:utf-8
# Fetch gems over HTTPS from the canonical index. The previous source was a
# plain-HTTP third-party mirror, which leaves gem downloads open to tampering
# in transit.
# NOTE(review): if a regional mirror is still required, point this at an
# HTTPS mirror that is currently maintained.
source 'https://rubygems.org'

gem "nokogiri"
data/lib/debug.rb ADDED
@@ -0,0 +1,9 @@
1
+ #encoding:utf-8
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require File.expand_path("../grabepg.rb", __FILE__)
6
# Manual smoke-test entry point for the scraper.
class Debug
  # Runs a full scrape through Grabepg.start and prints whatever it returns.
  #
  # @return [Object] the value returned by Grabepg.start
  def self.run
    p Grabepg.start
  end
end

# Only scrape when this file is executed directly; previously the class body
# started the network scrape as a side effect of merely requiring the file.
Debug.run if __FILE__ == $PROGRAM_NAME
data/lib/grabepg.rb ADDED
@@ -0,0 +1,96 @@
1
+ #encoding:utf-8
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+
6
# Scrapes channel lists and weekly programme schedules (EPG data) from the
# site configured in DEFAULT_SITE.
module Grabepg
  # Instance-level readers kept so classes that include this module keep
  # the same method set as before.
  attr_reader :channel # channel list
  attr_reader :site    # site address

  class << self
    # The state below lives in module-level instance variables, so the
    # readers have to be defined on the module itself to be reachable as
    # Grabepg.channel / Grabepg.site.
    attr_reader :channel, :site
  end

  DEFAULT_GrabtvType = ["cctv", "satellite", "digital"]
  DEFAULT_SITE = "http://www.tvmao.com"
  CHANNEL_LOGO_BASE = "http://static.haotv.me/channel/logo/"

  # Scrapes every channel category and then each channel's weekly schedule.
  #
  # @return [Hash] channel id => result of getSchudle for that channel
  def self.start
    @channel = []
    @site = DEFAULT_SITE
    # getSchudle takes (channel, url); it used to be handed the whole hash
    # as a single argument, which raised ArgumentError on every run.
    getchannels.each_with_object({}) do |(channel_id, href), schedules|
      schedules[channel_id] = getSchudle(channel_id, href)
    end
  end

  # Fetches the site's channel table for every type in DEFAULT_GrabtvType.
  # Each channel found is also appended to @channel as
  # { id => { name:, herf:, type: } }.
  #
  # @return [Hash] channel id => relative href of the channel page
  def self.getchannels
    @channel ||= [] # allow calling this without going through start
    channel_urls = {}

    DEFAULT_GrabtvType.each do |type|
      next if type.nil? || type.empty?

      url = "#{site_root}/program/duration/#{type}/w1.html"
      p url
      doc = fetch_document(url)

      doc.css('td[class="tdchn"]').each do |td|
        # When a cell holds several anchors the last one wins, as before.
        link = td.css('a').last
        href = link ? link['href'].to_s : ""
        channel_id = channel_id_from(href)
        next if channel_id.nil? # cell without a usable channel link

        @channel << { channel_id => { name: td.content, herf: href, type: type } }
        channel_urls[channel_id] = href
      end
    end

    p "Channel: #{@channel}"
    channel_urls
  end

  # Downloads the seven weekday pages (w1..w7) of one channel and extracts
  # the programme rows from each page.
  #
  # @param channel [String] channel id, also used to build the logo URL
  # @param url [String] relative href of any weekday page of the channel
  # @return [Array<Hash>] one hash per day with the keys :channel, :date,
  #   :week, :img_url, :url and :programs (an array of { time:, name: })
  def self.getSchudle(channel, url)
    img_url = "#{CHANNEL_LOGO_BASE}#{channel}.jpg"

    # The original iterated over an undefined local (`herf`); the href to
    # expand is the `url` argument.
    week_urls(url).map do |day_url|
      p day_url
      doc = fetch_document(day_url)

      header = doc.css('div[class="mt10 clear"]')[0]
      date, week = header ? header.content.split(" ") : []
      p "Channel: #{channel} Date: #{date} Week: #{week}"

      programs = []
      listing = doc.css('ul[id="pgrow"]')[0]
      (listing ? listing.css("li") : []).each do |item|
        time, title = item.content.split(" ")
        next if title.nil? # rows without both a time and a title are skipped

        p "Time: #{time} Schudel: #{title}"
        programs << { time: time, name: title }
      end

      { channel: channel, date: date, week: week,
        img_url: img_url, url: day_url, programs: programs }
    end
  end

  # Base URL of the site being scraped; falls back to DEFAULT_SITE when
  # start has not run yet.
  def self.site_root
    @site ||= DEFAULT_SITE
  end

  # Extracts the channel id from an href such as
  # "/program/CCTV-CCTV1-w1.html" (=> "CCTV1"); nil when it cannot be found.
  def self.channel_id_from(href)
    return nil if href.nil? || href.empty?

    page = href.split("/")[2]
    page && page.split("-")[1]
  end

  # Expands one weekday href into the absolute URLs of all seven weekday
  # pages by keeping its first two dash-separated parts.
  def self.week_urls(href)
    parts = href.split("-")
    prefix = site_root + (0..1).map { |i| "#{parts[i]}-" }.join
    (1..7).map { |day| "#{prefix}w#{day}.html" }
  end

  # Downloads +url+ and parses it with Nokogiri, closing the handle after.
  def self.fetch_document(url)
    # Kernel#open no longer opens URLs on Ruby 3; URI.open is the public
    # open-uri entry point where available, with a fallback for old Rubies.
    io = URI.respond_to?(:open) ? URI.open(url) : open(url)
    begin
      Nokogiri::HTML(io)
    ensure
      io.close if io.respond_to?(:close)
    end
  end

  private_class_method :site_root, :channel_id_from, :week_urls, :fetch_document
end
data/lib/grabepg.rb~ ADDED
@@ -0,0 +1,115 @@
1
# Editor backup copy of the grabber, adapted from code that depends on the
# Kanke::Kfd / ::Kfd classes (not defined in this file).
# NOTE(review): this is a "~" backup file and probably should not ship.
module Grabepg # was `model Grabepg`, which is a syntax error
  attr_accessor :channel
  attr_accessor :data
  attr_accessor :date # read when building each schedule's start time

  DEFAULT_GrabtvType=["cctv","satellite","digital",]

  # Placeholder; intentionally empty in this copy.
  def self.start
  end

  # Fetch the site's channel table. Placeholder; intentionally empty.
  def self.getchannels
  end

  # Resolves the TVMAO page URL holding +channel+'s schedule for +date+.
  #
  # @param channel [#_id, #name] channel record
  # @param date [Date] day whose schedule is wanted
  def initialize(channel, date)
    # Later methods read both values, so they have to be stored; before,
    # `channel` was always nil and `date` raised NameError.
    @channel = channel
    @date = date

    p 'Tvmao grab %s - %s - %s' % [channel._id,channel.name, date.strftime('%F')]
    if ::Kfd::TvmaoChannel.count == 0
      Kanke::Kfd::Grab::TvmaoChannel.new.load_in_database
    end

    tmp_ch = ::Kfd::TvmaoChannel.where(zid:channel._id,enable: true).first
    if tmp_ch.nil?
      p 'can not find the %s | %s' % [channel._id,tmp_ch]
      @request_url = nil
      return
    end

    # The stored URL has to be rewritten to the w1..w14 page for the date:
    # w1-w7 is the current week (Sunday is 7), w8-w14 the following week.
    regex_url = /http:\/\/www\.tvmao\.com\/program\/.*-w(?=.*\.html)/
    base_url = regex_url.match(tmp_ch.url).to_s
    page = date.wday
    page = 7 if page.zero?
    page += 7 if date > Date.today.end_of_week
    @request_url = '%s%s.html' % [base_url,page]
  end

  # Returns the next schedule entry, loading the day's page on first use.
  #
  # @return [Kanke::Kfd::Grab::Schedule, nil] nil when nothing was loaded
  #   or all entries have been handed out
  def next_schedule
    #need proxy
    unless @request_url.nil?
      all_schedule = get_channel_the_date_show(@request_url)
      p 'we get %d schedule' % all_schedule.count
      @enum = all_schedule.each
      @request_url = nil
    end

    return nil if @enum.nil?

    begin
      sch = @enum.next
    rescue StopIteration
      # End of the list: report it the same way as "nothing loaded".
      return nil
    end
    p '%s %s' % [sch['begin_at'].strftime('%F %R'),sch['name']]
    Kanke::Kfd::Grab::Schedule.new(channel,sch['name'],sch['begin_at'], Kfd::Source::TVMAO)
  end

  # Parses a channel's programme schedule for one day.
  #
  # @param url [String] page to download
  # @return [Array<Hash>] entries of the form { 'name' =>, 'begin_at' => };
  #   empty when the page could not be fetched
  def get_channel_the_date_show(url)
    p ' request url %s ' % url
    doc = Kanke::Kfd::Grab::Util.get_parse_doc(url)
    if doc.nil?
      p "doc nil"
      # Retry with the helper's second argument set to false.
      doc = Kanke::Kfd::Grab::Util.get_parse_doc(url,false)
    end
    return [] if doc.nil?

    all_schedule = []
    doc.css('ul#pgrow li').each do |li|
      time = li.search('span').first
      next unless time
      # `time` is a node, so it has to be compared through its text.
      next if time.content.strip.empty?

      tvgd = li.search('div').first
      # Strip link labels, the nested <div> text and the time itself, in
      # the same order as before, leaving only the programme title.
      noise = ["剧照", tvgd && tvgd.content, "剧照", "剧情", "演员表", time.content].compact
      name = noise.inject(li.content) { |text, part| text.gsub(part,"") }

      begin
        the_show_time = DateTime.strptime("#{date.strftime('%F')} #{time.content}",'%F %R')
        all_schedule << {'name'=>name.strip,'begin_at' => the_show_time}
      rescue ArgumentError => e
        # Skip only rows whose time cannot be parsed; `rescue Exception`
        # used to hide every other failure as well.
        p e
      end
    end
    all_schedule
  end
end
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
9
+
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: grab_epg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - hahazql
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: ! '"用于从TVMAO抓取EPG信息"'
14
+ email:
15
+ - hahazhouqunli@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - .grabepg.gemspec
21
+ - Gemfile
22
+ - lib/debug.rb
23
+ - lib/grabepg.rb
24
+ - lib/grabepg.rb~
25
+ - projectFilesBackup/.idea/grabepg.iml
26
+ homepage: ''
27
+ licenses: []
28
+ metadata: {}
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 2.0.0
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: ! '"Grab EPG"'
49
+ test_files: []
50
+ has_rdoc: