grab_epg 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.grabepg.gemspec +14 -0
- data/Gemfile +5 -0
- data/lib/debug.rb +9 -0
- data/lib/grabepg.rb +96 -0
- data/lib/grabepg.rb~ +115 -0
- data/projectFilesBackup/.idea/grabepg.iml +9 -0
- metadata +50 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
Mjg3MTAwMjIxM2FlNWQwMzc5MjkzZWEzNzQ4MGMzZjkzZWNiZDgwOA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YmFlYTA3ZWNjNTRlN2FmNmM4NjI5MWFlZTlhNTI3YjRiYWQ4ZGQ1Mg==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZGExZWJiN2Q4NjUzNTdiODcwMzI5ZjQ3ODAzYzM5YzU0MDI1OGI2ZGI2Yjk3
|
10
|
+
NjQ3YTNjNTg5YzBkYWM0ZjQzNThkODM4Njk1MDI5YWJhZjQwODkxYjFlZmQw
|
11
|
+
NDAyN2VlM2NmODI1M2Y4OGYxMThiMmM5MzI5NGI2Y2UzYzFlZDA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OGY5ODVkMzk0MjY4NDc1YjgzMTVkZTY3OThkZjZmZmFkZTZkNDI1NTIxZTcw
|
14
|
+
NmQxZjYxODg0YTkzMTE0YzNiNzFiNmE4ZmZiMGMwY2M3OGY0ZDZlYWYwZGMz
|
15
|
+
MzRlMzgzZGFkYTZjYTcyYWIyNGU1MTQ4ZTczZDY5NzBiZDkzMmQ=
|
data/.grabepg.gemspec
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Gem specification for grab_epg, a scraper that grabs EPG (electronic
# programme guide) information from TVMAO.
Gem::Specification.new do |gem|
  gem.name          = "grab_epg"
  gem.version       = "0.0.1"
  gem.authors       = ["hahazql"]
  gem.email         = ["hahazhouqunli@gmail.com"]

  # The text is no longer wrapped in literal double quotes: inside %q{} they
  # became part of the string and showed up verbatim in the gem metadata.
  gem.description   = %q{用于从TVMAO抓取EPG信息}
  gem.summary       = %q{Grab EPG}
  gem.homepage      = ""

  # Split on newlines only ($\ is nil by default, which splits on any
  # whitespace and breaks paths containing spaces), and keep editor backup
  # files such as "lib/grabepg.rb~" out of the packaged gem.
  gem.files         = `git ls-files`.split("\n").reject { |path| path.end_with?("~") }
  gem.require_paths = ["lib"]

  # lib/grabepg.rb requires nokogiri at load time, so it must be declared as
  # a runtime dependency or a fresh install cannot load the library.
  gem.add_dependency "nokogiri"
end
|
data/Gemfile
ADDED
data/lib/debug.rb
ADDED
data/lib/grabepg.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
#encoding:utf-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
5
|
+
|
6
|
+
# Scrapes channel lists and weekly programme schedules from tvmao.com.
#
# All entry points are module-level methods. State gathered by the last run
# is kept in module-level instance variables and exposed through
# Grabepg.channel and Grabepg.site.
module Grabepg
  # Readers for classes that mix this module in (kept from the original).
  attr_reader :channel # channel list
  attr_reader :site    # site address

  class << self
    # Module-level readers: the methods below store @channel / @site on the
    # module itself, which the instance-level attr_reader above cannot reach.
    attr_reader :channel, :site
  end

  DEFAULT_GrabtvType=["cctv","satellite","digital",]
  DEFAULT_SITE = "http://www.tvmao.com"

  # Grabs the channel list and then the week schedule of every channel.
  #
  # Returns a Hash mapping channel id => Array of schedule entries as built
  # by getSchudle.
  def self.start
    @channel = []
    @site = DEFAULT_SITE
    schedules = {}
    # getSchudle needs both the channel id and its page path; the original
    # passed the whole hash as a single argument and raised ArgumentError.
    getchannels.each do |channel_id, href|
      schedules[channel_id] = getSchudle(channel_id, href)
    end
    schedules
  end

  # Fetches the channel table of the site for every type listed in
  # DEFAULT_GrabtvType.
  #
  # Appends one {channel_id => {name:, herf:, type:}} Hash per table cell to
  # @channel and returns a Hash mapping channel id => schedule page path.
  def self.getchannels
    # Allow calling this method on its own, without going through start.
    @site ||= DEFAULT_SITE
    @channel ||= []
    channel_urls = {}

    DEFAULT_GrabtvType.each do |type|
      next if type.nil? || type.empty?

      url = @site + "/program/duration/#{type}/w1.html"
      p url
      doc = fetch_document(url)
      p doc.content
      p "*************************************************************"
      doc.css('td[class="tdchn"]').each do |td|
        channel_name = td.content
        # As in the original loop, the last anchor of the cell wins.
        link = td.css('a').to_a.last
        href = link ? link['href'] : nil
        channel_id = channel_id_from(href)
        # A cell without a usable link cannot be scheduled; skip it instead
        # of registering a nil channel id.
        next if channel_id.nil?

        @channel << ({channel_id => {name: channel_name, herf: href, type: type}})
        channel_urls[channel_id] = href
      end
    end
    p "Channel: #{@channel}"
    channel_urls
  end

  # Fetches the seven week pages (w1..w7) of one channel and prints every
  # programme found.
  #
  # channel - channel id String (also used to build the logo URL).
  # url     - site-relative path of any week page of the channel; the helper
  #           below rebuilds it from its first two "-"-separated parts.
  #
  # Returns an Array of Hashes with the keys :channel, :date, :week, :time,
  # :title and :img_url, one per programme line.
  def self.getSchudle(channel,url)
    @site ||= DEFAULT_SITE
    img_url = "http://static.haotv.me/channel/logo/" + channel + ".jpg"
    entries = []

    # The original passed an undefined local (herf) here, so every call
    # failed with NameError; the page path is the url parameter.
    week_urls(url).each do |page_url|
      p page_url
      doc = fetch_document(page_url)
      header = doc.css('div[class="mt10 clear"]')[0]
      listing = doc.css('ul[id="pgrow"]')[0]
      # Skip pages that lack the expected nodes instead of crashing on nil.
      next if header.nil? || listing.nil?

      date, week = header.content.split(" ")
      p "Channel: #{channel} Date: #{date} Week: #{week}"
      listing.css("li").each do |item|
        fields = item.content.split(" ")
        next unless fields.size > 1

        time = fields[0]
        title = fields[1]
        p "Time: #{time} Schudel: #{title}"
        entries << {channel: channel, date: date, week: week,
                    time: time, title: title, img_url: img_url}
      end
    end
    entries
  end

  # Downloads url and parses it with Nokogiri. URI#open (from open-uri) is
  # used rather than Kernel#open, and the block form closes the handle once
  # the document has been parsed.
  def self.fetch_document(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Extracts the channel id from a path: the second "-"-separated part of
  # the third "/"-separated segment. Returns nil when href is nil, empty or
  # has no such segment.
  def self.channel_id_from(href)
    return nil if href.nil? || href.empty?

    segment = href.split("/")[2]
    segment.nil? ? nil : segment.split("-")[1]
  end

  # Builds the seven absolute week-page URLs (w1..w7) from the first two
  # "-"-separated parts of href.
  def self.week_urls(href)
    parts = href.split("-")
    prefix = "#{@site}#{parts[0]}-#{parts[1]}-"
    (1..7).map { |day| "#{prefix}w#{day}.html" }
  end

  private_class_method :fetch_document, :channel_id_from, :week_urls
end
|
data/lib/grabepg.rb~
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
# Earlier draft of the TVMAO grabber, written as a mixin: a class that
# includes it is constructed with a channel and a date and then yields that
# day's schedule entries one at a time through next_schedule.
#
# NOTE(review): relies on project types (Kanke::Kfd::Grab::*, ::Kfd::*) and
# on Date#end_of_week, none of which are defined in this file — presumably
# loaded by the host application (end_of_week looks like ActiveSupport);
# confirm before use.
module Grabepg
  attr_accessor :channel
  attr_accessor :data
  # The date whose schedule is grabbed; read by get_channel_the_date_show.
  attr_accessor :date

  DEFAULT_GrabtvType=["cctv","satellite","digital",]

  # Placeholder, not implemented in this draft.
  def self.start
  end

  # Fetches the site's channel table. Placeholder, not implemented in this
  # draft.
  def self.getchannels
  end

  # Resolves the schedule page URL for channel on date.
  #
  # channel - object responding to _id and name.
  # date    - Date-like object (strftime, wday, comparable with Date).
  #
  # Sets @request_url to the page to fetch, or to nil when no enabled
  # TvmaoChannel record matches channel._id.
  def initialize(channel, date)
    # Keep both arguments: next_schedule reads channel and
    # get_channel_the_date_show reads date, and neither was stored before.
    @channel = channel
    @date = date

    p 'Tvmao grab %s - %s - %s' % [channel._id,channel.name, date.strftime('%F')]
    count = ::Kfd::TvmaoChannel.count
    if count == 0
      kk = Kanke::Kfd::Grab::TvmaoChannel.new()
      kk.load_in_database()
    end
    tmp_ch = ::Kfd::TvmaoChannel.where(zid:channel._id,enable: true).first
    if tmp_ch.nil?
      p 'can not find the %s | %s' % [channel._id,tmp_ch]
      @request_url = nil
    else
      # The request URL must carry a week-day suffix in the range w1..w14.
      t_url = tmp_ch.url
      regex_url = /http:\/\/www\.tvmao\.com\/program\/.*-w(?=.*\.html)/
      _url = regex_url.match(t_url).to_s
      if date > Date.today.end_of_week
        # Later than the current week: 8..14, with Sunday (wday 0) as 14.
        _wd = date.wday
        _wd += 7
        _wd += 7 if _wd == 7
      else
        # Current week: 1..7, with Sunday (wday 0) as 7.
        _wd = date.wday
        _wd = 7 if date.wday == 0
      end
      @request_url = '%s%s.html' % [_url,_wd]
    end
  end

  # Returns the next schedule entry as a Kanke::Kfd::Grab::Schedule, or nil
  # when no page was ever resolved. The page is fetched on the first call.
  #
  # Raises StopIteration once every entry has been returned; the rescue that
  # would have turned this into nil was commented out in the original and is
  # deliberately not restored here.
  def next_schedule
    # need proxy
    unless @request_url.nil?
      all_schedule = get_channel_the_date_show(@request_url)
      p 'we get %d schedule' % all_schedule.count
      @enum = all_schedule.each
      @request_url = nil
    end

    return nil if @enum.nil?

    sch = @enum.next
    p '%s %s' % [sch['begin_at'].strftime('%F %R'),sch['name']]
    Kanke::Kfd::Grab::Schedule.new(channel,sch['name'],sch['begin_at'], Kfd::Source::TVMAO)
  end

  # Parses a channel's programme list for the configured date.
  #
  # url - schedule page URL.
  #
  # Returns an Array of {'name' => String, 'begin_at' => DateTime}; empty
  # when the page could not be fetched. Entries whose time cannot be parsed
  # are printed and skipped.
  def get_channel_the_date_show(url)
    p ' request url %s ' % url
    doc = Kanke::Kfd::Grab::Util.get_parse_doc(url)
    if doc.nil?
      p "doc nil"
      # Retry with the second argument set to false — presumably this
      # disables the proxy (see "need proxy" in next_schedule); TODO confirm.
      doc = Kanke::Kfd::Grab::Util.get_parse_doc(url,false)
    end
    return [] if doc.nil?

    all_schedule = []
    doc.css('ul#pgrow li').each do |li|
      time = li.search('span').first
      next unless time
      # The original compared the node itself with '', which never matched;
      # test the node's text instead.
      next if time.content.strip.empty?

      # Strip the decoration text (stills / plot / cast links), the nested
      # div's text and the time itself, leaving only the programme name.
      tvgd = li.search('div').first
      name = li.content.gsub("剧照","")
      name = name.gsub(tvgd.content,"") unless tvgd.nil?
      name = name.gsub("剧照","")
      name = name.gsub("剧情","")
      name = name.gsub("演员表","")
      name = name.gsub(time.content,"")
      begin
        the_show_time = DateTime.strptime("#{date.strftime('%F')} #{time.content}",'%F %R')
        all_schedule << {'name'=>name.strip,'begin_at' => the_show_time}
      rescue StandardError => e
        # Best effort: report the bad row and continue. StandardError rather
        # than Exception, so interrupts and exit requests are not swallowed.
        p e
      end
    end
    all_schedule
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="NewModuleRootManager">
|
4
|
+
<content url="file://$MODULE_DIR$" />
|
5
|
+
<orderEntry type="inheritedJdk" />
|
6
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
7
|
+
</component>
|
8
|
+
</module>
|
9
|
+
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: grab_epg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hahazql
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-25 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: ! '"用于从TVMAO抓取EPG信息"'
|
14
|
+
email:
|
15
|
+
- hahazhouqunli@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- .grabepg.gemspec
|
21
|
+
- Gemfile
|
22
|
+
- lib/debug.rb
|
23
|
+
- lib/grabepg.rb
|
24
|
+
- lib/grabepg.rb~
|
25
|
+
- projectFilesBackup/.idea/grabepg.iml
|
26
|
+
homepage: ''
|
27
|
+
licenses: []
|
28
|
+
metadata: {}
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 2.0.0
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: ! '"Grab EPG"'
|
49
|
+
test_files: []
|
50
|
+
has_rdoc:
|