grab_epg 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.grabepg.gemspec +1 -1
- data/README.md +6 -0
- data/lib/debug.rb +1 -2
- data/lib/grabepg.rb +50 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YmFmODhhZjdhMzkwMjFkMWMxZThiYzViYzA1NjFmMDQwY2YyMjQ3NQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZjY0ODlhNjUwYWI4ODJlN2EyMmMzMWU1ZGI5MDNlZWRmMTcyZWE1Mg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZDVlODljMzUxOWYxOTIxNjAxZmViYjZhODAwZjBmOTE5NTVhYmIzNDQwNTgw
|
10
|
+
NzU4ODcwOTExNTFlNGYxMDcwZGJkYzMyZmM0ZTBkNzc2MmY0ODY2MTY1MjM3
|
11
|
+
ODJjZTdlYjcxNWY0Zjk3MmVmYzUxOGUxMTFlOGU4Yzk2Yzk5OTk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZGJkNmQ4MTQxZGIyZDI2MmIzMWMyNjgyYjFkNDZiY2NhOWUxZWQ3ZThkNGM5
|
14
|
+
YzkyOTFmNWY0N2ExZTdlMWRmMzk4NDRiZTBhMDc4YWJjY2ViMGJmYzJiMDky
|
15
|
+
MzczMDMyZTNhNWNkYTQyZjMwZTJkNmE2NTNjM2Q1OWUzZTQ0MGE=
|
data/.grabepg.gemspec
CHANGED
data/README.md
CHANGED
@@ -4,14 +4,20 @@ eg:
|
|
4
4
|
gem grab_epg
|
5
5
|
|
6
6
|
proxy_list=Grabepg.get_topfast_list
|
7
|
+
|
7
8
|
Grabepg.getchannels.each do |channel,url|
|
9
|
+
|
8
10
|
Grabepg.getschedule(channel,url,proxy_list)
|
11
|
+
|
9
12
|
end
|
10
13
|
|
11
14
|
|
12
15
|
Grabepg.getschedule 的返回:
|
16
|
+
|
13
17
|
channel_schedule 是日期的时间表
|
18
|
+
|
14
19
|
show_schedule 是根据节目的时间表
|
20
|
+
|
15
21
|
{"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
|
16
22
|
|
17
23
|
|
data/lib/debug.rb
CHANGED
@@ -5,8 +5,7 @@ require 'open-uri'
|
|
5
5
|
require File.expand_path("../grabepg.rb", __FILE__)
|
6
6
|
class Debug
|
7
7
|
# To change this template use File | Settings | File Templates.
|
8
|
-
proxylist = ["
|
9
|
-
|
8
|
+
proxylist = [""]
|
10
9
|
|
11
10
|
def self.test_get_doc_with_proxy(proxylist)
|
12
11
|
herf = "http://www.tvmao.com/drama/HS5oLCs="
|
data/lib/grabepg.rb
CHANGED
@@ -7,10 +7,14 @@ module Grabepg
|
|
7
7
|
# To change this template use File | Settings | File Templates.
|
8
8
|
|
9
9
|
|
10
|
+
#图片的获取: Net::HTTP.get(url)
|
11
|
+
#图片的文件类型获取:
|
12
|
+
|
10
13
|
attr_reader :channel #频道列表
|
11
14
|
attr_reader :site #网站地址
|
12
15
|
attr_reader :proxyindex #代理的索引
|
13
16
|
attr_reader :show_schedule #根据节目的时间表
|
17
|
+
attr_reader :img_down_path #图片下载路径存放
|
14
18
|
|
15
19
|
DEFAULT_GrabtvType=["cctv","satellite","digital",]
|
16
20
|
DEFAULT_SITE = "http://www.tvmao.com"
|
@@ -20,19 +24,29 @@ module Grabepg
|
|
20
24
|
def self.start
|
21
25
|
@channel = []
|
22
26
|
@site = DEFAULT_SITE
|
23
|
-
channel_urls = self.getchannels
|
27
|
+
channel_urls = self.getchannels("/home/zql")
|
24
28
|
proxy_list=get_topfast_list
|
29
|
+
img_down_path = self.img_down_path
|
30
|
+
p img_down_path
|
25
31
|
channel_urls.each do |channel,url|
|
26
32
|
p "****************************************GetSchedule : #{getschedule(channel,url,proxy_list)}"
|
27
33
|
end
|
28
34
|
end
|
29
35
|
|
36
|
+
def self.img_down_path
|
37
|
+
@img_down_path
|
38
|
+
end
|
39
|
+
|
30
40
|
|
31
41
|
#获取网站的频道表
|
32
|
-
|
42
|
+
#img_path 图片存放路径
|
43
|
+
def self.getchannels(img_dir_path)
|
33
44
|
@channel = []
|
34
45
|
@site=DEFAULT_SITE
|
35
46
|
@proxyindex = 0
|
47
|
+
@img_down_dir_path = img_dir_path
|
48
|
+
@img_down_file = File.new(File.join(img_dir_path,"channel_img_down_path"),'w+')
|
49
|
+
|
36
50
|
channel_urls = {}
|
37
51
|
|
38
52
|
get_url =lambda { |type|
|
@@ -56,10 +70,15 @@ module Grabepg
|
|
56
70
|
herf=a['href']
|
57
71
|
end
|
58
72
|
channel_id = get_channel_id.call(herf)
|
73
|
+
|
74
|
+
#获取频道图片的地址
|
75
|
+
img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
|
76
|
+
@img_down_file.puts("#{channel_id}:#{img_path}")
|
59
77
|
@channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
|
60
78
|
channel_urls.merge!({channel_id=>herf})
|
61
79
|
end
|
62
80
|
end
|
81
|
+
@img_down_file.close
|
63
82
|
p "Channel: #{@channel}"
|
64
83
|
channel_urls
|
65
84
|
end
|
@@ -70,13 +89,14 @@ module Grabepg
|
|
70
89
|
@proxyindex = 0
|
71
90
|
end
|
72
91
|
@proxyindex=@proxyindex%proxylist.size
|
73
|
-
if(proxylist[@proxyindex]
|
92
|
+
if(proxylist[@proxyindex])
|
74
93
|
proxy = proxylist[@proxyindex]
|
75
94
|
else
|
76
95
|
proxy = proxylist[@proxyindex+1]
|
77
96
|
end
|
78
97
|
begin
|
79
|
-
doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
|
98
|
+
doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
|
99
|
+
doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
|
80
100
|
@no_firest = false
|
81
101
|
rescue => err
|
82
102
|
@no_firest = true
|
@@ -94,7 +114,12 @@ module Grabepg
|
|
94
114
|
|
95
115
|
|
96
116
|
#获取节目表
|
97
|
-
def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com")
|
117
|
+
def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com",img_dir_down_path=@img_down_dir_path)
|
118
|
+
unless img_dir_down_path
|
119
|
+
img_dir_down_path = __FILE__
|
120
|
+
end
|
121
|
+
@img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
|
122
|
+
|
98
123
|
if(@site)
|
99
124
|
site=@site
|
100
125
|
end
|
@@ -116,14 +141,6 @@ module Grabepg
|
|
116
141
|
channel_schedule = {}
|
117
142
|
get_week_url.call(herf).each do |url|
|
118
143
|
p "Grab: #{url}"
|
119
|
-
#if(proxylist[proxyidex]!="219.234.82.89:33948")
|
120
|
-
# proxy = proxylist[@proxyidex]
|
121
|
-
#else
|
122
|
-
# proxy = proxylist[@proxyidex+1]
|
123
|
-
#end
|
124
|
-
#p "Proxy: http://#{proxy}"
|
125
|
-
#doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
|
126
|
-
#@proxyidex += 1
|
127
144
|
doc = get_doc_with_proxy(proxylist,url)
|
128
145
|
show_type = []
|
129
146
|
img_url = _img_url + channel+".jpg"
|
@@ -150,19 +167,22 @@ module Grabepg
|
|
150
167
|
show_infomation=get_show_infomation(proxylist,schedule_herf)
|
151
168
|
show_type=show_infomation["type"]
|
152
169
|
show_name = show_infomation["name"]
|
170
|
+
show_img = show_infomation["img"]
|
153
171
|
end
|
154
|
-
p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name}"
|
155
|
-
schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
|
172
|
+
p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
|
173
|
+
schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name,"img"=>show_img}
|
156
174
|
end
|
157
175
|
end
|
158
176
|
channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
|
159
177
|
end
|
178
|
+
@img_down_file.close
|
160
179
|
{"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
|
161
180
|
end
|
162
181
|
|
163
182
|
|
164
183
|
#获取节目详细信息
|
165
184
|
def self.get_show_infomation(proxy_list,schedule_herf)
|
185
|
+
begin
|
166
186
|
@proxyindex = 0
|
167
187
|
unless @site
|
168
188
|
@site = "http://www.tvmao.com"
|
@@ -173,6 +193,12 @@ module Grabepg
|
|
173
193
|
# p "title: %s" % title
|
174
194
|
type = []
|
175
195
|
name = doc.css('span[itemprop="name"]')[0].content
|
196
|
+
|
197
|
+
#获取节目的图片
|
198
|
+
schedule_img_down_path = doc.css('img[class="tvc"]')[0].get_attribute('src') if doc.css('img[class="tvc"]')
|
199
|
+
|
200
|
+
|
201
|
+
|
176
202
|
doc.css('span[itemprop="genre"]').each do |_type|
|
177
203
|
type << _type.content
|
178
204
|
end
|
@@ -188,8 +214,12 @@ module Grabepg
|
|
188
214
|
type<<_type.content
|
189
215
|
end
|
190
216
|
type.uniq!
|
217
|
+
@img_down_file.puts("#{name}:#{schedule_img_down_path}")
|
191
218
|
@show_schedule.merge!(name=>get_show_schedule(proxy_list,schedule_herf)) unless @show_schedule.has_key?(name)
|
192
|
-
{"type"=>type,"name"=>name}
|
219
|
+
{"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
|
220
|
+
rescue => e
|
221
|
+
p "Error In get_show_infomation msg : #{e.to_s}"
|
222
|
+
end
|
193
223
|
end
|
194
224
|
|
195
225
|
#获取节目的时间表
|
@@ -282,4 +312,8 @@ module Grabepg
|
|
282
312
|
list = contxt.scan(regex_list)
|
283
313
|
end
|
284
314
|
|
315
|
+
def save_img
|
316
|
+
|
317
|
+
end
|
318
|
+
|
285
319
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grab_epg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahazql
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-04-
|
11
|
+
date: 2013-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: ! '"用于从TVMAO抓取EPG信息"'
|
14
14
|
email:
|