grab_epg 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.grabepg.gemspec +1 -1
- data/README.md +6 -0
- data/lib/debug.rb +1 -2
- data/lib/grabepg.rb +50 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YmFmODhhZjdhMzkwMjFkMWMxZThiYzViYzA1NjFmMDQwY2YyMjQ3NQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZjY0ODlhNjUwYWI4ODJlN2EyMmMzMWU1ZGI5MDNlZWRmMTcyZWE1Mg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZDVlODljMzUxOWYxOTIxNjAxZmViYjZhODAwZjBmOTE5NTVhYmIzNDQwNTgw
|
10
|
+
NzU4ODcwOTExNTFlNGYxMDcwZGJkYzMyZmM0ZTBkNzc2MmY0ODY2MTY1MjM3
|
11
|
+
ODJjZTdlYjcxNWY0Zjk3MmVmYzUxOGUxMTFlOGU4Yzk2Yzk5OTk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZGJkNmQ4MTQxZGIyZDI2MmIzMWMyNjgyYjFkNDZiY2NhOWUxZWQ3ZThkNGM5
|
14
|
+
YzkyOTFmNWY0N2ExZTdlMWRmMzk4NDRiZTBhMDc4YWJjY2ViMGJmYzJiMDky
|
15
|
+
MzczMDMyZTNhNWNkYTQyZjMwZTJkNmE2NTNjM2Q1OWUzZTQ0MGE=
|
data/.grabepg.gemspec
CHANGED
data/README.md
CHANGED
@@ -4,14 +4,20 @@ eg:
|
|
4
4
|
gem grab_epg
|
5
5
|
|
6
6
|
proxy_list=Grabepg.get_topfast_list
|
7
|
+
|
7
8
|
Grabepg.getchannels.each do |channel,url|
|
9
|
+
|
8
10
|
Grabepg.getschedule(channel,url,proxy_list)
|
11
|
+
|
9
12
|
end
|
10
13
|
|
11
14
|
|
12
15
|
Grabepg.getschedule 的返回:
|
16
|
+
|
13
17
|
channel_schedule 是日期的时间表
|
18
|
+
|
14
19
|
show_schedule 是根据节目的时间表
|
20
|
+
|
15
21
|
{"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
|
16
22
|
|
17
23
|
|
data/lib/debug.rb
CHANGED
@@ -5,8 +5,7 @@ require 'open-uri'
|
|
5
5
|
require File.expand_path("../grabepg.rb", __FILE__)
|
6
6
|
class Debug
|
7
7
|
# To change this template use File | Settings | File Templates.
|
8
|
-
proxylist = ["
|
9
|
-
|
8
|
+
proxylist = [""]
|
10
9
|
|
11
10
|
def self.test_get_doc_with_proxy(proxylist)
|
12
11
|
herf = "http://www.tvmao.com/drama/HS5oLCs="
|
data/lib/grabepg.rb
CHANGED
@@ -7,10 +7,14 @@ module Grabepg
|
|
7
7
|
# To change this template use File | Settings | File Templates.
|
8
8
|
|
9
9
|
|
10
|
+
#图片的获取: Net::HTTP.get(url)
|
11
|
+
#图片的文件类型获取:
|
12
|
+
|
10
13
|
attr_reader :channel #频道列表
|
11
14
|
attr_reader :site #网站地址
|
12
15
|
attr_reader :proxyindex #代理的索引
|
13
16
|
attr_reader :show_schedule #根据节目的时间表
|
17
|
+
attr_reader :img_down_path #图片下载路径存放
|
14
18
|
|
15
19
|
DEFAULT_GrabtvType=["cctv","satellite","digital",]
|
16
20
|
DEFAULT_SITE = "http://www.tvmao.com"
|
@@ -20,19 +24,29 @@ module Grabepg
|
|
20
24
|
def self.start
|
21
25
|
@channel = []
|
22
26
|
@site = DEFAULT_SITE
|
23
|
-
channel_urls = self.getchannels
|
27
|
+
channel_urls = self.getchannels("/home/zql")
|
24
28
|
proxy_list=get_topfast_list
|
29
|
+
img_down_path = self.img_down_path
|
30
|
+
p img_down_path
|
25
31
|
channel_urls.each do |channel,url|
|
26
32
|
p "****************************************GetSchedule : #{getschedule(channel,url,proxy_list)}"
|
27
33
|
end
|
28
34
|
end
|
29
35
|
|
36
|
+
def self.img_down_path
|
37
|
+
@img_down_path
|
38
|
+
end
|
39
|
+
|
30
40
|
|
31
41
|
#获取网站的频道表
|
32
|
-
|
42
|
+
#img_path 图片存放路径
|
43
|
+
def self.getchannels(img_dir_path)
|
33
44
|
@channel = []
|
34
45
|
@site=DEFAULT_SITE
|
35
46
|
@proxyindex = 0
|
47
|
+
@img_down_dir_path = img_dir_path
|
48
|
+
@img_down_file = File.new(File.join(img_dir_path,"channel_img_down_path"),'w+')
|
49
|
+
|
36
50
|
channel_urls = {}
|
37
51
|
|
38
52
|
get_url =lambda { |type|
|
@@ -56,10 +70,15 @@ module Grabepg
|
|
56
70
|
herf=a['href']
|
57
71
|
end
|
58
72
|
channel_id = get_channel_id.call(herf)
|
73
|
+
|
74
|
+
#获取频道图片的地址
|
75
|
+
img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
|
76
|
+
@img_down_file.puts("#{channel_id}:#{img_path}")
|
59
77
|
@channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
|
60
78
|
channel_urls.merge!({channel_id=>herf})
|
61
79
|
end
|
62
80
|
end
|
81
|
+
@img_down_file.close
|
63
82
|
p "Channel: #{@channel}"
|
64
83
|
channel_urls
|
65
84
|
end
|
@@ -70,13 +89,14 @@ module Grabepg
|
|
70
89
|
@proxyindex = 0
|
71
90
|
end
|
72
91
|
@proxyindex=@proxyindex%proxylist.size
|
73
|
-
if(proxylist[@proxyindex]
|
92
|
+
if(proxylist[@proxyindex])
|
74
93
|
proxy = proxylist[@proxyindex]
|
75
94
|
else
|
76
95
|
proxy = proxylist[@proxyindex+1]
|
77
96
|
end
|
78
97
|
begin
|
79
|
-
doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
|
98
|
+
doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
|
99
|
+
doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
|
80
100
|
@no_firest = false
|
81
101
|
rescue => err
|
82
102
|
@no_firest = true
|
@@ -94,7 +114,12 @@ module Grabepg
|
|
94
114
|
|
95
115
|
|
96
116
|
#获取节目表
|
97
|
-
def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com")
|
117
|
+
def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com",img_dir_down_path=@img_down_dir_path)
|
118
|
+
unless img_dir_down_path
|
119
|
+
img_dir_down_path = __FILE__
|
120
|
+
end
|
121
|
+
@img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
|
122
|
+
|
98
123
|
if(@site)
|
99
124
|
site=@site
|
100
125
|
end
|
@@ -116,14 +141,6 @@ module Grabepg
|
|
116
141
|
channel_schedule = {}
|
117
142
|
get_week_url.call(herf).each do |url|
|
118
143
|
p "Grab: #{url}"
|
119
|
-
#if(proxylist[proxyidex]!="219.234.82.89:33948")
|
120
|
-
# proxy = proxylist[@proxyidex]
|
121
|
-
#else
|
122
|
-
# proxy = proxylist[@proxyidex+1]
|
123
|
-
#end
|
124
|
-
#p "Proxy: http://#{proxy}"
|
125
|
-
#doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
|
126
|
-
#@proxyidex += 1
|
127
144
|
doc = get_doc_with_proxy(proxylist,url)
|
128
145
|
show_type = []
|
129
146
|
img_url = _img_url + channel+".jpg"
|
@@ -150,19 +167,22 @@ module Grabepg
|
|
150
167
|
show_infomation=get_show_infomation(proxylist,schedule_herf)
|
151
168
|
show_type=show_infomation["type"]
|
152
169
|
show_name = show_infomation["name"]
|
170
|
+
show_img = show_infomation["img"]
|
153
171
|
end
|
154
|
-
p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name}"
|
155
|
-
schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
|
172
|
+
p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
|
173
|
+
schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name,"img"=>show_img}
|
156
174
|
end
|
157
175
|
end
|
158
176
|
channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
|
159
177
|
end
|
178
|
+
@img_down_file.close
|
160
179
|
{"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
|
161
180
|
end
|
162
181
|
|
163
182
|
|
164
183
|
#获取节目详细信息
|
165
184
|
def self.get_show_infomation(proxy_list,schedule_herf)
|
185
|
+
begin
|
166
186
|
@proxyindex = 0
|
167
187
|
unless @site
|
168
188
|
@site = "http://www.tvmao.com"
|
@@ -173,6 +193,12 @@ module Grabepg
|
|
173
193
|
# p "title: %s" % title
|
174
194
|
type = []
|
175
195
|
name = doc.css('span[itemprop="name"]')[0].content
|
196
|
+
|
197
|
+
#获取节目的图片
|
198
|
+
schedule_img_down_path = doc.css('img[class="tvc"]')[0].get_attribute('src') if doc.css('img[class="tvc"]')
|
199
|
+
|
200
|
+
|
201
|
+
|
176
202
|
doc.css('span[itemprop="genre"]').each do |_type|
|
177
203
|
type << _type.content
|
178
204
|
end
|
@@ -188,8 +214,12 @@ module Grabepg
|
|
188
214
|
type<<_type.content
|
189
215
|
end
|
190
216
|
type.uniq!
|
217
|
+
@img_down_file.puts("#{name}:#{schedule_img_down_path}")
|
191
218
|
@show_schedule.merge!(name=>get_show_schedule(proxy_list,schedule_herf)) unless @show_schedule.has_key?(name)
|
192
|
-
{"type"=>type,"name"=>name}
|
219
|
+
{"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
|
220
|
+
rescue => e
|
221
|
+
p "Error In get_show_infomation msg : #{e.to_s}"
|
222
|
+
end
|
193
223
|
end
|
194
224
|
|
195
225
|
#获取节目的时间表
|
@@ -282,4 +312,8 @@ module Grabepg
|
|
282
312
|
list = contxt.scan(regex_list)
|
283
313
|
end
|
284
314
|
|
315
|
+
def save_img
|
316
|
+
|
317
|
+
end
|
318
|
+
|
285
319
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grab_epg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahazql
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-04-
|
11
|
+
date: 2013-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: ! '"用于从TVMAO抓取EPG信息"'
|
14
14
|
email:
|