grab_epg 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YTNkOGZkMTk5NjFhODc3MmViMTQxODY2MzNjZTA2NzJmNTZjZDAyMA==
4
+ YmFmODhhZjdhMzkwMjFkMWMxZThiYzViYzA1NjFmMDQwY2YyMjQ3NQ==
5
5
  data.tar.gz: !binary |-
6
- YTdiYmZmY2JhZDRjNDE4ZWQzZWY1ZWNlM2U2ZjI4MGQ3NGVhOWVhZQ==
6
+ ZjY0ODlhNjUwYWI4ODJlN2EyMmMzMWU1ZGI5MDNlZWRmMTcyZWE1Mg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MmFmZmI2ODQyOTFkZDZiNzA5OTIxZGMzZTNhNzQxODA4MWUzOGRjNDI4OWU4
10
- ZGQwYWZmNzBkYTQxYWEyY2Y1NzcxNDNhMzg1MTUwZGE5NWM5OWYxMWI5MmZi
11
- NmIwZDE1OTMyYzk2MTgxMGI0OTgyYzAyNDYwZjI3M2M1ZjYwOTg=
9
+ ZDVlODljMzUxOWYxOTIxNjAxZmViYjZhODAwZjBmOTE5NTVhYmIzNDQwNTgw
10
+ NzU4ODcwOTExNTFlNGYxMDcwZGJkYzMyZmM0ZTBkNzc2MmY0ODY2MTY1MjM3
11
+ ODJjZTdlYjcxNWY0Zjk3MmVmYzUxOGUxMTFlOGU4Yzk2Yzk5OTk=
12
12
  data.tar.gz: !binary |-
13
- ZTFiNjExY2RlYTI3YWU4ZmYzODA2ZjUyMzQwN2MxZjhkN2NkYTdjMmFjYjJi
14
- NjU4YzE5MWZjYTMwZDQ1Y2QxMmM3M2YwZjE0MjBmOTQ0YWIzYjgzNjIwODQ3
15
- NzlmY2MxMTJkYjkyYzM0MDc4ZmRiYTk1MzY5NmM2ZjIyMTEzZjY=
13
+ ZGJkNmQ4MTQxZGIyZDI2MmIzMWMyNjgyYjFkNDZiY2NhOWUxZWQ3ZThkNGM5
14
+ YzkyOTFmNWY0N2ExZTdlMWRmMzk4NDRiZTBhMDc4YWJjY2ViMGJmYzJiMDky
15
+ MzczMDMyZTNhNWNkYTQyZjMwZTJkNmE2NTNjM2Q1OWUzZTQ0MGE=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.0.3"
13
+ gem.version = "0.0.4"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/README.md CHANGED
@@ -4,14 +4,20 @@ eg:
4
4
  gem grab_epg
5
5
 
6
6
  proxy_list=Grabepg.get_topfast_list
7
+
7
8
  Grabepg.getchannels.each do |channel,url|
9
+
8
10
  Grabepg.getschedule(channel,url,proxy_list)
11
+
9
12
  end
10
13
 
11
14
 
12
15
  Grabepg.getschedule 的返回:
16
+
13
17
  channel_schedule 是日期的时间表
18
+
14
19
  show_schedule 是根据节目的时间表
20
+
15
21
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
16
22
 
17
23
 
data/lib/debug.rb CHANGED
@@ -5,8 +5,7 @@ require 'open-uri'
5
5
  require File.expand_path("../grabepg.rb", __FILE__)
6
6
  class Debug
7
7
  # To change this template use File | Settings | File Templates.
8
- proxylist = ["123.125.116.243:6256", "123.125.116.243:28832", "123.125.116.243:29952", "123.125.116.243:9386", "219.234.82.73:7806", "123.125.116.243:38205", "123.125.116.243:11229", "123.125.116.243:12978", "219.234.82.89:8090", "120.197.85.173:20368", "123.125.116.243:8089", "123.125.116.243:8160", "219.234.82.78:31565", "123.125.116.243:21457", "123.125.116.241:17421", "123.125.116.243:14191", "219.234.82.88:29037", "123.125.116.242:13669", "123.125.116.243:19009", "123.125.116.243:6193", "123.125.116.242:15692", "123.125.116.241:20307", "123.125.116.242:18725", "219.234.82.82:29082", "123.125.116.243:5195", "123.125.116.242:21725", "123.125.116.241:32793", "219.234.82.60:8000", "123.125.116.242:17403", "123.125.116.243:6938", "123.125.116.242:16348", "219.234.82.54:8726", "120.197.85.173:20371", "123.125.116.241:9286", "219.234.82.88:19279", "219.234.82.89:13374", "123.125.116.242:5976"]
9
-
8
+ proxylist = [""]
10
9
 
11
10
  def self.test_get_doc_with_proxy(proxylist)
12
11
  herf = "http://www.tvmao.com/drama/HS5oLCs="
data/lib/grabepg.rb CHANGED
@@ -7,10 +7,14 @@ module Grabepg
7
7
  # To change this template use File | Settings | File Templates.
8
8
 
9
9
 
10
+ #图片的获取: Net::HTTP.get(url)
11
+ #图片的文件类型获取:
12
+
10
13
  attr_reader :channel #频道列表
11
14
  attr_reader :site #网站地址
12
15
  attr_reader :proxyindex #代理的索引
13
16
  attr_reader :show_schedule #根据节目的时间表
17
+ attr_reader :img_down_path #图片下载路径存放
14
18
 
15
19
  DEFAULT_GrabtvType=["cctv","satellite","digital",]
16
20
  DEFAULT_SITE = "http://www.tvmao.com"
@@ -20,19 +24,29 @@ module Grabepg
20
24
  def self.start
21
25
  @channel = []
22
26
  @site = DEFAULT_SITE
23
- channel_urls = self.getchannels
27
+ channel_urls = self.getchannels("/home/zql")
24
28
  proxy_list=get_topfast_list
29
+ img_down_path = self.img_down_path
30
+ p img_down_path
25
31
  channel_urls.each do |channel,url|
26
32
  p "****************************************GetSchedule : #{getschedule(channel,url,proxy_list)}"
27
33
  end
28
34
  end
29
35
 
36
+ def self.img_down_path
37
+ @img_down_path
38
+ end
39
+
30
40
 
31
41
  #获取网站的频道表
32
- def self.getchannels
42
+ #img_path 图片存放路径
43
+ def self.getchannels(img_dir_path)
33
44
  @channel = []
34
45
  @site=DEFAULT_SITE
35
46
  @proxyindex = 0
47
+ @img_down_dir_path = img_dir_path
48
+ @img_down_file = File.new(File.join(img_dir_path,"channel_img_down_path"),'w+')
49
+
36
50
  channel_urls = {}
37
51
 
38
52
  get_url =lambda { |type|
@@ -56,10 +70,15 @@ module Grabepg
56
70
  herf=a['href']
57
71
  end
58
72
  channel_id = get_channel_id.call(herf)
73
+
74
+ #获取频道图片的地址
75
+ img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
76
+ @img_down_file.puts("#{channel_id}:#{img_path}")
59
77
  @channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
60
78
  channel_urls.merge!({channel_id=>herf})
61
79
  end
62
80
  end
81
+ @img_down_file.close
63
82
  p "Channel: #{@channel}"
64
83
  channel_urls
65
84
  end
@@ -70,13 +89,14 @@ module Grabepg
70
89
  @proxyindex = 0
71
90
  end
72
91
  @proxyindex=@proxyindex%proxylist.size
73
- if(proxylist[@proxyindex]!="123.125.116.243:6256"||proxylist[@proxyindex]!="http://123.125.116.243:28832")
92
+ if(proxylist[@proxyindex])
74
93
  proxy = proxylist[@proxyindex]
75
94
  else
76
95
  proxy = proxylist[@proxyindex+1]
77
96
  end
78
97
  begin
79
- doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
98
+ doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
99
+ doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
80
100
  @no_firest = false
81
101
  rescue => err
82
102
  @no_firest = true
@@ -94,7 +114,12 @@ module Grabepg
94
114
 
95
115
 
96
116
  #获取节目表
97
- def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com")
117
+ def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com",img_dir_down_path=@img_down_dir_path)
118
+ unless img_dir_down_path
119
+ img_dir_down_path = __FILE__
120
+ end
121
+ @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
122
+
98
123
  if(@site)
99
124
  site=@site
100
125
  end
@@ -116,14 +141,6 @@ module Grabepg
116
141
  channel_schedule = {}
117
142
  get_week_url.call(herf).each do |url|
118
143
  p "Grab: #{url}"
119
- #if(proxylist[proxyidex]!="219.234.82.89:33948")
120
- # proxy = proxylist[@proxyidex]
121
- #else
122
- # proxy = proxylist[@proxyidex+1]
123
- #end
124
- #p "Proxy: http://#{proxy}"
125
- #doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
126
- #@proxyidex += 1
127
144
  doc = get_doc_with_proxy(proxylist,url)
128
145
  show_type = []
129
146
  img_url = _img_url + channel+".jpg"
@@ -150,19 +167,22 @@ module Grabepg
150
167
  show_infomation=get_show_infomation(proxylist,schedule_herf)
151
168
  show_type=show_infomation["type"]
152
169
  show_name = show_infomation["name"]
170
+ show_img = show_infomation["img"]
153
171
  end
154
- p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name}"
155
- schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
172
+ p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
173
+ schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name,"img"=>show_img}
156
174
  end
157
175
  end
158
176
  channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
159
177
  end
178
+ @img_down_file.close
160
179
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
161
180
  end
162
181
 
163
182
 
164
183
  #获取节目详细信息
165
184
  def self.get_show_infomation(proxy_list,schedule_herf)
185
+ begin
166
186
  @proxyindex = 0
167
187
  unless @site
168
188
  @site = "http://www.tvmao.com"
@@ -173,6 +193,12 @@ module Grabepg
173
193
  # p "title: %s" % title
174
194
  type = []
175
195
  name = doc.css('span[itemprop="name"]')[0].content
196
+
197
+ #获取节目的图片
198
+ schedule_img_down_path = doc.css('img[class="tvc"]')[0].get_attribute('src') if doc.css('img[class="tvc"]')
199
+
200
+
201
+
176
202
  doc.css('span[itemprop="genre"]').each do |_type|
177
203
  type << _type.content
178
204
  end
@@ -188,8 +214,12 @@ module Grabepg
188
214
  type<<_type.content
189
215
  end
190
216
  type.uniq!
217
+ @img_down_file.puts("#{name}:#{schedule_img_down_path}")
191
218
  @show_schedule.merge!(name=>get_show_schedule(proxy_list,schedule_herf)) unless @show_schedule.has_key?(name)
192
- {"type"=>type,"name"=>name}
219
+ {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
220
+ rescue => e
221
+ p "Error In get_show_infomation msg : #{e.to_s}"
222
+ end
193
223
  end
194
224
 
195
225
  #获取节目的时间表
@@ -282,4 +312,8 @@ module Grabepg
282
312
  list = contxt.scan(regex_list)
283
313
  end
284
314
 
315
+ def save_img
316
+
317
+ end
318
+
285
319
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-26 00:00:00.000000000 Z
11
+ date: 2013-04-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: ! '"用于从TVMAO抓取EPG信息"'
14
14
  email: