grab_epg 0.0.3 → 0.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YTNkOGZkMTk5NjFhODc3MmViMTQxODY2MzNjZTA2NzJmNTZjZDAyMA==
4
+ YmFmODhhZjdhMzkwMjFkMWMxZThiYzViYzA1NjFmMDQwY2YyMjQ3NQ==
5
5
  data.tar.gz: !binary |-
6
- YTdiYmZmY2JhZDRjNDE4ZWQzZWY1ZWNlM2U2ZjI4MGQ3NGVhOWVhZQ==
6
+ ZjY0ODlhNjUwYWI4ODJlN2EyMmMzMWU1ZGI5MDNlZWRmMTcyZWE1Mg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MmFmZmI2ODQyOTFkZDZiNzA5OTIxZGMzZTNhNzQxODA4MWUzOGRjNDI4OWU4
10
- ZGQwYWZmNzBkYTQxYWEyY2Y1NzcxNDNhMzg1MTUwZGE5NWM5OWYxMWI5MmZi
11
- NmIwZDE1OTMyYzk2MTgxMGI0OTgyYzAyNDYwZjI3M2M1ZjYwOTg=
9
+ ZDVlODljMzUxOWYxOTIxNjAxZmViYjZhODAwZjBmOTE5NTVhYmIzNDQwNTgw
10
+ NzU4ODcwOTExNTFlNGYxMDcwZGJkYzMyZmM0ZTBkNzc2MmY0ODY2MTY1MjM3
11
+ ODJjZTdlYjcxNWY0Zjk3MmVmYzUxOGUxMTFlOGU4Yzk2Yzk5OTk=
12
12
  data.tar.gz: !binary |-
13
- ZTFiNjExY2RlYTI3YWU4ZmYzODA2ZjUyMzQwN2MxZjhkN2NkYTdjMmFjYjJi
14
- NjU4YzE5MWZjYTMwZDQ1Y2QxMmM3M2YwZjE0MjBmOTQ0YWIzYjgzNjIwODQ3
15
- NzlmY2MxMTJkYjkyYzM0MDc4ZmRiYTk1MzY5NmM2ZjIyMTEzZjY=
13
+ ZGJkNmQ4MTQxZGIyZDI2MmIzMWMyNjgyYjFkNDZiY2NhOWUxZWQ3ZThkNGM5
14
+ YzkyOTFmNWY0N2ExZTdlMWRmMzk4NDRiZTBhMDc4YWJjY2ViMGJmYzJiMDky
15
+ MzczMDMyZTNhNWNkYTQyZjMwZTJkNmE2NTNjM2Q1OWUzZTQ0MGE=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.0.3"
13
+ gem.version = "0.0.4"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/README.md CHANGED
@@ -4,14 +4,20 @@ eg:
4
4
  gem grab_epg
5
5
 
6
6
  proxy_list=Grabepg.get_topfast_list
7
+
7
8
  Grabepg.getchannels.each do |channel,url|
9
+
8
10
  Grabepg.getschedule(channel,url,proxy_list)
11
+
9
12
  end
10
13
 
11
14
 
12
15
  Grabepg.getschedule 的返回:
16
+
13
17
  channel_schedule 是日期的时间表
18
+
14
19
  show_schedule 是根据节目的时间表
20
+
15
21
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
16
22
 
17
23
 
data/lib/debug.rb CHANGED
@@ -5,8 +5,7 @@ require 'open-uri'
5
5
  require File.expand_path("../grabepg.rb", __FILE__)
6
6
  class Debug
7
7
  # To change this template use File | Settings | File Templates.
8
- proxylist = ["123.125.116.243:6256", "123.125.116.243:28832", "123.125.116.243:29952", "123.125.116.243:9386", "219.234.82.73:7806", "123.125.116.243:38205", "123.125.116.243:11229", "123.125.116.243:12978", "219.234.82.89:8090", "120.197.85.173:20368", "123.125.116.243:8089", "123.125.116.243:8160", "219.234.82.78:31565", "123.125.116.243:21457", "123.125.116.241:17421", "123.125.116.243:14191", "219.234.82.88:29037", "123.125.116.242:13669", "123.125.116.243:19009", "123.125.116.243:6193", "123.125.116.242:15692", "123.125.116.241:20307", "123.125.116.242:18725", "219.234.82.82:29082", "123.125.116.243:5195", "123.125.116.242:21725", "123.125.116.241:32793", "219.234.82.60:8000", "123.125.116.242:17403", "123.125.116.243:6938", "123.125.116.242:16348", "219.234.82.54:8726", "120.197.85.173:20371", "123.125.116.241:9286", "219.234.82.88:19279", "219.234.82.89:13374", "123.125.116.242:5976"]
9
-
8
+ proxylist = [""]
10
9
 
11
10
  def self.test_get_doc_with_proxy(proxylist)
12
11
  herf = "http://www.tvmao.com/drama/HS5oLCs="
data/lib/grabepg.rb CHANGED
@@ -7,10 +7,14 @@ module Grabepg
7
7
  # To change this template use File | Settings | File Templates.
8
8
 
9
9
 
10
+ #图片的获取: Net::HTTP.get(url)
11
+ #图片的文件类型获取:
12
+
10
13
  attr_reader :channel #频道列表
11
14
  attr_reader :site #网站地址
12
15
  attr_reader :proxyindex #代理的索引
13
16
  attr_reader :show_schedule #根据节目的时间表
17
+ attr_reader :img_down_path #图片下载路径存放
14
18
 
15
19
  DEFAULT_GrabtvType=["cctv","satellite","digital",]
16
20
  DEFAULT_SITE = "http://www.tvmao.com"
@@ -20,19 +24,29 @@ module Grabepg
20
24
  def self.start
21
25
  @channel = []
22
26
  @site = DEFAULT_SITE
23
- channel_urls = self.getchannels
27
+ channel_urls = self.getchannels("/home/zql")
24
28
  proxy_list=get_topfast_list
29
+ img_down_path = self.img_down_path
30
+ p img_down_path
25
31
  channel_urls.each do |channel,url|
26
32
  p "****************************************GetSchedule : #{getschedule(channel,url,proxy_list)}"
27
33
  end
28
34
  end
29
35
 
36
+ def self.img_down_path
37
+ @img_down_path
38
+ end
39
+
30
40
 
31
41
  #获取网站的频道表
32
- def self.getchannels
42
+ #img_path 图片存放路径
43
+ def self.getchannels(img_dir_path)
33
44
  @channel = []
34
45
  @site=DEFAULT_SITE
35
46
  @proxyindex = 0
47
+ @img_down_dir_path = img_dir_path
48
+ @img_down_file = File.new(File.join(img_dir_path,"channel_img_down_path"),'w+')
49
+
36
50
  channel_urls = {}
37
51
 
38
52
  get_url =lambda { |type|
@@ -56,10 +70,15 @@ module Grabepg
56
70
  herf=a['href']
57
71
  end
58
72
  channel_id = get_channel_id.call(herf)
73
+
74
+ #获取频道图片的地址
75
+ img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
76
+ @img_down_file.puts("#{channel_id}:#{img_path}")
59
77
  @channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
60
78
  channel_urls.merge!({channel_id=>herf})
61
79
  end
62
80
  end
81
+ @img_down_file.close
63
82
  p "Channel: #{@channel}"
64
83
  channel_urls
65
84
  end
@@ -70,13 +89,14 @@ module Grabepg
70
89
  @proxyindex = 0
71
90
  end
72
91
  @proxyindex=@proxyindex%proxylist.size
73
- if(proxylist[@proxyindex]!="123.125.116.243:6256"||proxylist[@proxyindex]!="http://123.125.116.243:28832")
92
+ if(proxylist[@proxyindex])
74
93
  proxy = proxylist[@proxyindex]
75
94
  else
76
95
  proxy = proxylist[@proxyindex+1]
77
96
  end
78
97
  begin
79
- doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
98
+ doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
99
+ doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
80
100
  @no_firest = false
81
101
  rescue => err
82
102
  @no_firest = true
@@ -94,7 +114,12 @@ module Grabepg
94
114
 
95
115
 
96
116
  #获取节目表
97
- def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com")
117
+ def self.getschedule(channel,herf,proxylist,site="http://www.tvmao.com",img_dir_down_path=@img_down_dir_path)
118
+ unless img_dir_down_path
119
+ img_dir_down_path = __FILE__
120
+ end
121
+ @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
122
+
98
123
  if(@site)
99
124
  site=@site
100
125
  end
@@ -116,14 +141,6 @@ module Grabepg
116
141
  channel_schedule = {}
117
142
  get_week_url.call(herf).each do |url|
118
143
  p "Grab: #{url}"
119
- #if(proxylist[proxyidex]!="219.234.82.89:33948")
120
- # proxy = proxylist[@proxyidex]
121
- #else
122
- # proxy = proxylist[@proxyidex+1]
123
- #end
124
- #p "Proxy: http://#{proxy}"
125
- #doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}"))
126
- #@proxyidex += 1
127
144
  doc = get_doc_with_proxy(proxylist,url)
128
145
  show_type = []
129
146
  img_url = _img_url + channel+".jpg"
@@ -150,19 +167,22 @@ module Grabepg
150
167
  show_infomation=get_show_infomation(proxylist,schedule_herf)
151
168
  show_type=show_infomation["type"]
152
169
  show_name = show_infomation["name"]
170
+ show_img = show_infomation["img"]
153
171
  end
154
- p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name}"
155
- schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
172
+ p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
173
+ schedule_list << {"time"=>time,"schedule"=>schedule,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name,"img"=>show_img}
156
174
  end
157
175
  end
158
176
  channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
159
177
  end
178
+ @img_down_file.close
160
179
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
161
180
  end
162
181
 
163
182
 
164
183
  #获取节目详细信息
165
184
  def self.get_show_infomation(proxy_list,schedule_herf)
185
+ begin
166
186
  @proxyindex = 0
167
187
  unless @site
168
188
  @site = "http://www.tvmao.com"
@@ -173,6 +193,12 @@ module Grabepg
173
193
  # p "title: %s" % title
174
194
  type = []
175
195
  name = doc.css('span[itemprop="name"]')[0].content
196
+
197
+ #获取节目的图片
198
+ schedule_img_down_path = doc.css('img[class="tvc"]')[0].get_attribute('src') if doc.css('img[class="tvc"]')
199
+
200
+
201
+
176
202
  doc.css('span[itemprop="genre"]').each do |_type|
177
203
  type << _type.content
178
204
  end
@@ -188,8 +214,12 @@ module Grabepg
188
214
  type<<_type.content
189
215
  end
190
216
  type.uniq!
217
+ @img_down_file.puts("#{name}:#{schedule_img_down_path}")
191
218
  @show_schedule.merge!(name=>get_show_schedule(proxy_list,schedule_herf)) unless @show_schedule.has_key?(name)
192
- {"type"=>type,"name"=>name}
219
+ {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
220
+ rescue => e
221
+ p "Error In get_show_infomation msg : #{e.to_s}"
222
+ end
193
223
  end
194
224
 
195
225
  #获取节目的时间表
@@ -282,4 +312,8 @@ module Grabepg
282
312
  list = contxt.scan(regex_list)
283
313
  end
284
314
 
315
+ def save_img
316
+
317
+ end
318
+
285
319
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-26 00:00:00.000000000 Z
11
+ date: 2013-04-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: ! '"用于从TVMAO抓取EPG信息"'
14
14
  email: