grab_epg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Mjg3MTAwMjIxM2FlNWQwMzc5MjkzZWEzNzQ4MGMzZjkzZWNiZDgwOA==
5
+ data.tar.gz: !binary |-
6
+ YmFlYTA3ZWNjNTRlN2FmNmM4NjI5MWFlZTlhNTI3YjRiYWQ4ZGQ1Mg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZGExZWJiN2Q4NjUzNTdiODcwMzI5ZjQ3ODAzYzM5YzU0MDI1OGI2ZGI2Yjk3
10
+ NjQ3YTNjNTg5YzBkYWM0ZjQzNThkODM4Njk1MDI5YWJhZjQwODkxYjFlZmQw
11
+ NDAyN2VlM2NmODI1M2Y4OGYxMThiMmM5MzI5NGI2Y2UzYzFlZDA=
12
+ data.tar.gz: !binary |-
13
+ OGY5ODVkMzk0MjY4NDc1YjgzMTVkZTY3OThkZjZmZmFkZTZkNDI1NTIxZTcw
14
+ NmQxZjYxODg0YTkzMTE0YzNiNzFiNmE4ZmZiMGMwY2M3OGY0ZDZlYWYwZGMz
15
+ MzRlMzgzZGFkYTZjYTcyYWIyNGU1MTQ4ZTczZDY5NzBiZDkzMmQ=
data/.grabepg.gemspec ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.authors = ["hahazql"]
5
+ gem.email = ["hahazhouqunli@gmail.com"]
6
+ gem.description = %q{"用于从TVMAO抓取EPG信息"}
7
+ gem.summary = %q{"Grab EPG"}
8
+ gem.homepage = ""
9
+
10
+ gem.files = `git ls-files`.split($\)
11
+ gem.name = "grab_epg"
12
+ gem.require_paths = ["lib"]
13
+ gem.version = "0.0.1"
14
+ end
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ #encoding:utf-8
2
+ # Official registry, currently disabled: source 'https://rubygems.org'
3
+ source 'http://ruby.taobao.org' # NOTE(review): plain-HTTP host, presumably a rubygems.org mirror — confirm it is still reachable
4
+ # nokogiri is required by lib/grabepg.rb and lib/debug.rb
5
+ gem "nokogiri"
data/lib/debug.rb ADDED
@@ -0,0 +1,9 @@
1
+ #encoding:utf-8
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require File.expand_path("../grabepg.rb", __FILE__)
6
+ class Debug
7
+ # To change this template use File | Settings | File Templates.
8
+ p Grabepg.start
9
+ end
data/lib/grabepg.rb ADDED
@@ -0,0 +1,96 @@
1
+ #encoding:utf-8
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+
6
+ module Grabepg
7
+ # To change this template use File | Settings | File Templates.
8
+
9
+
10
+ attr_reader :channel #频道列表
11
+ attr_reader :site #网站地址
12
+
13
+
14
+ DEFAULT_GrabtvType=["cctv","satellite","digital",]
15
+ DEFAULT_SITE = "http://www.tvmao.com"
16
+
17
+ def self.start
18
+ @channel = []
19
+ @site = DEFAULT_SITE
20
+ channel_urls = self.getchannels
21
+ getSchudle(channel_urls)
22
+ end
23
+
24
+
25
+ #获取网站的频道表
26
+ def self.getchannels
27
+ channel_urls = {}
28
+
29
+ get_url =lambda { |type|
30
+ @site + "/program/duration/#{type}/w1.html" unless (type.nil?||type.empty?)
31
+ }
32
+
33
+ get_channel_id = lambda {|url|
34
+ channel_id = url.split("/")[2].split("-")[1] unless (url.nil?||url.empty?)
35
+ }
36
+
37
+ DEFAULT_GrabtvType.each do |type|
38
+ url = get_url.call(type)
39
+ p url
40
+ doc = Nokogiri::HTML(open(url))
41
+ p doc.content
42
+ p "*************************************************************"
43
+ doc.css('td[class="tdchn"]').each do |td|
44
+ channel_name=td.content
45
+ herf = ""
46
+ td.css('a').each do |a|
47
+ herf=a['href']
48
+ end
49
+ channel_id = get_channel_id.call(herf)
50
+ @channel<<({channel_id=>{name:channel_name,herf:herf,type:type}})
51
+ channel_urls.merge!({channel_id=>herf})
52
+ end
53
+ end
54
+ p "Channel: #{@channel}"
55
+ channel_urls
56
+ end
57
+
58
+ def self.getSchudle(channel,url)
59
+ _img_url = "http://static.haotv.me/channel/logo/"
60
+
61
+
62
+ get_week_url = lambda {|url|
63
+ _url = @site
64
+ urls = []
65
+ _urls = url.split("-")
66
+ 0.upto(1).each do |i|
67
+ _url = _url+"#{_urls[i]}"+"-"
68
+ end
69
+ 1.upto(7).each do |i|
70
+ urls << _url+"w#{i}.html"
71
+ end
72
+ urls
73
+ }
74
+
75
+ get_week_url.call(herf).each do |url|
76
+ p url
77
+ doc = Nokogiri::HTML(open(url))
78
+ img_url = _img_url + channel+".jpg"
79
+ data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
80
+ date = data[0]
81
+ week = data[1]
82
+ p "Channel: #{channel} Date: #{date} Week: #{week}"
83
+ doc.css('ul[id="pgrow"]')[0].css("li").each do |schudel|
84
+ if schudel.content.split(" ").size>1
85
+ time = schudel.content.split(" ")[0]
86
+ schudel = schudel.content.split(" ")[1]
87
+ p "Time: #{time} Schudel: #{schudel}"
88
+ end
89
+ end
90
+ end
91
+ end
92
+
93
+
94
+
95
+
96
+ end
data/lib/grabepg.rb~ ADDED
@@ -0,0 +1,115 @@
1
+ model Grabepg
2
+ # NOTE(review): editor backup copy (grabepg.rb~); `model` above is not a Ruby keyword — `module` or `class` was presumably intended.
3
+
4
+ # NOTE(review): the accessor below is :data, yet get_channel_the_date_show reads `date` — confirm which name was intended.
5
+ attr_accessor :channel
6
+ attr_accessor :data
7
+
8
+ DEFAULT_GrabtvType=["cctv","satellite","digital",]
9
+
10
+ # Empty stub: the body is blank, so calling it returns nil.
11
+ def self.start
12
+
13
+ end
14
+
15
+
16
+ # Fetch the site's channel list (currently an empty stub).
17
+ def self.getchannels
18
+
19
+ end
20
+
21
+ # Works out @request_url for +channel+ on +date+.
22
+ # Loads the TvmaoChannel records when none exist, looks up the enabled record whose zid equals channel._id,
23
+ # and leaves @request_url nil when no such record is found. The arguments are not stored on the instance.
24
+ def initialize(channel, date)
25
+ p 'Tvmao grab %s - %s - %s' % [channel._id,channel.name, date.strftime('%F')]
26
+ count = ::Kfd::TvmaoChannel.count
27
+ if count == 0
28
+ kk = Kanke::Kfd::Grab::TvmaoChannel.new()
29
+ kk.load_in_database()
30
+ end
31
+ tmp_ch = ::Kfd::TvmaoChannel.where(zid:channel._id,enable: true).first
32
+ if tmp_ch.nil?
33
+ p 'can not find the %s | %s' % [channel._id,tmp_ch]
34
+ @request_url = nil
35
+ else
36
+ # request_url must be rewritten to one of the pages w1 ~ w14: keep the URL up to and including "-w", then append the day index
37
+ t_url = tmp_ch.url
38
+ regex_url = /http:\/\/www\.tvmao\.com\/program\/.*-w(?=.*\.html)/
39
+ _url = regex_url.match(t_url).to_s
40
+ if date > Date.today.end_of_week # NOTE(review): end_of_week is not core Ruby — presumably ActiveSupport; confirm
41
+ _wd = date.wday
42
+ _wd += 7
43
+ _wd += 7 if _wd == 7 # wday 0 became 7 on the previous line, so it ends up as 14
44
+ else
45
+ _wd = date.wday
46
+ _wd = 7 if date.wday == 0 # wday 0 is mapped to 7
47
+ end
48
+ @request_url = '%s%s.html' % [_url,_wd]
49
+ end
50
+ end
51
+
52
+
53
+ # Returns the next schedule entry as a Kanke::Kfd::Grab::Schedule, or nil when no enumerator has been built.
54
+ def next_schedule
55
+ #need proxy
56
+ unless @request_url.nil?
57
+ all_schedule = get_channel_the_date_show(@request_url)
58
+ p 'we get %d schedule' % all_schedule.count
59
+ @enum = all_schedule.each
60
+ @request_url = nil
61
+ end
62
+ # NOTE(review): with the rescue below commented out, @enum.next raises StopIteration once the entries are exhausted.
63
+ #begin
64
+ if @enum.nil?
65
+ nil
66
+ else
67
+ sch = @enum.next
68
+ p '%s %s' % [sch['begin_at'].strftime('%F %R'),sch['name']]
69
+ Kanke::Kfd::Grab::Schedule.new(channel,sch['name'],sch['begin_at'], Kfd::Source::TVMAO)
70
+ end
71
+ #rescue StopIteration
72
+ # nil
73
+ # end
74
+ end
75
+
76
+ # Parse a channel's programme schedule for one day from +url+; returns an Array of {'name' => ..., 'begin_at' => ...} hashes ([] when no document is obtained).
77
+ def get_channel_the_date_show(url)
78
+ p ' request url %s ' % url
79
+ doc = nil
80
+ doc = Kanke::Kfd::Grab::Util.get_parse_doc(url)
81
+ if(doc.nil?)
82
+ p "doc nil"
83
+ doc = Kanke::Kfd::Grab::Util.get_parse_doc(url,false) # second attempt with an extra `false` argument; its meaning is defined by the helper — TODO confirm
84
+ end
85
+ return [] if(doc.nil?)
86
+
87
+ all_schedule = []
88
+ regex_schedule = /<span.*/ # not referenced again in this method
89
+ doc.css('ul#pgrow li').each{|li|
90
+ time = li.search('span').first
91
+ next unless time
92
+ next if time.eql?('')
93
+ # Remove the first nested <div>'s text, the literal labels (stills / plot / cast list) and the time text; the remainder is the programme name.
94
+ tvgd = li.search('div').first
95
+ name = li.content.gsub("剧照","")
96
+ if(!tvgd.nil?)
97
+ name = name.gsub(tvgd.content,"")
98
+ end
99
+ name = name.gsub("剧照","")
100
+ name = name.gsub("剧情","")
101
+ name = name.gsub("演员表","")
102
+ name = name.gsub(time.content,"")
103
+ name = name.lstrip
104
+ name = name.chomp
105
+ begin
106
+ the_show_time = DateTime.strptime("#{date.strftime('%F')} #{time.content}",'%F %R')
107
+ all_schedule << {'name'=>name.strip,'begin_at' => the_show_time}
108
+ rescue Exception =>e # NOTE(review): the failure is only printed and the row is dropped
109
+ p e
110
+ end
111
+ }
112
+ all_schedule
113
+ end
114
+
115
+ end
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
9
+
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: grab_epg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - hahazql
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: ! '"用于从TVMAO抓取EPG信息"'
14
+ email:
15
+ - hahazhouqunli@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - .grabepg.gemspec
21
+ - Gemfile
22
+ - lib/debug.rb
23
+ - lib/grabepg.rb
24
+ - lib/grabepg.rb~
25
+ - projectFilesBackup/.idea/grabepg.iml
26
+ homepage: ''
27
+ licenses: []
28
+ metadata: {}
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 2.0.0
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: ! '"Grab EPG"'
49
+ test_files: []
50
+ has_rdoc: