grab_epg 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.grabepg.gemspec +14 -0
- data/Gemfile +5 -0
- data/lib/debug.rb +9 -0
- data/lib/grabepg.rb +96 -0
- data/lib/grabepg.rb~ +115 -0
- data/projectFilesBackup/.idea/grabepg.iml +9 -0
- metadata +50 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
Mjg3MTAwMjIxM2FlNWQwMzc5MjkzZWEzNzQ4MGMzZjkzZWNiZDgwOA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YmFlYTA3ZWNjNTRlN2FmNmM4NjI5MWFlZTlhNTI3YjRiYWQ4ZGQ1Mg==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZGExZWJiN2Q4NjUzNTdiODcwMzI5ZjQ3ODAzYzM5YzU0MDI1OGI2ZGI2Yjk3
|
10
|
+
NjQ3YTNjNTg5YzBkYWM0ZjQzNThkODM4Njk1MDI5YWJhZjQwODkxYjFlZmQw
|
11
|
+
NDAyN2VlM2NmODI1M2Y4OGYxMThiMmM5MzI5NGI2Y2UzYzFlZDA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OGY5ODVkMzk0MjY4NDc1YjgzMTVkZTY3OThkZjZmZmFkZTZkNDI1NTIxZTcw
|
14
|
+
NmQxZjYxODg0YTkzMTE0YzNiNzFiNmE4ZmZiMGMwY2M3OGY0ZDZlYWYwZGMz
|
15
|
+
MzRlMzgzZGFkYTZjYTcyYWIyNGU1MTQ4ZTczZDY5NzBiZDkzMmQ=
|
data/.grabepg.gemspec
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Gem specification for grab_epg, a scraper that collects EPG (electronic
# programme guide) data from TVMAO.
Gem::Specification.new do |gem|
  gem.name          = "grab_epg"
  gem.version       = "0.0.1"
  gem.authors       = ["hahazql"]
  gem.email         = ["hahazhouqunli@gmail.com"]

  # %q{} is already a string literal; wrapping the text in extra double quotes
  # made the quote characters part of the published description/summary.
  gem.description   = %q{用于从TVMAO抓取EPG信息}
  gem.summary       = %q{Grab EPG}
  gem.homepage      = ""

  # Package every tracked file except editor backups (*~) and IDE project
  # files, which are not part of the library.
  gem.files         = `git ls-files`.split("\n").reject do |path|
    path.end_with?("~") || path.include?(".idea/")
  end
  gem.require_paths = ["lib"]

  # lib/grabepg.rb requires nokogiri, so it must be declared for installs to work.
  gem.add_dependency "nokogiri"
end
|
data/Gemfile
ADDED
data/lib/debug.rb
ADDED
data/lib/grabepg.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
#encoding:utf-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
5
|
+
|
6
|
+
# Scrapes channel lists and weekly programme schedules (EPG) from TVMAO.
#
# State is kept on the module itself: +Grabepg.channel+ is the list of
# channels collected so far and +Grabepg.site+ is the base URL being scraped.
module Grabepg
  # Instance-level readers are kept for code that mixes the module in.
  attr_reader :channel # channel list
  attr_reader :site    # site base URL

  class << self
    # Module-level readers for the state written by start/getchannels.
    attr_reader :channel, :site
  end

  DEFAULT_GrabtvType = ["cctv", "satellite", "digital"].freeze
  DEFAULT_SITE = "http://www.tvmao.com"
  LOGO_BASE_URL = "http://static.haotv.me/channel/logo/"

  # Resets the module state, collects every channel and then fetches the
  # weekly schedule of each one.
  #
  # @return [Hash] channel id => array of schedule entries (see getSchudle)
  def self.start
    @channel = []
    @site = DEFAULT_SITE
    getchannels.each_with_object({}) do |(channel_id, href), schedules|
      schedules[channel_id] = getSchudle(channel_id, href)
    end
  end

  # Fetches the channel table of the site for every category in
  # DEFAULT_GrabtvType and records each channel in +Grabepg.channel+.
  #
  # @return [Hash] channel id => relative link of the channel's schedule page
  def self.getchannels
    # Allow calling this without start having initialised the state.
    @channel ||= []
    @site ||= DEFAULT_SITE
    channel_urls = {}

    DEFAULT_GrabtvType.each do |type|
      url = category_url(type)
      next if url.nil?
      p url
      doc = fetch_document(url)
      doc.css('td[class="tdchn"]').each do |td|
        # When a cell holds several links the last one wins.
        link = td.css('a').last
        href = link.nil? ? "" : link['href'].to_s
        channel_id = channel_id_from_href(href)
        # Cells without a usable link cannot be scheduled; skip them.
        next if channel_id.nil?
        @channel << { channel_id => { name: td.content, herf: href, type: type } }
        channel_urls[channel_id] = href
      end
    end

    p "Channel: #{@channel}"
    channel_urls
  end

  # Fetches and prints the seven daily schedule pages of one channel.
  #
  # @param channel [String] channel id, also used to build the logo URL
  # @param url [String] relative link of the channel page as returned by getchannels
  # @return [Array<Hash>] one hash per programme with the keys
  #   :channel, :img_url, :date, :week, :time and :name
  def self.getSchudle(channel, url)
    img_url = "#{LOGO_BASE_URL}#{channel}.jpg"
    entries = []

    week_urls(url).each do |page_url|
      p page_url
      doc = fetch_document(page_url)
      header = doc.css('div[class="mt10 clear"]')[0]
      rows = doc.css('ul[id="pgrow"]')[0]
      if header.nil? || rows.nil?
        # The page layout is not what we expect; skip it instead of crashing.
        p "Skip #{page_url}: schedule markup not found"
        next
      end

      date, week = header.content.split(" ")
      p "Channel: #{channel} Date: #{date} Week: #{week}"

      rows.css("li").each do |item|
        time, name = parse_schedule_line(item.content)
        next if time.nil?
        p "Time: #{time} Schudel: #{name}"
        entries << { channel: channel, img_url: img_url, date: date,
                     week: week, time: time, name: name }
      end
    end

    entries
  end

  # Helper: URL of the channel table for one category, or nil when the
  # category is nil/empty.
  def self.category_url(type)
    return nil if type.nil? || type.empty?
    "#{@site || DEFAULT_SITE}/program/duration/#{type}/w1.html"
  end

  # Helper: extracts the channel id (second dash-separated token of the file
  # name) from a link such as "/program/CCTV-CCTV1-w1.html". Returns nil when
  # the link does not have that shape.
  def self.channel_id_from_href(href)
    return nil if href.nil? || href.empty?
    file_name = href.split("/")[2]
    return nil if file_name.nil?
    file_name.split("-")[1]
  end

  # Helper: absolute URLs of the seven daily pages (w1..w7) for a channel
  # link. Returns an empty array for a link without the expected dashes.
  def self.week_urls(href)
    return [] if href.nil? || href.empty?
    parts = href.split("-")
    return [] if parts.size < 2
    prefix = "#{@site || DEFAULT_SITE}#{parts[0]}-#{parts[1]}-"
    (1..7).map { |day| "#{prefix}w#{day}.html" }
  end

  # Helper: splits the text of one schedule row into [time, name]; returns nil
  # when the row has fewer than two whitespace-separated tokens.
  def self.parse_schedule_line(text)
    tokens = text.to_s.split(" ")
    return nil unless tokens.size > 1
    [tokens[0], tokens[1]]
  end

  # Helper: downloads +url+ and parses it as HTML. Uses URI#open from open-uri
  # (Kernel#open no longer accepts URLs on Ruby 3) and closes the handle.
  def self.fetch_document(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end
end
|
data/lib/grabepg.rb~
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
# Earlier draft of the TVMAO grabber, written as a mixin: the including class
# is constructed with a channel and a date and then yields that day's schedule
# one entry at a time through next_schedule.
#
# Depends on project code not visible here (Kfd::TvmaoChannel,
# Kanke::Kfd::Grab::*) and on Date#end_of_week — presumably ActiveSupport;
# TODO confirm.
module Grabepg
  attr_accessor :channel
  attr_accessor :data

  DEFAULT_GrabtvType=["cctv","satellite","digital",]

  # Placeholder; not implemented in this draft.
  def self.start
  end

  # Fetch the site's channel table. Placeholder; not implemented in this draft.
  def self.getchannels
  end

  # Resolves the TVMAO page URL that holds the schedule of +channel+ on +date+.
  #
  # @param channel [Object] must respond to _id and name
  # @param date [Date] day whose schedule is wanted
  def initialize(channel, date)
    # Keep both arguments: next_schedule reads channel and
    # get_channel_the_date_show needs the date to build timestamps.
    @channel = channel
    @date = date

    p 'Tvmao grab %s - %s - %s' % [channel._id,channel.name, date.strftime('%F')]
    if ::Kfd::TvmaoChannel.count == 0
      # Channel table is empty: load it first.
      kk = Kanke::Kfd::Grab::TvmaoChannel.new()
      kk.load_in_database()
    end

    tmp_ch = ::Kfd::TvmaoChannel.where(zid:channel._id,enable: true).first
    if tmp_ch.nil?
      p 'can not find the %s | %s' % [channel._id,tmp_ch]
      @request_url = nil
    else
      # The stored URL must be rewritten to the page of the wanted day (w1 ~ w14).
      regex_url = /http:\/\/www\.tvmao\.com\/program\/.*-w(?=.*\.html)/
      base_url = regex_url.match(tmp_ch.url).to_s
      page_index = week_page_index(date, date > Date.today.end_of_week)
      @request_url = '%s%s.html' % [base_url, page_index]
    end
  end

  # Returns the next schedule entry, fetching the page on first use.
  #
  # @return [Object, nil] a Kanke::Kfd::Grab::Schedule, or nil when no page
  #   could be resolved for the channel
  # NOTE(review): once every entry has been consumed @enum.next raises
  # StopIteration; the rescue that returned nil was commented out in the
  # original — confirm callers expect the exception.
  def next_schedule
    #need proxy
    unless @request_url.nil?
      all_schedule = get_channel_the_date_show(@request_url)
      p 'we get %d schedule' % all_schedule.count
      @enum = all_schedule.each
      @request_url = nil
    end

    return nil if @enum.nil?

    sch = @enum.next
    p '%s %s' % [sch['begin_at'].strftime('%F %R'),sch['name']]
    Kanke::Kfd::Grab::Schedule.new(channel,sch['name'],sch['begin_at'], Kfd::Source::TVMAO)
  end

  # Parses the programme list of one channel for one day.
  #
  # @param url [String] page to fetch
  # @return [Array<Hash>] hashes with 'name' (String) and 'begin_at' (DateTime);
  #   empty when the page could not be fetched
  def get_channel_the_date_show(url)
    p ' request url %s ' % url
    doc = Kanke::Kfd::Grab::Util.get_parse_doc(url)
    if doc.nil?
      p "doc nil"
      # Second attempt with the helper's flag switched off.
      doc = Kanke::Kfd::Grab::Util.get_parse_doc(url,false)
    end
    return [] if doc.nil?

    all_schedule = []
    doc.css('ul#pgrow li').each do |li|
      time = li.search('span').first
      next unless time
      # Rows without a time cannot be scheduled.
      next if time.content.strip.empty?

      tvgd = li.search('div').first
      name = clean_schedule_name(li.content, time.content, tvgd.nil? ? nil : tvgd.content)
      begin
        the_show_time = DateTime.strptime("#{@date.strftime('%F')} #{time.content}",'%F %R')
        all_schedule << {'name'=>name,'begin_at' => the_show_time}
      rescue StandardError => e
        # Best effort: report the row that failed and keep the rest.
        p e
      end
    end
    all_schedule
  end

  private

  # Page number on TVMAO for +date+: 1..7 (Monday..Sunday) for the current
  # week, 8..14 when +in_next_week+ is true.
  def week_page_index(date, in_next_week)
    index = date.wday == 0 ? 7 : date.wday
    index += 7 if in_next_week
    index
  end

  # Strips the time, the optional guide block text and the link labels
  # ("剧照", "剧情", "演员表") from a row's text, leaving the programme name.
  def clean_schedule_name(raw, time_text, guide_text = nil)
    name = raw.gsub("剧照","")
    name = name.gsub(guide_text,"") unless guide_text.nil?
    ["剧照", "剧情", "演员表", time_text].each do |noise|
      name = name.gsub(noise,"")
    end
    name.strip
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="NewModuleRootManager">
|
4
|
+
<content url="file://$MODULE_DIR$" />
|
5
|
+
<orderEntry type="inheritedJdk" />
|
6
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
7
|
+
</component>
|
8
|
+
</module>
|
9
|
+
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: grab_epg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hahazql
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-25 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: ! '"用于从TVMAO抓取EPG信息"'
|
14
|
+
email:
|
15
|
+
- hahazhouqunli@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- .grabepg.gemspec
|
21
|
+
- Gemfile
|
22
|
+
- lib/debug.rb
|
23
|
+
- lib/grabepg.rb
|
24
|
+
- lib/grabepg.rb~
|
25
|
+
- projectFilesBackup/.idea/grabepg.iml
|
26
|
+
homepage: ''
|
27
|
+
licenses: []
|
28
|
+
metadata: {}
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 2.0.0
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: ! '"Grab EPG"'
|
49
|
+
test_files: []
|
50
|
+
has_rdoc:
|