xmltv 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Manifest.txt +17 -0
- data/README.txt +123 -0
- data/Rakefile +18 -0
- data/bin/xmltv +13 -0
- data/lib/xmltv/sample/dumpids.rb +15 -0
- data/lib/xmltv/sample/mythtv_chns.yaml +183 -0
- data/lib/xmltv/sample/sample_output +73 -0
- data/lib/xmltv/sample/tvcat_spoolfiles.rb +29 -0
- data/lib/xmltv/sites/film1.rb +100 -0
- data/lib/xmltv/sites/rt.rb +133 -0
- data/lib/xmltv/sites/trivial.rb +33 -0
- data/lib/xmltv/sites/tvgids.rb +224 -0
- data/lib/xmltv/sites/tvtoday.rb +185 -0
- data/lib/xmltv/sites/upc.rb +157 -0
- data/lib/xmltv/sites/vpro.rb +122 -0
- data/lib/xmltv/xmltv.rb +737 -0
- metadata +89 -0
@@ -0,0 +1,157 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
autoload :XMLTV, 'xmltv/xmltv'
|
4
|
+
require 'date'
|
5
|
+
#require 'uri'
|
6
|
+
# Additions to the standard Date class used by the UPC grabber.
class Date
  # Today's date, captured once when this file is loaded.
  Vandaag = Date.today

  # Turns a UPC day label into a Date.
  #
  # string - 'today', or any text that Date.dutch can convert.
  #
  # Returns Vandaag for 'today'. Every other label is converted with
  # Date.dutch; a result equal to Vandaag is moved seven days ahead
  # (presumably because a weekday name matching today refers to next week's
  # page -- confirm against the site).
  def self.upcdate(string)
    return Vandaag if string == 'today'

    parsed = dutch(string)
    parsed == Vandaag ? parsed + 7 : parsed
  end
end
|
17
|
+
|
18
|
+
module XMLTV
|
19
|
+
|
20
|
+
class UpcGrabber < Grabber
|
21
|
+
# Day labels that day_url appends to a channel URL: 'today' followed by the
# lower-cased English day names from Date::DAYNAMES. Frozen so the shared
# list cannot be modified by accident.
Days_to_grab = (%w{ today } + Date::DAYNAMES.map { |x| x.downcase }).freeze

# Translation table from UPC category labels to the category names written
# to the output. transform looks labels up after removing whitespace and
# lower-casing them. Frozen for the same reason as Days_to_grab.
Cattrans = {
  'drama'         => 'Drama',
  'educatie'      => 'Educational',
  'erotiek'       => 'Adult',
  'kids/jeugd'    => 'Children',
  'kunst/cultuur' => 'Arts/Culture',
  'nieuws'        => 'News',
  'show/spelshow' => 'Game',
  'speelfilm'     => 'Movies',
  'sport'         => 'Sports',
  'vrijetijd'     => 'Educational'
}.freeze
|
32
|
+
# Root of the UPC site; the page addresses built in this class start with it.
#
# Returns the URL as a String, without a trailing slash.
def base_url
  "http://www.upclive.nl"
end
|
35
|
+
|
36
|
+
# Builds the channel table by walking the paginated channel overview
# (pages 1 to 200 at most).
#
# Each listing block contributes one entry keyed by the number that follows
# 'Zendernummer' in the block text; the value is [name, packet, url], where
# url is the block's link with 'nowandnext' removed and name is the
# unescaped 'channels' parameter of that link.
#
# Paging stops at the first listing block without an 'h5 a' link, and also
# at the first page that contains no listing blocks at all, so an empty
# result page no longer causes the remaining pages to be requested.
#
# The table is stored with save_object(channels, channel_list) and returned.
def fetch_all_channels
  channels = {}
  catch(:ready) do
    1.upto(200) do |pagenr|
      www = "#{base_url}/Televisie/TV_gids/Zenders/?channels=All&theme=All&page=#{pagenr}"
      STDERR.puts(www) if XmltvOptions.verbose
      page = fetch(www)
      chns = page.search('div.epg_listings_bar4c') + page.search('div.epg_listings_bar4cend')
      # Nothing to parse on this page: treat it as the end of the list.
      throw :ready if chns.empty?

      chns.each do |chn|
        link = chn.at('h5 a')
        throw :ready if link.nil?

        url = REXML::Text.read_with_substitution(link['href']).gsub('nowandnext', '')
        name = URI.unescape(url[/channels=(.*)&/, 1])
        tekst = chn.at('div.right')
        zender = tekst.inner_text[/Zendernummer\s+([0-9]+)/, 1]
        packet = tekst.at('a.epg_underline').inner_text.split[-1]
        # A bare trailing word 'Pack' is stored as an empty packet name.
        packet = '' if packet == 'Pack'
        channels[zender] = [name, packet, url]
      end
    end
  end
  save_object(channels, channel_list)
  channels
end
|
62
|
+
# Drops every cache entry whose key parses to a date earlier than Vandaag.
#
# cache - Hash keyed by date strings; modified in place.
#
# Returns the number of entries that were removed.
def clean_cache(cache)
  before = cache.size
  cache.delete_if { |dt, _| Date.parse(dt) < Vandaag }
  before - cache.size
end
|
71
|
+
|
72
|
+
# Display name of a channel.
#
# chan_id - key into all_channels.
#
# Returns the first element of the channel's all_channels entry.
def channel_display(chan_id)
  channel = all_channels[chan_id]
  channel[0]
end
|
75
|
+
# Address of the listings page of one channel for one day.
#
# chan_id - key into all_channels.
# day     - day label to append (grab_channel passes Days_to_grab entries).
#
# Returns base_url, the third element of the channel entry, the day label
# and the suffix '_all' joined into one String.
def day_url(chan_id, day)
  channel_path = all_channels[chan_id][2]
  format('%s%s%s_all', base_url, channel_path, day)
end
|
78
|
+
|
79
|
+
# Downloads the listings of one channel for every entry of Days_to_grab
# whose date is not yet present in the channel's cache.
#
# chan_id - key into all_channels.
#
# Fills @all_days (date string => Array of programme Hashes with the keys
# 'noshift', 'title', 'times', 'category' and 'desc'), stores it with
# save_object and returns the number of programmes parsed during this call.
#
# Raises RuntimeError when the 'today' page shows a date other than
# Date.today. A NoMethodError raised while reading the page's date is
# re-raised after the page has been handed to save.
def grab_channel(chan_id)
  @all_days = load_cachefile(chan_id)
  @all_days.delete(Date.today.to_s) ## Always fetch today to calibrate calendars
  grabbed = 0

  Days_to_grab.each_with_index do |day, index|
    label = index.zero? ? day : Date::Dagen[index - 1]
    expected = Date.upcdate(label)
    next if @all_days.has_key?(expected.to_s)

    url = day_url(chan_id, day)
    page = fetch(url)
    begin
      datum = Date.dutch(page.at('//div.epg_listings_bar1//span').inner_text)
    rescue NoMethodError
      # Keep the offending page for inspection before giving up.
      save(url, page)
      raise
    end
    raise "#{day} #{datum} Upc is gek geworden" if day == 'today' && datum != Date.today

    # noshift stays nil until the first section header has been seen; after
    # that it is true only while inside the 'ochtend' section.
    noshift = nil
    programs = []
    page.at('div.ch_ci2_epg_center').search('/div').each do |programme|
      klass = programme['class']
      if klass == 'epg_header_4'
        noshift = (programme.inner_text.strip.downcase == 'ochtend')
      elsif klass == 'listing_visible'
        title = programme.at('div.col7').inner_text.strip
        times = programme.at('div.col8').inner_text.gsub(/\s/, '')
        category = programme.at('div.col9').inner_text.strip
        # The description is made of the leading text nodes only; collecting
        # stops at the first child that is not an Hpricot::Text.
        desc = ''
        programme.at('div.info_color').each_child do |node|
          break unless node.class == Hpricot::Text
          desc << node.to_s.strip
        end

        program = {}
        program['noshift'] = noshift
        program['title'] = title
        program['times'] = times
        program['category'] = category
        program['desc'] = desc
        programs << program
      end
    end

    # Stored under the date printed on the page, not the expected one.
    @all_days[datum.to_s] = programs
    grabbed += programs.size
  end

  save_object(@all_days, cachefile(chan_id))
  grabbed
end
|
125
|
+
# Converts the programmes held in @all_days into programme Hashes with
# absolute start and stop times.
#
# chan_id - channel identifier, passed on to proghash, date_stats and
#           day_url.
#
# For every cached entry:
# * the 'times' text supplies start hour/minute and stop hour/minute;
# * a start before 08:00 in an entry whose 'noshift' flag is exactly false
#   is moved forward by Dag (defined outside this file; presumably the
#   number of seconds in a day);
# * a stop that lies before the start is moved forward by Dag when the stop
#   hour is 10 or lower;
# * an entry whose stop still lies before its start is passed to reject and
#   left out of the result;
# * the category is translated through Cattrans after removing whitespace
#   and lower-casing, falling back to 'onbekend'.
#
# date_stats is called exactly once per accepted programme. It used to be
# called twice for accepted programmes and once for rejected ones.
#
# Returns the Array of accepted programme Hashes.
def transform(chan_id)
  progdata_array = []
  @all_days.each_pair do |datum, programs|
    jaar, maand, dag = datum.split('-').map { |x| x.to_i }
    programs.each do |entry|
      progdata = proghash(entry, chan_id)
      startuur, startmin, stopuur, stopmin = entry['times'].scan(/\d+/).map { |x| x.to_i }
      shift = startuur < 8 && entry['noshift'] == false ? Dag : 0
      start = Time.local(jaar, maand, dag, startuur, startmin) + shift
      stop = Time.local(jaar, maand, dag, stopuur, stopmin)
      # Programme runs past midnight: the stop time belongs to the next day.
      stop += Dag if stop < start && stopuur <= 10
      progdata['start'] = start
      progdata['stop'] = stop

      if stop < start
        reject(myname, day_url(chan_id, 'today'), entry, progdata)
        next
      end

      date_stats(chan_id, start)
      progdata['category'] = Cattrans[entry['category'].gsub(/\s+/, '').downcase] || 'onbekend'
      progdata_array << progdata
    end
  end
  progdata_array
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
# Entry point: build the UPC grabber and call run on it when this file is loaded.
XMLTV::UpcGrabber.new.run
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
autoload :XMLTV, 'xmltv/xmltv'
|
4
|
+
require 'cgi'
|
5
|
+
|
6
|
+
|
7
|
+
module XMLTV
|
8
|
+
|
9
|
+
class VproGrabber < Grabber
|
10
|
+
# Sets up the grabber through the superclass constructor, starts without a
# session cookie and prepares the channel part of the form data: one
# 'zendersTV[]=<id>' pair per entry of config['channels'], joined with '&'.
def initialize
  super
  @cookie = nil
  pairs = config['channels'].map { |id| "zendersTV[]=#{id}" }
  @zenders = pairs.join('&')
end
|
15
|
+
# Root of the VPRO guide site.
#
# Returns the URL as a String, without a trailing slash.
def base_url
  "http://gids.vpro.nl"
end
|
18
|
+
# Reads the channel list from the links inside 'ul#selZendersTV' on the
# start page.
#
# Each link contributes one entry: its 'id' attribute is the key and its
# text the value, both passed through to_utf. A link for which this raises
# NoMethodError is written to STDERR followed by '====' and skipped.
#
# The Hash is stored with save_object(channels, channel_list) and returned.
def fetch_all_channels
  page = fetch(base_url)
  channels = {}
  page.search('ul#selZendersTV//a').each do |link|
    begin
      id = link['id'].to_utf
      label = link.inner_text.to_utf
      channels[id] = label
    rescue NoMethodError
      STDERR.puts link, '===='
    end
  end
  save_object(channels, channel_list)
  channels
end
|
32
|
+
# Ruby standard lib cookie parsing is seriously broken.
# This is kind of a kludge, which works for vpro.nl.
#
# Reduces a combined Set-Cookie header value to a value usable in a Cookie
# request header.
#
# cookie - header text in which the individual cookies are separated by
#          ', ' followed by an upper-case letter; may be nil when the
#          server sent no cookie.
#
# For every cookie only 'name=value' is kept (everything from the first ';'
# on is dropped). Cookies whose value is 'deleted' and fragments without
# '=' are left out. The input string is not modified.
#
# Returns the remaining pairs joined with '; ', or '' when there are none.
def self.cookie_parse(cookie)
  ourcookie = []
  cookie.to_s.split(/, (?=[A-Z])/).each do |chunk|
    cook, val = chunk.split('=', 2)
    # A fragment without '=' carries no value to send back.
    next if val.nil?

    val = val.gsub(/;.*/, '')
    ourcookie << "#{cook}=#{val}" unless val == 'deleted'
  end
  ourcookie.join('; ')
end
|
45
|
+
# Extracts one programme from a listing row.
#
# info - page element of the row; it is queried for 'div.tijd' and
#        'div.uitzending', and its parent for 'div.detail'.
#
# Returns a Hash with the keys:
#   'times'    - stripped text of 'div.tijd'
#   'avond'    - value of @avond after this row; @avond becomes true as soon
#                as a row's time text starts with a number above 17
#   'title'    - first line of the 'div.uitzending' text
#   'category' - last line of the first paragraph of the detail block
#   'desc'     - text of 'p.summary', or '' when reading it fails
#
# A NoMethodError raised while reading the row is re-raised after the row
# and its detail block have been written to STDERR.
def grab_detail(info)
  program = {}
  detail = nil
  begin
    tijd = info.at('div.tijd').inner_text.strip
    program['times'] = tijd
    @avond = true if !@avond && tijd.to_i > 17
    program['avond'] = @avond

    title_lines = info.at('div.uitzending').inner_text.to_utf.strip.split("\n")
    program['title'] = title_lines[0]

    detail = info.parent.at('div.detail')
    category_lines = detail.at('//p').inner_text.strip.split("\n")
    program['category'] = category_lines[-1]

    program['desc'] = begin
      detail.at('//p.summary').inner_text.to_utf.strip
    rescue StandardError
      ''
    end
  rescue NoMethodError
    STDERR.puts info, detail
    raise
  end
  program
end
|
64
|
+
# Cache clean-up hook. This grabber removes nothing from the cache.
#
# cache - ignored.
#
# Returns 0, the number of removed entries.
def clean_cache(cache)
  removed = 0
  removed
end
|
67
|
+
|
68
|
+
# Downloads the listings of one channel for 'vandaag', 'morgen' and
# 'overmorgen'.
#
# chan_id - channel code sent as 'lZenCode' in the form data.
#
# On the first call the start page is requested once to obtain the session
# cookie. The handle returned by open is now closed right after the header
# has been read, and the site address comes from base_url instead of a
# second hard-coded copy.
#
# The channel selection is then posted to '/index.php/gids'; the response
# of that request is not used (presumably the selection is remembered on
# the server side through the cookie -- confirm). Each of the three day
# pages is parsed with grab_detail, and every programme gets the text of
# 'div#vandaag' on its page as 'datum'.
#
# Fills @channel_info, stores it with save_object and returns the number of
# programmes found.
def grab_channel(chan_id)
  if @cookie.nil?
    set_cookie = open(base_url) { |home| home.meta['set-cookie'] }
    @cookie = VproGrabber.cookie_parse(set_cookie)
  end

  url = "/index.php/gids"
  datasrc = ['medium=TV', @zenders, 'layout=zender', "lZenCode=#{chan_id}"]
  data = URI.escape(datasrc.join('&'))
  http = Net::HTTP.new(URI.parse(base_url).host)
  cook = @cookie
  http.request_post(url, data,
                    { 'Cookie' => cook, 'Content-Type' => 'application/x-www-form-urlencoded' })

  @channel_info = []
  %w{ vandaag morgen overmorgen }.each do |dag|
    response = http.request_get("#{url}?medium=TV&dag=#{dag}", { 'Cookie' => cook })
    page = Hpricot(response.body)
    vandaag = page.at('div#vandaag').inner_text
    # grab_detail switches @avond on within a day; start each day with it off.
    @avond = false
    page.search('div.infoRow').each do |info|
      program = grab_detail(info)
      program['datum'] = vandaag
      @channel_info << program
    end
  end

  save_object(@channel_info, cachefile(chan_id))
  @channel_info.size
end
|
95
|
+
|
96
|
+
# Converts the programmes held in @channel_info into programme Hashes with
# absolute start and stop times.
#
# chan_id - channel identifier, passed on to proghash and date_stats.
#
# For every entry the date comes from Date.dutch(entry['datum']) and the
# numbers in entry['times'] supply start hour/minute and stop hour/minute.
# An entry flagged 'avond' that starts before 09:00 is moved forward by Dag
# (defined outside this file; presumably the number of seconds in a day),
# start and stop alike. A stop that lies before the start is moved forward
# by Dag once more.
#
# Errors are written to STDERR and re-raised. Only StandardError is
# intercepted, so interrupts and exit requests pass through without being
# logged here.
#
# Returns the Array of programme Hashes.
def transform(chan_id)
  progdata_array = []
  @channel_info.each do |entry|
    begin
      progdata = proghash(entry, chan_id)
      progdata_array << progdata
      date = Date.dutch(entry['datum'])
      startuur, startmin, stopuur, stopmin = entry['times'].split(/[^0-9]+/).map { |x| x.to_i }
      shift = entry['avond'] && startuur < 9 ? Dag : 0
      start = Time.local(date.year, date.month, date.day, startuur, startmin) + shift
      stop = Time.local(date.year, date.month, date.day, stopuur, stopmin) + shift
      progdata['start'] = start
      progdata['stop'] = stop
      date_stats(chan_id, start)
      # Programme runs past midnight: the stop time belongs to the next day.
      progdata['stop'] += Dag if stop < start
    rescue StandardError => exc
      STDERR.puts exc, exc.message, exc.backtrace
      raise
    end
  end
  progdata_array
end
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
122
|
+
# Entry point: build the VPRO grabber and call run on it when this file is loaded.
XMLTV::VproGrabber.new.run
|