xmltv 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/Manifest.txt +17 -0
- data/README.txt +123 -0
- data/Rakefile +18 -0
- data/bin/xmltv +13 -0
- data/lib/xmltv/sample/dumpids.rb +15 -0
- data/lib/xmltv/sample/mythtv_chns.yaml +183 -0
- data/lib/xmltv/sample/sample_output +73 -0
- data/lib/xmltv/sample/tvcat_spoolfiles.rb +29 -0
- data/lib/xmltv/sites/film1.rb +100 -0
- data/lib/xmltv/sites/rt.rb +133 -0
- data/lib/xmltv/sites/trivial.rb +33 -0
- data/lib/xmltv/sites/tvgids.rb +224 -0
- data/lib/xmltv/sites/tvtoday.rb +185 -0
- data/lib/xmltv/sites/upc.rb +157 -0
- data/lib/xmltv/sites/vpro.rb +122 -0
- data/lib/xmltv/xmltv.rb +737 -0
- metadata +89 -0
@@ -0,0 +1,157 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
autoload :XMLTV, 'xmltv/xmltv'
|
4
|
+
require 'date'
|
5
|
+
#require 'uri'
|
6
|
+
# Date helpers used by the UPC grabber.
class Date
  # Today's date, captured once when this file is loaded.
  Vandaag = Date.today

  # Turns a UPC day designator into a Date.
  #
  # 'today' maps to Vandaag. Any other string is handed to Date.dutch
  # (defined outside this file); when that yields today's date the result
  # is moved one week ahead.
  def self.upcdate(string)
    return Vandaag if string == 'today'

    parsed = dutch(string)
    parsed == Vandaag ? parsed + 7 : parsed
  end
end
|
17
|
+
|
18
|
+
module XMLTV
|
19
|
+
|
20
|
+
class UpcGrabber < Grabber
|
21
|
+
# Day selectors requested from the site: 'today' followed by the
# lowercased English weekday names from Date::DAYNAMES.
Days_to_grab = ['today'] + Date::DAYNAMES.map { |dayname| dayname.downcase }

# Translation table from the site's category labels (looked up lowercased
# and with whitespace removed) to the category names written to the output.
Cattrans = {
  'drama'         => 'Drama',
  'educatie'      => 'Educational',
  'erotiek'       => 'Adult',
  'kids/jeugd'    => 'Children',
  'kunst/cultuur' => 'Arts/Culture',
  'nieuws'        => 'News',
  'show/spelshow' => 'Game',
  'speelfilm'     => 'Movies',
  'sport'         => 'Sports',
  'vrijetijd'     => 'Educational'
}
|
32
|
+
# Root URL of the UPC site; fetch_all_channels and day_url prefix their
# request paths with it.
def base_url
  'http://www.upclive.nl'
end
|
35
|
+
|
36
|
+
# Walks the paginated channel overview (at most 200 pages) and builds a
# Hash keyed by the channel number found after "Zendernummer"; each value
# is [name, packet, url]. The Hash is stored via save_object and returned.
#
# Scanning stops at the first listing bar that has no 'h5 a' link.
def fetch_all_channels
  channels = {}
  catch(:ready) do
    (1..200).each do |pagenr|
      www = "#{base_url}/Televisie/TV_gids/Zenders/?channels=All&theme=All&page=#{pagenr}"
      STDERR.puts(www) if XmltvOptions.verbose
      page = fetch(www)
      bars = page.search('div.epg_listings_bar4c') + page.search('div.epg_listings_bar4cend')
      bars.each do |bar|
        link = bar.at('h5 a')
        throw :ready if link.nil?

        url = REXML::Text.read_with_substitution(link['href']).gsub('nowandnext', '')
        # NOTE(review): URI.unescape no longer exists in Ruby 3.0+ - confirm the
        # Ruby version this script is expected to run on.
        name = URI.unescape(url[/channels=(.*)&/, 1])
        details = bar.at('div.right')
        number = details.inner_text[/Zendernummer\s+([0-9]+)/, 1]
        packet = details.at('a.epg_underline').inner_text.split[-1]
        # A packet label ending in the bare word 'Pack' is stored as empty.
        packet = '' if packet == 'Pack'
        channels[number] = [name, packet, url]
      end
    end
  end
  save_object(channels, channel_list)
  channels
end
|
62
|
+
# Removes every cache entry whose key (a date string) lies before Vandaag
# and returns how many entries were removed. The cache is modified in place.
def clean_cache(cache)
  size_before = cache.size
  cache.delete_if { |dt, _en| Date.parse(dt) < Vandaag }
  size_before - cache.size
end
|
71
|
+
|
72
|
+
# Display name of a channel: the first element of its all_channels entry
# (fetch_all_channels stores entries as [name, packet, url]).
def channel_display(chan_id)
  all_channels[chan_id].first
end
|
75
|
+
# Builds the listing URL for one channel and day: base_url, the channel's
# stored url (third element of its all_channels entry), the day selector
# and the literal suffix '_all', concatenated without separators.
def day_url(chan_id, day)
  channel_path = all_channels[chan_id][2]
  [base_url, channel_path, day, '_all'].join
end
|
78
|
+
|
79
|
+
# Fetches the listings of one channel for every entry of Days_to_grab that
# is not yet in the channel's cache, stores the updated cache via
# save_object and returns the number of programmes fetched in this run.
#
# Raises when the date shown on the 'today' page is not today's date;
# re-raises NoMethodError (after saving the page) when the page carries no
# date element.
def grab_channel(chan_id)
  @all_days = load_cachefile(chan_id)
  # Today's entry is dropped so that today is always fetched again
  # (the original note says: to calibrate calendars).
  @all_days.delete(Date.today.to_s)
  total = 0
  Days_to_grab.each_with_index do |day, index|
    # 'today' is passed as is; the weekdays go through Date::Dagen
    # (defined outside this file).
    day_key = index.zero? ? day : Date::Dagen[index - 1]
    next if @all_days.has_key?(Date.upcdate(day_key).to_s)

    url = day_url(chan_id, day)
    page = fetch(url)
    begin
      datum = Date.dutch(page.at('//div.epg_listings_bar1//span').inner_text)
    rescue NoMethodError
      # Keep the offending page for inspection before giving up.
      save(url, page)
      raise
    end
    raise "#{day} #{datum} Upc is gek geworden" if day == 'today' && datum != Date.today

    # noshift records whether the most recent section header read 'ochtend';
    # it stays nil until the first header is seen.
    noshift = nil
    programs = []
    page.at('div.ch_ci2_epg_center').search('/div').each do |programme|
      case programme['class']
      when 'epg_header_4'
        noshift = (programme.inner_text.strip.downcase == 'ochtend')
      when 'listing_visible'
        program = {}
        program['noshift'] = noshift
        program['title'] = programme.at('div.col7').inner_text.strip
        program['times'] = programme.at('div.col8').inner_text.gsub(/\s/, '')
        program['category'] = programme.at('div.col9').inner_text.strip
        # The description is the run of leading text nodes, up to the first
        # child that is not plain text.
        desc = ''
        programme.at('div.info_color').each_child do |node|
          break unless node.class == Hpricot::Text

          desc << node.to_s.strip
        end
        program['desc'] = desc
        programs << program
      end
    end
    @all_days[datum.to_s] = programs
    total += programs.size
  end
  save_object(@all_days, cachefile(chan_id))
  total
end
|
125
|
+
# Converts the cached listings in @all_days (date string => array of
# programme hashes, as filled by grab_channel) into an array of programme
# hashes with absolute 'start'/'stop' Time values and a translated
# 'category'.
#
# Programmes whose stop time still lies before their start time after
# correction are passed to reject and left out of the result.
# date_stats is called exactly once per programme (the original called it
# twice for every accepted programme).
def transform(chan_id)
  progdata_array = []
  @all_days.each_pair do |datum, programs|
    jaar, maand, dag = datum.split('-').map { |part| part.to_i }
    programs.each do |entry|
      progdata = proghash(entry, chan_id)
      startuur, startmin, stopuur, stopmin = entry['times'].scan(/\d+/).map { |part| part.to_i }

      # An entry listed under a header other than 'ochtend' that starts
      # before 08:00 is moved forward by Dag (presumably one day in
      # seconds; defined outside this file).
      shift = startuur < 8 && entry['noshift'] == false ? Dag : 0
      start = Time.local(jaar, maand, dag, startuur, startmin) + shift
      stop = Time.local(jaar, maand, dag, stopuur, stopmin)
      # A stop before the start with an early stop hour is taken to run past midnight.
      stop += Dag if stop < start && stopuur <= 10
      progdata['start'] = start
      progdata['stop'] = stop

      date_stats(chan_id, start)
      if stop < start
        reject(myname, day_url(chan_id, 'today'), entry, progdata)
        next
      end

      progdata['category'] = Cattrans[entry['category'].gsub(/\s+/, '').downcase] || 'onbekend'
      progdata_array << progdata
    end
  end
  progdata_array
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
# Script entry point: build a UpcGrabber and call its run method (defined outside this file).
XMLTV::UpcGrabber.new.run
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
autoload :XMLTV, 'xmltv/xmltv'
|
4
|
+
require 'cgi'
|
5
|
+
|
6
|
+
|
7
|
+
module XMLTV
|
8
|
+
|
9
|
+
class VproGrabber < Grabber
|
10
|
+
# Sets up the grabber: no session cookie yet, and the configured channels
# pre-rendered as a 'zendersTV[]=<id>' query fragment joined with '&'.
def initialize
  super
  @cookie = nil
  channel_params = config['channels'].map { |chan| "zendersTV[]=#{chan}" }
  @zenders = channel_params.join('&')
end
|
15
|
+
# Root URL of the VPRO guide; fetch_all_channels loads the channel list from it.
def base_url
  'http://gids.vpro.nl'
end
|
18
|
+
# Reads the channel selector on the guide's front page and returns a Hash
# mapping each link's id to its text (both passed through to_utf). The
# Hash is also stored via save_object.
#
# Links that raise NoMethodError while being read (for instance a link
# without an id) are printed to STDERR and skipped.
def fetch_all_channels
  channels = {}
  fetch(base_url).search('ul#selZendersTV//a').each do |anchor|
    begin
      key = anchor['id'].to_utf
      channels[key] = anchor.inner_text.to_utf
    rescue NoMethodError
      STDERR.puts anchor, '===='
    end
  end
  save_object(channels, channel_list)
  channels
end
|
32
|
+
# Ruby standard lib cookie parsing is seriously broken.
# This is kind of a kludge, which works for vpro.nl.
#
# Reduces a combined Set-Cookie header value to a Cookie request header
# value: the header is split on ', ' followed by an uppercase letter, every
# resulting name=value pair is stripped of its attributes (everything from
# the first ';'), and pairs whose value is 'deleted' are dropped.
#
# cookie - the raw header String, or nil when the server sent no cookie.
#
# Returns the remaining pairs joined with '; ' ('' when there are none).
# Segments without an '=' are ignored instead of raising NoMethodError.
def self.cookie_parse(cookie)
  return '' if cookie.nil?

  kept = []
  cookie.split(/, (?=[A-Z])/).each do |segment|
    name, value = segment.split('=', 2)
    next if value.nil?

    value = value.gsub(/;.*/, '')
    kept << "#{name}=#{value}" unless value == 'deleted'
  end
  kept.join('; ')
end
|
45
|
+
# Extracts one programme from a listing row and returns it as a Hash with
# the keys 'times', 'avond', 'title', 'category' and 'desc'.
#
# @avond is switched to true by the first row whose time text starts with
# an hour above 17 and then stays set; its current value is copied into
# every programme. A missing or unreadable summary yields an empty 'desc'.
# Any other NoMethodError is re-raised after dumping the row and its
# detail node to STDERR.
def grab_detail(info)
  program = {}
  begin
    times = info.at('div.tijd').inner_text.strip
    program['times'] = times
    @avond = true if !@avond && times.to_i > 17
    program['avond'] = @avond
    program['title'] = info.at('div.uitzending').inner_text.to_utf.strip.split("\n").first
    detail = info.parent.at('div.detail')
    program['category'] = detail.at('//p').inner_text.strip.split("\n").last
    program['desc'] = begin
      detail.at('//p.summary').inner_text.to_utf.strip
    rescue StandardError
      ''
    end
  rescue NoMethodError
    STDERR.puts info, detail
    raise
  end
  program
end
|
64
|
+
# This grabber never prunes its cache: the argument is left untouched and
# the reported number of removed entries is always 0.
def clean_cache(cache)
  0
end
|
67
|
+
|
68
|
+
# Fetches three days of listings (vandaag, morgen, overmorgen) for one
# channel, stores them in @channel_info and in the channel's cache file,
# and returns the number of programmes found.
#
# On first use the session cookie is obtained from the site's front page.
# The handle returned by open is now closed through the block form; the
# original left it open.
def grab_channel(chan_id)
  if @cookie.nil?
    open('http://gids.vpro.nl') do |front_page|
      @cookie = VproGrabber.cookie_parse(front_page.meta['set-cookie'])
    end
  end
  url = "/index.php/gids"
  datasrc = ['medium=TV', @zenders, 'layout=zender', "lZenCode=#{chan_id}"]
  # NOTE(review): URI.escape no longer exists in Ruby 3.0+ - confirm the
  # Ruby version this script is expected to run on.
  data = URI.escape(datasrc.join('&'))
  http = Net::HTTP.new('gids.vpro.nl')
  cook = @cookie
  # The POST submits the channel selection; its response is not used.
  # Presumably the site remembers the selection for this cookie - TODO confirm.
  http.request_post(url, data,
                    { 'Cookie' => cook, 'Content-Type' => 'application/x-www-form-urlencoded' })
  @channel_info = Array.new
  %w{ vandaag morgen overmorgen }.each do |dag|
    response = http.request_get("#{url}?medium=TV&dag=#{dag}", { 'Cookie' => cook })
    page = Hpricot(response.body)
    vandaag = page.at('div#vandaag').inner_text
    # Evening detection in grab_detail starts afresh for every day.
    @avond = false
    page.search('div.infoRow').each do |info|
      @channel_info << grab_detail(info)
      # Keep the page's date text with each programme; transform parses it later.
      @channel_info[-1]['datum'] = vandaag
    end
  end
  save_object(@channel_info, cachefile(chan_id))
  @channel_info.size
end
|
95
|
+
|
96
|
+
# Converts the entries gathered in @channel_info into programme hashes with
# absolute 'start' and 'stop' Time values and returns them as an array.
#
# The date comes from Date.dutch applied to the entry's 'datum' text
# (Date.dutch is defined outside this file). Entries flagged 'avond' that
# start before 09:00 are moved forward by Dag, and a stop that still lies
# before the start gets another Dag added. Any exception is printed to
# STDERR and re-raised.
def transform(chan_id)
  @channel_info.map do |entry|
    begin
      progdata = proghash(entry, chan_id)
      date = Date.dutch(entry['datum'])
      startuur, startmin, stopuur, stopmin = entry['times'].split(/[^0-9]+/).map { |part| part.to_i }
      shift = entry['avond'] && startuur < 9 ? Dag : 0
      start = Time.local(date.year, date.month, date.day, startuur, startmin) + shift
      stop = Time.local(date.year, date.month, date.day, stopuur, stopmin) + shift
      progdata['start'] = start
      progdata['stop'] = stop
      date_stats(chan_id, start)
      # A programme that ends "before" it starts runs past midnight.
      progdata['stop'] = stop + Dag if stop < start
      progdata
    rescue Exception => exc
      STDERR.puts exc, exc.message, exc.backtrace
      raise
    end
  end
end
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
122
|
+
# Script entry point: build a VproGrabber and call its run method (defined outside this file).
XMLTV::VproGrabber.new.run
|