xmltv 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'date'
5
+ #require 'uri'
6
# Extensions to Date used by the UPC grabber.
class Date
  # Today's date ("vandaag" is Dutch for today), captured once at load time.
  Vandaag = Date.today

  # Turns a day designation from the UPC site into a Date.
  #
  # string - either the literal 'today' or a text understood by Date.dutch
  #          (Date.dutch is defined outside this file).
  #
  # Returns Vandaag for 'today'. Any other text is parsed with Date.dutch;
  # when that yields today's date the result is moved one week ahead.
  def self.upcdate(string)
    return Vandaag if string == 'today'

    parsed = dutch(string)
    parsed == Vandaag ? parsed + 7 : parsed
  end
end
17
+
18
+ module XMLTV
19
+
20
+ class UpcGrabber < Grabber
21
# Day designations to grab: 'today' followed by the lowercase English weekday names.
Days_to_grab = ['today'] + Date::DAYNAMES.map { |dayname| dayname.downcase }

# Translation table from the Dutch category labels to English category names.
Cattrans = {
  'drama'         => 'Drama',
  'educatie'      => 'Educational',
  'erotiek'       => 'Adult',
  'kids/jeugd'    => 'Children',
  'kunst/cultuur' => 'Arts/Culture',
  'nieuws'        => 'News',
  'show/spelshow' => 'Game',
  'speelfilm'     => 'Movies',
  'sport'         => 'Sports',
  'vrijetijd'     => 'Educational'
}
32
# Root URL of the UPC site; every page URL in this grabber is built on it.
def base_url
  'http://www.upclive.nl'
end
35
+
36
# Walks the paginated channel overview (at most 200 pages) and collects
# every channel listed there.
#
# Returns a Hash keyed by the number found after "Zendernummer", each value
# being [name, packet, url]. The same hash is passed to save_object together
# with channel_list.
def fetch_all_channels
  channels = {}
  catch(:ready) do
    (1..200).each do |pagenr|
      www = "#{base_url}/Televisie/TV_gids/Zenders/?channels=All&theme=All&page=#{pagenr}"
      STDERR.puts(www) if XmltvOptions.verbose
      page = fetch(www)
      blocks = page.search('div.epg_listings_bar4c') + page.search('div.epg_listings_bar4cend')
      blocks.each do |chn|
        link = chn.at('h5 a')
        # The first block without a channel link ends the whole scan.
        throw :ready if link.nil?

        url = REXML::Text.read_with_substitution(link['href']).gsub('nowandnext', '')
        name = URI.unescape(url[/channels=(.*)&/, 1])
        tekst = chn.at('div.right')
        zender = tekst.inner_text[/Zendernummer\s+([0-9]+)/, 1]
        packet = tekst.at('a.epg_underline').inner_text.split[-1]
        packet = '' if packet == 'Pack'
        channels[zender] = [name, packet, url]
      end
    end
  end
  save_object(channels, channel_list)
  channels
end
62
# Drops every cache entry whose key, parsed as a date, lies before Vandaag.
#
# cache - Hash keyed by date strings; modified in place.
#
# Returns the number of entries removed.
def clean_cache(cache)
  size_before = cache.size
  cache.delete_if { |dt, _programs| Date.parse(dt) < Vandaag }
  size_before - cache.size
end
71
+
72
# Display name of a channel: the first element of its all_channels entry.
def channel_display(chan_id)
  entry = all_channels[chan_id]
  entry[0]
end
75
# Builds the listings URL for one channel and day: base_url, then the third
# element of the channel's all_channels entry, then "<day>_all".
def day_url(chan_id, day)
  prefix = base_url
  channel_path = all_channels[chan_id][2]
  "#{prefix}#{channel_path}#{day}_all"
end
78
+
79
# Scrapes the listings of one channel for 'today' and every weekday page,
# skipping days that are already present in the channel's cache.
#
# chan_id - key into all_channels.
#
# Returns the number of programmes scraped during this call. The per-day data
# is kept in @all_days and handed to save_object at the end.
# Raises a RuntimeError when the page fetched for 'today' shows another date,
# and re-raises NoMethodError (after calling save with the URL and page) when
# the date element cannot be read.
def grab_channel(chan_id)
  @all_days = load_cachefile(chan_id)
  @all_days.delete(Date.today.to_s) ## Always fetch today to calibrate calendars
  total = 0
  Days_to_grab.each_with_index do |day, index|
    # Index 0 is 'today'; the other entries are looked up in Date::Dagen.
    wanted = Date.upcdate(index.zero? ? day : Date::Dagen[index - 1])
    next if @all_days.has_key?(wanted.to_s)

    url = day_url(chan_id, day)
    page = fetch(url)
    begin
      datum = Date.dutch(page.at('//div.epg_listings_bar1//span').inner_text)
    rescue NoMethodError
      save(url, page)
      raise
    end
    raise "#{day} #{datum} Upc is gek geworden" if day == 'today' && datum != Date.today

    noshift = nil
    programs = []
    page.at('div.ch_ci2_epg_center').search('/div').each do |programme|
      css_class = programme['class']
      if css_class == 'epg_header_4'
        # Section header: remember whether we are in the 'ochtend' section.
        noshift = (programme.inner_text.strip.downcase == 'ochtend')
      elsif css_class == 'listing_visible'
        program = {}
        programs << program
        program['noshift'] = noshift
        program['title'] = programme.at('div.col7').inner_text.strip
        program['times'] = programme.at('div.col8').inner_text.gsub(/\s/, '')
        program['category'] = programme.at('div.col9').inner_text.strip
        # The description is the run of leading text nodes; stop at the first other node.
        desc = ''
        programme.at('div.info_color').each_child do |node|
          break unless node.class == Hpricot::Text
          desc << node.to_s.strip
        end
        program['desc'] = desc
      end
    end
    @all_days[datum.to_s] = programs
    total += programs.size
  end
  save_object(@all_days, cachefile(chan_id))
  total
end
125
# Converts the scraped per-day listings in @all_days into programme hashes
# with absolute start and stop times.
#
# chan_id - channel id, passed on to proghash, date_stats and day_url.
#
# Returns an Array of the hashes produced by proghash, each extended with
# 'start', 'stop' and 'category'. Entries whose stop time still precedes
# their start time are handed to reject and left out of the result.
#
# date_stats is called exactly once per accepted programme. The original
# called it both before and after the reject check, so accepted programmes
# were reported twice and rejected ones once.
# NOTE(review): assumes date_stats accumulates per call - confirm against its definition.
def transform(chan_id)
  progdata_array = []
  @all_days.each_pair do |datum, programs|
    jaar, maand, dag = datum.split('-').map { |x| x.to_i }
    programs.each do |entry|
      progdata = proghash(entry, chan_id)
      startuur, startmin, stopuur, stopmin = entry['times'].scan(/\d+/).map { |x| x.to_i }
      # Early-hour programmes outside the 'ochtend' section get a Dag offset
      # (Dag is defined outside this file; presumably one day in seconds).
      shift = startuur < 8 && entry['noshift'] == false ? Dag : 0
      progdata['start'] = start = Time.local(jaar, maand, dag, startuur, startmin) + shift
      progdata['stop'] = stop = Time.local(jaar, maand, dag, stopuur, stopmin)

      # A stop in the early hours that precedes the start gets the same offset.
      progdata['stop'] += Dag if stop < start && stopuur <= 10

      if progdata['stop'] < progdata['start']
        reject(myname, day_url(chan_id, 'today'), entry, progdata)
        next
      end
      date_stats(chan_id, progdata['start'])
      progdata['category'] = Cattrans[entry['category'].gsub(/\s+/, '').downcase] || 'onbekend'
      progdata_array << progdata
    end
  end
  progdata_array
end
155
+ end
156
+ end
157
+ XMLTV::UpcGrabber.new.run
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'cgi'
5
+
6
+
7
+ module XMLTV
8
+
9
+ class VproGrabber < Grabber
10
# Sets up the grabber: no cookie yet, and the configured channels rendered
# as a "zendersTV[]=<id>" query fragment for later requests.
def initialize
  super
  @cookie = nil
  selections = config['channels'].map { |channel| "zendersTV[]=#{channel}" }
  @zenders = selections.join('&')
end
15
# Root URL of the VPRO programme guide.
def base_url
  'http://gids.vpro.nl'
end
18
# Reads the channel selector on the guide's front page.
#
# Returns a Hash mapping each link's id attribute to its text, both passed
# through to_utf. Links that raise NoMethodError are written to STDERR and
# skipped. The hash is also passed to save_object together with channel_list.
def fetch_all_channels
  channels = {}
  fetch(base_url).search('ul#selZendersTV//a').each do |anchor|
    begin
      channels[anchor['id'].to_utf] = anchor.inner_text.to_utf
    rescue NoMethodError
      STDERR.puts anchor, '===='
    end
  end
  save_object(channels, channel_list)
  channels
end
32
# Ruby standard lib cookie parsing is seriously broken
# This is kind of a kludge, which works for vpro.nl
#
# Reduces a combined Set-Cookie header to a value for the Cookie request header.
#
# cookie - the raw header text, or nil when the server sent no Set-Cookie.
#
# The header is split on ", " followed by an uppercase letter, so the comma
# inside an "expires=Thu, 01-..." attribute does not start a new cookie.
# Everything from the first ';' of each value is dropped, and cookies whose
# value is 'deleted' are skipped. Fragments without any '=' are ignored.
#
# Returns the remaining "name=value" pairs joined with '; ' ('' when none).
def self.cookie_parse(cookie)
  return '' if cookie.nil?

  ourcookie = []
  cookie.split(/, (?=[A-Z])/).each do |fragment|
    cook, val = fragment.split('=', 2)
    next if val.nil? # no '=' at all: nothing to send back

    val = val.gsub(/;.*/, '')
    ourcookie << "#{cook}=#{val}" unless val == 'deleted'
  end
  ourcookie.join('; ')
end
45
# Extracts one programme from a listing row.
#
# info - the row element; its parent is searched for the 'div.detail' block.
#
# Returns a Hash with 'times', 'avond', 'title', 'category' and 'desc'.
# 'desc' falls back to '' when the summary cannot be read. @avond is switched
# to true as soon as a time text starting with a number above 17 is seen and
# is copied into every programme. On NoMethodError the row and detail
# elements are written to STDERR and the error is re-raised.
def grab_detail(info)
  program = {}
  tijd = info.at('div.tijd').inner_text.strip
  program['times'] = tijd
  @avond = true if !@avond && tijd.to_i > 17
  program['avond'] = @avond
  program['title'] = info.at('div.uitzending').inner_text.to_utf.strip.split("\n")[0]
  detail = info.parent.at('div.detail')
  program['category'] = detail.at('//p').inner_text.strip.split("\n")[-1]
  program['desc'] =
    begin
      detail.at('//p.summary').inner_text.to_utf.strip
    rescue StandardError
      ''
    end
  program
rescue NoMethodError
  STDERR.puts info, detail
  raise
end
64
# Leaves the cache untouched and always returns 0.
def clean_cache(cache)
  0
end
67
+
68
# Fetches the listings of one channel for today, tomorrow and the day after.
#
# chan_id - channel code sent as lZenCode in the selection request.
#
# On first use the front page is opened once to obtain the session cookie.
# The handle is opened in block form so it is closed again right away; the
# original left it open. The front-page URL and the HTTP host both come from
# base_url instead of being repeated as literals.
#
# Returns the number of programmes collected. The programmes are kept in
# @channel_info and handed to save_object with the channel's cache file.
def grab_channel(chan_id)
  if @cookie.nil?
    set_cookie = open(base_url) { |front| front.meta['set-cookie'] }
    @cookie = VproGrabber.cookie_parse(set_cookie)
  end
  url = "/index.php/gids"
  datasrc = ['medium=TV', @zenders, 'layout=zender', "lZenCode=#{chan_id}"]
  data = URI.escape(datasrc.join('&'))
  http = Net::HTTP.new(URI.parse(base_url).host)
  cook = @cookie
  # The POST response is not used; only the GET requests below are parsed.
  # NOTE(review): presumably the POST stores the channel selection in the session - confirm.
  http.request_post(url, data,
                    { 'Cookie' => cook, 'Content-Type' => 'application/x-www-form-urlencoded' })
  @channel_info = []
  %w{ vandaag morgen overmorgen }.each do |dag|
    response = http.request_get("#{url}?medium=TV&dag=#{dag}", { 'Cookie' => cook })
    page = Hpricot(response.body)
    vandaag = page.at('div#vandaag').inner_text
    # grab_detail flips @avond once it sees an evening time; start each day fresh.
    @avond = false
    page.search('div.infoRow').each do |info|
      @channel_info << grab_detail(info)
      @channel_info[-1]['datum'] = vandaag
    end
  end
  save_object(@channel_info, cachefile(chan_id))
  @channel_info.size
end
95
+
96
# Converts the programmes collected in @channel_info into programme hashes
# with absolute start and stop times.
#
# chan_id - channel id, passed on to proghash and date_stats.
#
# Returns an Array of the hashes produced by proghash, each extended with
# 'start' and 'stop'.
#
# The times are read with scan(/\d+/), so text before the first digit cannot
# shift the hour/minute fields the way a leading empty field from split did.
# Only StandardError is logged and re-raised; signals and exits pass through
# without being written to STDERR first.
def transform(chan_id)
  progdata_array = []
  @channel_info.each do |entry|
    begin
      progdata_array << (progdata = proghash(entry, chan_id))
      date = Date.dutch(entry['datum'])
      startuur, startmin, stopuur, stopmin = entry['times'].scan(/\d+/).map { |x| x.to_i }
      # Early-hour programmes seen after the evening started get a Dag offset
      # (Dag is defined outside this file; presumably one day in seconds).
      shift = entry['avond'] && startuur < 9 ? Dag : 0
      progdata['start'] = start = Time.local(date.year, date.month, date.day, startuur, startmin) + shift
      progdata['stop'] = stop = Time.local(date.year, date.month, date.day, stopuur, stopmin) + shift
      date_stats(chan_id, progdata['start'])
      # A stop before the start gets the same offset once more.
      progdata['stop'] += Dag if stop < start
    rescue StandardError => exc
      STDERR.puts exc, exc.message, exc.backtrace
      raise
    end
  end
  progdata_array
end
119
+ end
120
+
121
+ end
122
+ XMLTV::VproGrabber.new.run