xmltv 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'date'
5
+ #require 'uri'
6
# Additions to the standard Date class used by the UPC grabber.
class Date
  # The current date, evaluated once when this file is loaded.
  Vandaag = Date.today

  # Resolves a day label to a Date.
  #
  # string - 'today', or any label accepted by Date.dutch (defined elsewhere).
  #
  # Returns Vandaag for 'today'. Any other label is handed to Date.dutch; when
  # that result equals Vandaag, the date seven days later is returned instead.
  def self.upcdate(string)
    return Vandaag if string == 'today'

    resolved = dutch(string)
    resolved == Vandaag ? resolved + 7 : resolved
  end
end
17
+
18
+ module XMLTV
19
+
20
+ class UpcGrabber < Grabber
21
# Day labels to fetch, in order: 'today' followed by the lower-cased weekday
# names taken from Date::DAYNAMES.
Days_to_grab = ['today'] + Date::DAYNAMES.map { |dayname| dayname.downcase }

# Translation table from the site's category labels to the category names
# stored in the programme data. Lookups are done with the label lower-cased
# and stripped of whitespace.
Cattrans = {
  'drama'         => 'Drama',
  'educatie'      => 'Educational',
  'erotiek'       => 'Adult',
  'kids/jeugd'    => 'Children',
  'kunst/cultuur' => 'Arts/Culture',
  'nieuws'        => 'News',
  'show/spelshow' => 'Game',
  'speelfilm'     => 'Movies',
  'sport'         => 'Sports',
  'vrijetijd'     => 'Educational'
}
32
# Root of the UPC site; the channel-list and per-day URLs are built on top of it.
#
# Returns the URL as a String, without a trailing slash.
def base_url
  "http://www.upclive.nl"
end
35
+
36
# Scrapes the paged channel overview and stores the result via save_object.
#
# Pages 1..200 are requested in order. Paging stops after the first page that
# contains no channel cells at all, or that contains at least one cell without
# an 'h5 a' link. The remaining cells of that final page are still processed,
# so channels listed after an empty cell (the 'bar4cend' cells are appended
# after all 'bar4c' cells) are not lost.
#
# Returns a Hash mapping the channel number (String, as matched after
# 'Zendernummer') to [name, packet, url]:
#   name   - URI-unescaped value of the 'channels' query parameter,
#   packet - last word of the 'a.epg_underline' text, or '' when that is 'Pack',
#   url    - the cell's link with entities substituted and 'nowandnext' removed.
def fetch_all_channels
  channels = Hash.new
  1.upto(200) do |pagenr|
    www = "#{base_url}/Televisie/TV_gids/Zenders/?channels=All&theme=All&page=#{pagenr}"
    STDERR.puts(www) if XmltvOptions.verbose
    page = fetch(www)
    cells = page.search('div.epg_listings_bar4c') + page.search('div.epg_listings_bar4cend')
    # A page without any cells means we ran past the end of the list.
    last_page = cells.empty?
    cells.each do |cell|
      link = cell.at('h5 a')
      if link.nil?
        # Cell without a link: this is the final page, but keep going so the
        # cells that follow it in the list are not dropped.
        last_page = true
        next
      end
      url = REXML::Text.read_with_substitution(link['href']).gsub('nowandnext','')
      name = URI.unescape(url[/channels=(.*)&/, 1])
      tekst = cell.at('div.right')
      zender = tekst.inner_text[/Zendernummer\s+([0-9]+)/, 1]
      packet = tekst.at('a.epg_underline').inner_text.split[-1]
      packet = '' if packet == 'Pack'
      channels[zender] = [name, packet, url]
    end
    break if last_page
  end
  save_object(channels, channel_list)
  channels
end
62
# Removes every cache entry whose key parses to a date before Vandaag.
#
# cache - Hash keyed by date strings that Date.parse accepts; modified in place.
#
# Returns the number of entries removed.
def clean_cache(cache)
  size_before = cache.size
  cache.delete_if { |dt, _programs| Date.parse(dt) < Vandaag }
  size_before - cache.size
end
71
+
72
# Display name of a channel.
#
# chan_id - key into all_channels.
#
# Returns the first element of the channel's entry in all_channels.
def channel_display(chan_id)
  entry = all_channels[chan_id]
  entry[0]
end
75
# Builds the listing URL of one channel for one day.
#
# chan_id - key into all_channels; the third element of its entry is the
#           channel's URL path.
# day     - day label appended to that path.
#
# Returns base_url + channel path + day + '_all'.
def day_url(chan_id, day)
  channel_path = all_channels[chan_id][2]
  "#{base_url}#{channel_path}#{day}_all"
end
78
+
79
# Scrapes the listings of one channel for every day in Days_to_grab that is
# not yet present in the channel's cache, then writes the cache back.
#
# chan_id - key into all_channels.
#
# Each scraped programme is a Hash with the keys 'noshift', 'title', 'times',
# 'category' and 'desc'; the programmes of one day are stored in @all_days
# under the date read from the page itself.
#
# Returns the number of programmes scraped during this call.
# Re-raises NoMethodError from reading the page's date (after passing the page
# to save) and raises RuntimeError when the 'today' page carries another date.
def grab_channel(chan_id)
  @all_days = load_cachefile(chan_id)
  # Always fetch today to calibrate calendars
  @all_days.delete(Date.today.to_s)
  total = 0
  Days_to_grab.each_with_index do |day, index|
    # 'today' is resolved as is; the weekdays are resolved through Date::Dagen.
    label = index.zero? ? day : Date::Dagen[index - 1]
    date = Date.upcdate(label)
    next if @all_days.has_key?(date.to_s)

    url = day_url(chan_id, day)
    page = fetch(url)
    begin
      datum = Date.dutch(page.at('//div.epg_listings_bar1//span').inner_text)
    rescue NoMethodError
      save(url, page)
      raise
    end
    raise "#{day} #{datum} Upc is gek geworden" if day == 'today' && datum != Date.today

    # noshift is true for programmes listed under an 'ochtend' header, false
    # under any other header, and nil before the first header.
    noshift = nil
    programs = []
    page.at('div.ch_ci2_epg_center').search('/div').each do |programme|
      css_class = programme['class']
      if css_class == 'epg_header_4'
        noshift = (programme.inner_text.strip.downcase == 'ochtend')
      elsif css_class == 'listing_visible'
        program = {}
        program['noshift'] = noshift
        program['title'] = programme.at('div.col7').inner_text.strip
        program['times'] = programme.at('div.col8').inner_text.gsub(/\s/,'')
        program['category'] = programme.at('div.col9').inner_text.strip
        # The description is the run of leading text nodes; stop at the first
        # child that is anything else.
        desc = ''
        programme.at('div.info_color').each_child do |node|
          break unless node.class == Hpricot::Text
          desc << node.to_s.strip
        end
        program['desc'] = desc
        programs << program
      end
    end
    @all_days[datum.to_s] = programs
    total += programs.size
  end
  save_object(@all_days, cachefile(chan_id))
  total
end
125
# Converts the scraped programmes in @all_days into programme hashes with
# absolute start and stop times.
#
# chan_id - channel id, passed on to proghash, date_stats and day_url.
#
# A start hour before 8 combined with noshift == false moves the start one day
# (Dag) forward. A stop that lies before the start and has an hour of at most
# 10 is moved one day forward as well. Entries whose stop is still before
# their start are handed to reject and left out of the result.
#
# Returns an Array of the programme hashes that were accepted.
def transform(chan_id)
  progdata_array = []
  @all_days.each_pair do |datum, programs|
    jaar, maand, dag = datum.split('-').map { |part| part.to_i }
    programs.each do |entry|
      progdata = proghash(entry, chan_id)
      startuur, startmin, stopuur, stopmin = entry['times'].scan(/\d+/).map { |part| part.to_i }
      if startuur < 8 && entry['noshift'] == false
        shift = Dag
      else
        shift = 0
      end
      start = Time.local(jaar, maand, dag, startuur, startmin) + shift
      progdata['start'] = start
      stop = Time.local(jaar, maand, dag, stopuur, stopmin)
      progdata['stop'] = stop

      progdata['stop'] += Dag if stop < start && stopuur <= 10
      date_stats(chan_id, progdata['start'])
      if progdata['stop'] < progdata['start']
        reject(myname, day_url(chan_id, 'today'), entry, progdata)
        next
      end
      # NOTE(review): date_stats is called a second time for accepted entries,
      # exactly as before - confirm whether that is intended.
      date_stats(chan_id, progdata['start'])
      category_key = entry['category'].gsub(/\s+/,'').downcase
      progdata['category'] = Cattrans[category_key] || 'onbekend'
      progdata_array << progdata
    end
  end
  progdata_array
end
155
+ end
156
+ end
157
+ XMLTV::UpcGrabber.new.run
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'cgi'
5
+
6
+
7
+ module XMLTV
8
+
9
+ class VproGrabber < Grabber
10
# Sets up the grabber: no cookie yet, and the configured channels pre-rendered
# as 'zendersTV[]=<channel>' parameters joined with '&'.
def initialize
  super
  @cookie = nil
  channel_params = config['channels'].map { |channel| "zendersTV[]=#{channel}" }
  @zenders = channel_params.join('&')
end
15
# Root of the VPRO guide site, fetched to obtain the channel list.
#
# Returns the URL as a String, without a trailing slash.
def base_url
  "http://gids.vpro.nl"
end
18
# Reads the channel list from the site's front page and stores it via
# save_object.
#
# Every link found under 'ul#selZendersTV' contributes one entry: its 'id'
# attribute is the key and its text the value, both converted with to_utf.
# A link that raises NoMethodError while being read is printed to STDERR,
# followed by '====', and skipped.
#
# Returns the Hash of channel id => channel name.
def fetch_all_channels
  channels = {}
  fetch(base_url).search('ul#selZendersTV//a').each do |anchor|
    begin
      key = anchor['id'].to_utf
      channels[key] = anchor.inner_text.to_utf
    rescue NoMethodError
      STDERR.puts anchor, '===='
    end
  end
  save_object(channels, channel_list)
  channels
end
32
# Ruby standard lib cookie parsing is seriously broken.
# This is kind of a kludge, which works for vpro.nl.
#
# Reduces a combined Set-Cookie header to a value for a Cookie request header.
#
# cookie - the Set-Cookie header as one String, with the individual cookies
#          separated by ', '; may be nil when no such header was received.
#
# The header is split only at ', ' followed by an upper-case letter, so the
# comma inside an 'expires' date (followed by a digit) does not split a cookie.
# Of every cookie only 'name=value' is kept; everything from the first ';' on
# is dropped. Cookies whose value is 'deleted' and segments without a '=' are
# skipped.
#
# Returns the remaining 'name=value' pairs joined with '; ', or '' when there
# are none.
def self.cookie_parse(cookie)
  return '' if cookie.nil?

  pairs = []
  cookie.split(/, (?=[A-Z])/).each do |segment|
    name, value = segment.split('=', 2)
    # A segment without '=' carries no value to send back.
    next if value.nil?

    value = value.gsub(/;.*/, '')
    pairs << "#{name}=#{value}" unless value == 'deleted'
  end
  pairs.join('; ')
end
45
# Extracts one programme from a listing row.
#
# info - the row element; its parent is expected to hold a 'div.detail' element.
#
# Also maintains @avond: once a row's time text starts with an hour above 17
# the flag is set to true and stays true for the following rows.
#
# Returns a Hash with the keys 'times', 'avond', 'title', 'category' and
# 'desc'; 'desc' is '' when the summary cannot be read.
# On NoMethodError the row and the detail element are printed to STDERR and
# the error is re-raised.
def grab_detail(info)
  program = {}
  detail = nil
  begin
    times = info.at('div.tijd').inner_text.strip
    program['times'] = times
    @avond = true if !@avond && times.to_i > 17
    program['avond'] = @avond
    title_lines = info.at('div.uitzending').inner_text.to_utf.strip.split("\n")
    program['title'] = title_lines[0]
    detail = info.parent.at('div.detail')
    category_lines = detail.at('//p').inner_text.strip.split("\n")
    program['category'] = category_lines[-1]
    # Best effort: a missing summary yields an empty description.
    program['desc'] = detail.at('//p.summary').inner_text.to_utf.strip rescue ''
  rescue NoMethodError => err
    STDERR.puts info, detail
    raise
  end
  program
end
64
# Cache clean-up for this grabber: nothing is removed.
#
# cache - not used.
#
# Returns 0, the number of entries removed.
def clean_cache(cache)
  0
end
67
+
68
# Fetches the listings of one channel for 'vandaag', 'morgen' and 'overmorgen'
# and stores them via save_object.
#
# chan_id - channel code, sent as the 'lZenCode' form field.
#
# On the first call the site's front page is requested once to obtain its
# Set-Cookie header; the parsed cookie is kept in @cookie and sent with every
# later request. A form POST with the channel selection is issued first, then
# one GET per day. Every 'div.infoRow' on a day page becomes a programme Hash
# (see grab_detail) that additionally gets the page's 'div#vandaag' text under
# 'datum'.
#
# Returns the number of programmes collected in @channel_info.
def grab_channel(chan_id)
  if @cookie.nil?
    # Block form, so the handle returned by open is closed again as soon as
    # the header has been read.
    raw_cookie = open('http://gids.vpro.nl') { |site| site.meta['set-cookie'] }
    @cookie = VproGrabber.cookie_parse(raw_cookie)
  end
  url = "/index.php/gids"
  datasrc = ['medium=TV', @zenders, 'layout=zender', "lZenCode=#{chan_id}"]
  data = URI.escape(datasrc.join('&'))
  http = Net::HTTP.new('gids.vpro.nl')
  cook = @cookie
  # The response to the POST is not looked at.
  # NOTE(review): presumably the POST stores the selection server-side for
  # this cookie - confirm.
  http.request_post(url, data,
                    { 'Cookie' => cook, 'Content-Type' => 'application/x-www-form-urlencoded' })
  @channel_info = []
  %w{ vandaag morgen overmorgen }.each do |dag|
    response = http.request_get("#{url}?medium=TV&dag=#{dag}", { 'Cookie' => cook })
    page = Hpricot(response.body)
    vandaag = page.at('div#vandaag').inner_text
    # The evening flag maintained by grab_detail starts afresh for each day.
    @avond = false
    page.search('div.infoRow').each do |info|
      program = grab_detail(info)
      program['datum'] = vandaag
      @channel_info << program
    end
  end
  save_object(@channel_info, cachefile(chan_id))
  @channel_info.size
end
95
+
96
# Converts the scraped entries in @channel_info into programme hashes with
# absolute start and stop times.
#
# chan_id - channel id, passed on to proghash and date_stats.
#
# The date comes from the entry's 'datum' text via Date.dutch. Entries flagged
# 'avond' with a start hour before 9 get both times moved one day (Dag)
# forward. A stop that still lies before the start is moved one more day
# forward.
#
# Returns an Array with one programme hash per entry.
# Any exception is printed to STDERR (exception, message, backtrace) and
# re-raised.
def transform(chan_id)
  progdata_array = []
  @channel_info.each do |entry|
    begin
      progdata = proghash(entry, chan_id)
      progdata_array << progdata
      date = Date.dutch(entry['datum'])
      startuur, startmin, stopuur, stopmin = entry['times'].split(/[^0-9]+/).map { |part| part.to_i }
      if entry['avond'] && startuur < 9
        shift = Dag
      else
        shift = 0
      end
      start = Time.local(date.year, date.month, date.day, startuur, startmin) + shift
      progdata['start'] = start
      stop = Time.local(date.year, date.month, date.day, stopuur, stopmin) + shift
      progdata['stop'] = stop
      date_stats(chan_id, progdata['start'])
      progdata['stop'] += Dag if stop < start
    rescue Exception => exc
      STDERR.puts exc, exc.message, exc.backtrace
      raise
    end
  end
  progdata_array
end
119
+ end
120
+
121
+ end
122
+ XMLTV::VproGrabber.new.run