xmltv 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+
5
+
6
+ module XMLTV
7
+
8
class RtGrabber < Grabber
  # Column names of one "~"-separated listing row, in file order.
  Fields = %w{
    title sub_title episode date director cast
    premiere film repeat subtitles widescreen
    new_series deaf_signed black_and_white star_rating
    certificate category desc choice datum start stop
    duration_mins
  }
  # Number of seconds added to every parsed start/stop time in #transform.
  TimeDiff = 60 * 60
  # Byte patterns that are replaced when ck_utf raises
  # Iconv::IllegalSequence.  The conversion helper adds further (String)
  # keys mapped to '?' at run time, so this hash must stay mutable.
  EncodingErrors = {
    /\303[\306\346\341]/ => "'",
    /\310\355/ => "\303\241",
    /\310\321/ => "\303\245"
  }

  # Version banner: this grabber's version followed by the superclass's.
  def version
    ["RtGrabber 0.9", super].join("\n")
  end

  # Root URL under which channels.dat and the <chan_id>.dat files live.
  def base_url
    'http://xmltv.radiotimes.com/xmltv'
  end

  # Downloads channels.dat ("id|name" per line), stores the resulting
  # id => name hash via save_object and returns it.
  # Lines that do not yield both an id and a name (blank or free-text
  # lines) are skipped instead of being stored under nil/garbage keys.
  def fetch_all_channels
    channels = Hash.new
    lines = open("#{base_url}/channels.dat") { |h| h.readlines }
    lines.each do |line|
      chan_id, name = line.chomp.split('|')
      next if chan_id.nil? || name.nil?
      channels[chan_id] = name
    end
    save_object(channels, channel_list)
    channels
  end

  # Language code of the listings.
  def lang
    'en'
  end

  ### Must Implement

  # Downloads the listing file of +chan_id+ and fills @channel_info with
  # one hash per line, keyed by the names in Fields.  Columns that are
  # missing, empty or the literal 'false' are left out of the hash.
  # Returns the number of lines read.
  # Any error is logged to STDERR together with the offending row and
  # then re-raised.
  def grab_channel(chan_id)
    uri = "#{base_url}/#{chan_id}.dat"
    # uri = '/home/han/47.dat'
    # uri = '/var/tmp/xmltv/102.dat'
    info = open(uri) { |h| h.readlines }.map { |x| x.chomp }
    @hits += 1
    @channel_info = Array.new
    info.each do |program|
      begin
        prog = program.split('~')
        entry = Hash.new
        Fields.each_with_index do |el, i|
          item = prog[i]
          # String#split drops trailing empty columns, so a short row
          # yields nil here; treat it like an empty column.
          next if item.nil? || item == 'false' || item.empty?
          entry[el] = rt_field_to_utf(item)
        end
        @channel_info << entry
      rescue StandardError => exc
        STDERR.puts exc.class, exc.message, exc.backtrace
        PP.pp prog, STDERR
        raise
      end
    end
    info.size
  end

  # Converts the rows collected by #grab_channel into an array of
  # program hashes obtained from proghash.
  # Rows lacking 'datum', 'start' or 'stop' cannot be placed in time and
  # are skipped.
  def transform(chan_id)
    progdata_array = Array.new
    @channel_info.each do |p|
      next unless p['datum'] && p['start'] && p['stop']
      progdata_array << (progdata = proghash(p, chan_id))
      dag, maand, jaar = p['datum'].scan(/\d+/).map { |x| x.to_i }
      startuur, startmin = p['start'].scan(/\d+/).map { |x| x.to_i }
      stopuur, stopmin = p['stop'].scan(/\d+/).map { |x| x.to_i }
      progdata['start'] = Time.local(jaar, maand, dag, startuur, startmin) + TimeDiff
      date_stats(chan_id, progdata['start'])
      progdata['stop'] = Time.local(jaar, maand, dag, stopuur, stopmin) + TimeDiff
      # A stop time before the start time means the program ends on the
      # following day.
      progdata['stop'] += Dag if progdata['stop'] < progdata['start']
      progdata['credits']['director'] = p['director'] if p['director']
      progdata['video']['colour'] = 'no' if p['black_and_white']
      progdata['video']['aspect'] = '16:9' if p['widescreen']
      progdata['sub-title'] = p['episode'] || p['sub_title'] || ''
      progdata['category'] = 'Film' if p['film']
      progdata['subtitles']['type'] = 'teletext' if p['subtitles']
      progdata['star-rating']['value'] = "#{p['star_rating']}/5" if p['star_rating']
      if (cast = p['cast'])
        # Two cast layouts are handled: '|'-separated entries, from which
        # everything up to the last '*' is removed, or a plain
        # comma-separated list.
        progdata['credits']['actor'] =
          if cast.index('|')
            cast.split('|').map { |x| x.gsub(/^.*[*]/, '') }
          else
            cast.split(',')
          end
      end
    end
    progdata_array
  end

  private

  # Returns +item+ converted with ck_utf.  When the conversion raises
  # Iconv::IllegalSequence, +item+ is repaired in place and the conversion
  # is retried: first with the first matching EncodingErrors entry, then
  # by replacing the non-printable prefix of the failed sequence with '?'
  # (that prefix is also remembered in EncodingErrors).
  # Raises RuntimeError when no repair changes the string, or when the
  # failed sequence has no non-printable prefix to replace.
  def rt_field_to_utf(item)
    item.ck_utf
  rescue Iconv::IllegalSequence => e
    retry if EncodingErrors.any? { |pattern, replacement| item.gsub!(pattern, replacement) }
    prt = (e.failed =~ /[[:print:]]/)
    failstring = prt ? e.failed[0...prt] : e.failed
    # An empty prefix would make gsub! insert '?' between all characters
    # and retry without ever fixing the input.
    raise "Encoding error: #{e.failed}\n" if failstring.empty?
    STDERR.puts "Repl: #{failstring.dump}"
    EncodingErrors[failstring] = '?'
    retry if item.gsub!(failstring, '?')
    raise "Encoding error: #{e.failed}\n"
  end
end
131
+ end
132
+ XMLTV::RtGrabber.new.run
133
+
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+
5
+
6
+ module XMLTV
7
+
8
class TrivialGrabber < Grabber
  # Minimal example grabber.
  #
  # A grabber must implement fetch_all_channels or define channel_list
  # as nil; the result must respond to [].
  def fetch_all_channels
    channels = {}
    channels["1"] = "Channel"
    channels
  end

  # grab_channel returns the number of programs found.
  def grab_channel(chan_id)
    1
  end

  # transform returns an array of special hashes (obtained with proghash).
  # Here: a single program titled 'Title' that starts now and stops one
  # hour from now.
  def transform(chan_id)
    program = proghash({}, chan_id)
    program['start'] = Time.new
    program['stop'] = Time.new + 60 * 60
    program['title'] = 'Title'
    [program]
  end
end
31
+
32
+ end
33
+ XMLTV::TrivialGrabber.new.run
@@ -0,0 +1,224 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'cgi'
5
+
6
+
7
+
8
+ module XMLTV
9
+
10
class TvgidsGrabber < Grabber
  # Site genre (lower case) => category written to the program hash.
  # ('muziek' used to be listed twice; only the later value, 'Music',
  # was ever effective, so the dead 'Art/Music' entry is gone.)
  Cattrans = {
    'amusement' => 'Talk',
    'animatie' => 'Animated',
    'comedy' => 'Comedy',
    'documentaire' => 'Documentary',
    'educatief' => 'Educational',
    'erotiek' => 'Adult',
    'film' => 'Movies',
    'informatief' => 'Educational',
    'jeugd' => 'Children',
    'kunst/cultuur' => 'Arts/Culture',
    'misdaad' => 'Crime/Mystery',
    'muziek' => 'Music',
    'natuur' => 'Science/Nature',
    'nieuws/actualiteiten' => 'News',
    'overige' => 'Unknown',
    'religieus' => 'Religion',
    'serie/soap' => 'Drama',
    'sport' => 'Sports',
    'theater' => 'Arts/Culture',
    'wetenschap' => 'Science/Nature'
  }
  # Detail-page person heading => key under progdata['credits'].
  Roletrans = {
    'regie' => 'director',
    'acteurs' => 'actor',
    'presentatie' => 'presenter',
    'scenario' => 'writer'
  }

  # Detail-page heading => top-level key of the program hash.
  Titeltrans = {
    'titel aflevering' => 'sub-title',
    'jaar van premiere' => 'date',
    'aflevering' => 'episode-num'
  }

  # Fetches the detail page at +href+ (made absolute with base_url when it
  # starts with '/') and returns a hash with:
  # * 'desc' - the non-empty paragraphs of table#progDetail joined by a
  #   space, up to (not including) the first paragraph of class 'meerLinks';
  # * one entry per header cell under div#progPropt, keyed by the header
  #   text (colons removed, lower case).  The value is an array of
  #   non-empty text nodes for cells of class 'personen', otherwise the
  #   cell's stripped text.
  def grab_detail(href)
    # href[0, 1] yields a one-character String on every Ruby version.
    href = "#{base_url}#{href}" if href[0, 1] == '/'
    STDERR.puts "#{Time.now}: #{href} #{@channelhash.size}" if XmltvOptions.verbose
    program = Hash.new
    details = fetch(href)
    desc = []
    details.at('//table#progDetail').search('//tr//p').each do |p|
      break if p['class'] == 'meerLinks'
      line = p.inner_text.strip
      desc << line unless line.empty?
    end
    program['desc'] = desc.join(' ').to_utf
    details.search('//div#progPropt//tr/th').each do |pg|
      content = pg.at('../td')
      if content['class'] == 'personen'
        texts = content.at('div').search('.').find_all { |x| x.text? }
        rsl = texts.map { |x| x.to_s.strip.to_utf }.find_all { |x| !x.empty? }
      else
        rsl = content.inner_text.strip.to_utf
      end
      program[pg.inner_text.strip.gsub(':', '').downcase] = rsl
    end
    # PP.pp program, STDERR
    program
  end

  # URL of the search/overview page for +chan_id+.
  def channel_url(chan_id)
    "#{base_url}//zoeken/?periode=9&station=#{chan_id}"
  end

  # Reads the options of the first two <optgroup> elements on the overview
  # page of station 1, stores the resulting value => label hash via
  # save_object and returns it.
  def fetch_all_channels
    page = fetch(channel_url(1))
    channels = Hash.new
    page.search('//optgroup')[0..1].each do |og|
      og.search('/option').each do |g|
        channels[g['value']] = g.inner_text
      end
    end
    save_object(channels, channel_list)
    channels
  end

  # Removes every cache entry whose 'datum' (parsed with Date.dutch) lies
  # before Vandaag.  Returns the number of entries removed.
  def clean_cache(cache)
    count = 0
    cache.delete_if do |dt, en|
      rsl = (Date.dutch(en['datum']) < Vandaag)
      count += 1 if rsl
      rsl
    end
    count
  end

  # Walks the overview page of +chan_id+ and fetches the detail page of
  # every program that is not yet in the cache loaded into @channelhash.
  # The cache is saved again when at least one program was fetched.
  #
  # Returns the program count announced by the page.  When the page has
  # no such counter, the text of the result box is written to STDERR and
  # the method returns nil (NOTE(review): other grabbers document a
  # numeric return value - confirm how the caller treats nil).
  def grab_channel(chan_id)
    url = channel_url(chan_id)
    page = fetch(url)
    @channelhash = load_cachefile(chan_id)
    period = datum = nil
    fetched = 0
    counter = page.at("//table.overzicht//tr//td/strong")
    if counter.nil?
      niks = page.at("//div#resultaten").at("//td").inner_text
      STDERR.puts url, niks
      return
    end
    found = remaining = counter.inner_text.to_i
    page.search("//table.overzicht//tr").each do |pg|
      td = pg.at('td')
      next if td.nil? || pg['class'] == 'zoekstring'
      if td['class'] == 'bloktitel'
        # Block title rows carry the period (h5) and date (h4) that apply
        # to the rows below; keep the previous value when one is absent.
        h5 = td.at('h5')
        period = h5.inner_text if h5
        h4 = td.at('h4')
        datum = h4.inner_text if h4
        next
      end
      th = pg.at('/th')
      next if th.nil?
      tijd = th.inner_text
      next unless tijd =~ /\d\d:\d\d/
      det = pg.at('/td//a')
      # A timed row without a link cannot be resolved; it stays counted in
      # +remaining+ and is reported by the sanity check below.
      next if det.nil?
      href = det['href']
      id = href[/ID=(\d+)/, 1]
      remaining -= 1
      next if @channelhash[id]
      fetched += 1
      begin
        @channelhash[id] = program = grab_detail(href)
      rescue
        STDERR.puts href, pg, '====='
        raise
      end
      program['title'] = det.inner_text.strip.to_utf
      program['period'] = period
      program['datum'] = datum
      program['tijd'] = tijd
      program['progtip'] = '4/5' if pg['class'] == 'progTip'
    end
    STDERR.puts "Something wrong remaining: #{remaining}" if remaining != 0
    save_object(@channelhash, cachefile(chan_id)) if fetched > 0
    found
  end

  # Parses a string containing "<day> <month name> <year>, <h>:<m>",
  # optionally followed later by a second "<h>:<m>" (the stop time).
  # Returns [day, month, year, start_hour, start_min] plus
  # [stop_hour, stop_min] when a stop time is present, or nil when the
  # string does not match.  The month is Date::Maanden.index(name).
  def parse_times(str)
    md = /(\d+)\s(\w+)\s(\d+),\s(\d+):(\d+)/.match(str)
    return nil unless md
    rsl = md.captures.map do |x|
      x =~ /\d/ ? x.to_i : Date::Maanden.index(x)
    end
    mdstop = /(\d+):(\d+)/.match(md.post_match)
    rsl.concat(mdstop.captures.map { |x| x.to_i }) if mdstop
    rsl
  end

  # Converts the entries in @channelhash into an array of program hashes
  # obtained from proghash.  Entries whose 'datum en tijdstip' cannot be
  # parsed are skipped.  Errors are logged to STDERR together with the
  # entry and re-raised.
  def transform(chan_id)
    progdata_array = Array.new
    @channelhash.each_pair do |id, entry|
      begin
        progdata = proghash(entry, chan_id)
        # Entries listed under the 'Nacht' block are moved one day ahead.
        shift = entry['period'] == 'Nacht' ? Dag : 0
        dag, maand, jaar, startuur, startmin, stopuur, stopmin =
          parse_times(entry['datum en tijdstip'])
        next if dag.nil?
        progdata['start'] = start = Time.local(jaar, maand, dag, startuur, startmin) + shift

        if stopuur
          stop = Time.local(jaar, maand, dag, stopuur, stopmin) + shift
          # Late-evening start with an early-morning stop: the program
          # runs past midnight.
          if start > stop && start.hour >= 21 && stop.hour <= 7
            stop += Dag
          end
          progdata['stop'] = stop
        end
        date_stats(chan_id, progdata['start'])
        if (b = entry['bijzonderheden'])
          b.downcase.split(',').each do |bijz|
            case bijz
            when /breedbeeld/
              progdata['video']['aspect'] = '16:9'
            when /zwart/
              progdata['video']['colour'] = 'no'
            when /teletekst/
              progdata['subtitles']['type'] = 'teletext'
            when /stereo/
              progdata['audio']['stereo'] = 'stereo'
            end
          end
        end
        %w{ regie acteurs scenario presentatie }.each do |role|
          progdata['credits'][Roletrans[role]] = entry[role] if entry[role]
        end
        progdata['category'] = Cattrans[entry['genre'].downcase] if entry['genre']
        progdata['star-rating']['value'] = entry['progtip'] if entry['progtip']
        # Hash#each yields key and value; with a single block parameter
        # the pair arrived as one Array and nothing was ever copied.
        Titeltrans.each do |key, target|
          progdata[target] = entry[key] if entry[key]
        end
        progdata_array << progdata

      rescue StandardError => exc
        STDERR.puts exc.class, exc.message, exc.backtrace
        PP.pp(entry, STDERR)
        raise
      end
    end
    progdata_array
  end
end
222
+
223
+ end
224
+ XMLTV::TvgidsGrabber.new.run
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'date'
5
+
6
class Date
  # Formats the date as "day.month.year" using the plain numeric values
  # (no zero padding), e.g. 5.3.2007.
  def german
    [day, month, year].join('.')
  end
end
11
+
12
+ module XMLTV
13
class TvtodayGrabber < Grabber
  # Language code of the listings.
  def lang
    'de'
  end

  # A grabber must implement fetch_all_channels or define channel_list
  # as nil; the result must respond to [].
  #
  # Reads the options of the select named 'channel' on the base page,
  # skipping empty values and values containing 'gruppeId', stores the
  # resulting value => label hash via save_object and returns it.
  def fetch_all_channels
    # Diagnostics go to STDERR (and only in verbose mode) so they cannot
    # end up in regular output; the page is fetched through the base_url
    # accessor like everywhere else in this class.
    STDERR.puts base_url if XmltvOptions.verbose
    page = fetch(base_url)
    channels = Hash.new
    page.at('//select[@name=channel]').search('//option').each do |op|
      val = op['value']
      next if val.nil? || val.empty? || val.index('gruppeId')
      channels[val] = op.inner_text.strip.to_utf
    end
    save_object(channels, channel_list)
    channels
  end

  # Fetches the detail popup at +href+ (made absolute with base_url when
  # it starts with '/') and returns a hash with:
  # * 'desc-s' - text of span.tv-vorspann-popup ('' when absent);
  # * 'desc'   - first text node directly under td.tv-sendung-info
  #   (not set when there is none);
  # * 'info'   - text of the parent of span.tv-credits, when present;
  # * 'kind'   - text of span.tv-auszeichnung, when present;
  # * one key per image title found next to the title, set to true.
  def grab_detail(href)
    program = Hash.new
    # href[0, 1] yields a one-character String on every Ruby version.
    href = "#{base_url}#{href}" if href[0, 1] == '/'
    page = fetch(href)
    info = page.at('span.tv-titel-popup').parent
    vorspann = info.at('span.tv-vorspann-popup')
    program['desc-s'] = vorspann ? vorspann.inner_text.to_utf : ''
    page.at('td.tv-sendung-info').each_child do |el|
      if el.is_a? Hpricot::Text
        program['desc'] = el.inner_text.to_utf
        break
      end
    end
    credits = page.at('span.tv-credits')
    program['info'] = credits.parent.inner_text.to_utf if credits
    categ = info.at('span.tv-auszeichnung')
    program['kind'] = categ.inner_text.to_utf if categ
    info.search('img').each do |img|
      program[img['title']] = true
    end
    program
  end

  # Removes every cache entry whose 'datum' lies before Vandaag.
  # Returns the number of entries removed.
  def clean_cache(cache)
    count = 0
    cache.delete_if do |dt, en|
      rsl = (Date.parse(en['datum']) < Vandaag)
      count += 1 if rsl
      rsl
    end
    count
  end

  # URL of the listing of +chan_id+ for the Date +day+.
  def day_url(chan_id, day)
    "#{base_url}/program2007?format=genre&offset=0&date=#{day.german}&slotIndex=all&channel=#{chan_id}&order=time"
  end

  # Walks the listings of +chan_id+ for today and the following 14 days
  # and fetches the detail page of every program that is not yet in the
  # cache loaded into @channelhash.  The cache is saved again when at
  # least one program was fetched.
  # grab_channel returns the number of programs found.
  def grab_channel(chan_id)
    fetched = found = 0
    @channelhash = load_cachefile(chan_id)
    now = Date.today
    (now..now + 14).each do |date|
      url = day_url(chan_id, date)
      page = fetch(url)
      # Becomes true once a program of this day starts after 17h; programs
      # listed after that are flagged as 'evening'.
      avond = false
      page.search('div#program_complete//tr').each do |prog|
        times = prog.at('span.tv-sendung-uhrzeit')
        next unless times
        titel = prog.at("td/a.tv-sendung-titel")
        id = titel['href'][/detailPopup\('(\d+)'/, 1]
        href = "/program2007?format=detail&sid=#{id}"
        times = times.inner_text
        # Track the evening flag for every row, cached or not; otherwise a
        # newly fetched program that follows cached evening programs would
        # wrongly be stored with evening == false.
        avond = true if times.to_i > 17
        found += 1
        next if @channelhash[id]
        begin
          @channelhash[id] = program = grab_detail(href)
          fetched += 1
        rescue
          STDERR.puts href, page, '====='
          raise
        end
        program['times'] = times
        program['title'] = titel.inner_text.to_utf
        program['datum'] = date.to_s
        program['evening'] = avond
      end
    end
    save_object(@channelhash, cachefile(chan_id)) if fetched > 0
    found
  end

  # transform returns an array of special hashes (obtained with proghash),
  # one per entry in @channelhash.  Errors are logged to STDERR together
  # with the entry and re-raised.
  def transform(chan_id)
    progdata_array = Array.new
    @channelhash.each_pair do |id, entry|
      begin
        progdata = proghash(entry, chan_id)
        date = Date.parse(entry['datum'])
        startuur, startmin, stopuur, stopmin =
          entry['times'].split(/[-.]/).map { |x| x.to_i }
        # An early-morning start listed after the evening programs belongs
        # to the next calendar day.
        date += 1 if startuur < 7 && entry['evening']
        progdata['start'] = Time.local(date.year, date.month, date.day, startuur, startmin)
        progdata['stop'] = Time.local(date.year, date.month, date.day, stopuur, stopmin)
        progdata['stop'] += Dag if progdata['stop'] < progdata['start']
        date_stats(chan_id, progdata['start'])
        # grab_detail leaves 'desc' unset when the page has no text node,
        # so both descriptions are read nil-safely.
        desc = entry['desc'].to_s
        short_desc = entry['desc-s'].to_s
        if desc.index("\t\t\t\t>") || (desc.empty? && !short_desc.empty?)
          progdata['desc'] = entry['desc-s']
        end
        if entry['info']
          role = nil
          entry['info'].split(';').map { |x| x.strip }.each do |det|
            year = nil
            case det[0, 2]
            when 'R:'
              role = 'director'
              det = det[2..-1].strip
            when 'D:'
              role = 'actor'
              det = det[2..-1]
            else
              # 'O:' items and unprefixed items: the last five characters
              # are tried as a year; otherwise the item is a further name
              # for the current role.
              year = det[-5..-1].to_i
            end
            if year && year > 1900
              progdata['date'] = year.to_s
            elsif role
              (progdata['credits'][role] ||= Array.new) << det.gsub(/\(.*\)/, '').strip
            end
          end
        end
        # These flags come from the image titles collected by grab_detail
        # and do not depend on credits being present.
        progdata['audio']['stereo'] = 'stereo' if entry['Stereo']
        progdata['subtitles']['type'] = 'teletext' if entry['Untertitel']
        progdata['video']['aspect'] = '16:9' if entry['Breitbild']

        progdata_array << progdata
      rescue StandardError => exc
        STDERR.puts exc.class, exc.message, exc.backtrace
        PP.pp(entry, STDERR)
        raise
      end
    end
    progdata_array
  end
end
183
+
184
+ end
185
+ XMLTV::TvtodayGrabber.new.run