xmltv 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+
5
+
6
+ module XMLTV
7
+
8
+ class RtGrabber < Grabber
9
# Column names of one '~'-separated programme record, in the order in which
# grab_channel reads them from a line.
Fields = %w[
  title sub_title episode date director cast
  premiere film repeat subtitles widescreen
  new_series deaf_signed black_and_white star_rating
  certificate category desc choice datum start stop
  duration_mins
]
# Number of seconds (one hour) that transform adds to every start and stop time.
TimeDiff = 3600
17
# Byte sequences that grab_channel substitutes when converting a field raises
# Iconv::IllegalSequence, mapped to their replacement text.
# grab_channel also adds entries at run time, so this hash must stay mutable.
EncodingErrors = {
  /\303[\306\346\341]/ => "'",
  /\310\355/           => "\303\241",
  /\310\321/           => "\303\245"
}
22
# Returns this grabber's own version line followed, on a new line, by
# whatever the superclass implementation of version reports.
def version
  lines = ['RtGrabber 0.9']
  lines << super
  lines.join("\n")
end
25
+
26
# Root URL under which channels.dat and the per-channel .dat files are
# requested by fetch_all_channels and grab_channel.
def base_url
  'http://xmltv.radiotimes.com/xmltv'
end
29
+
30
# Downloads channels.dat and builds the channel table from its "id|name" lines.
#
# Lines that do not contain both an id and a name (blank lines, free-text
# header lines) are ignored instead of being stored as junk entries such as
# channels[nil] = nil.
#
# The table is handed to save_object together with channel_list before it is
# returned.
#
# Returns a Hash mapping channel id (String) to channel name (String).
def fetch_all_channels
  lines = open("#{base_url}/channels.dat") { |h| h.readlines }
  channels = {}
  lines.each do |line|
    chan_id, name = line.chomp.split('|')
    # No separator or an empty id means this is not a channel record.
    next if chan_id.nil? || chan_id.empty? || name.nil?
    channels[chan_id] = name
  end
  save_object(channels, channel_list)
  channels
end
40
+
41
# Language code reported for this grabber's listings.
def lang
  'en'
end
44
+
45
+
46
+ ### Must Implement
47
+
48
+
49
# Downloads the listing of one channel and parses it into @channel_info.
#
# Every line of the .dat file is treated as one programme record whose
# '~'-separated values are named by Fields. Values that are missing, empty or
# the literal 'false' are left out of the entry. Each kept value is converted
# with ck_utf; when that raises Iconv::IllegalSequence the patterns in
# EncodingErrors are substituted and the conversion is retried, and as a last
# resort the offending leading bytes are replaced by '?' (and remembered in
# EncodingErrors for later fields).
#
# Records without 'datum', 'start' and 'stop' (blank lines, header text) are
# skipped because transform cannot compute times for them.
#
# chan_id:: channel identifier used to build the download URL
#
# Returns the number of lines read from the file (including skipped ones).
# Any StandardError is dumped to STDERR together with the split record and
# then re-raised.
def grab_channel(chan_id)
  uri = "#{base_url}/#{chan_id}.dat"
  info = open(uri) { |h| h.readlines }.map { |x| x.chomp }
  @hits += 1
  @channel_info = []
  info.each do |program|
    begin
      prog = program.split('~')
      entry = {}
      Fields.each_with_index do |el, i|
        item = prog[i]
        # String#split drops trailing empty fields, and blank or header lines
        # have few or no fields at all, so a column can be missing (nil).
        next if item.nil? || item == 'false' || item.empty?
        begin
          entry[el] = item.ck_utf
        rescue Iconv::IllegalSequence => e
          # First try the substitutions that are already known.
          replaced = false
          EncodingErrors.each_pair do |k, v|
            if item.gsub!(k, v)
              replaced = true
              break
            end
          end
          retry if replaced
          # Otherwise blank out everything up to the first printable character
          # of the unconverted remainder and remember that byte sequence.
          prt = (e.failed =~ /[[:print:]]/)
          failstring = e.failed[0...prt]
          STDERR.puts "Repl: #{failstring.dump}"
          EncodingErrors[failstring] = '?'
          retry if item.gsub!(failstring, '?')
          raise "Encoding error: #{e.failed}\n"
        end
      end
      # Not a programme record: transform needs all three to build the times.
      next unless entry['datum'] && entry['start'] && entry['stop']
      @channel_info << entry
    rescue StandardError => exc
      STDERR.puts exc.class, exc.message, exc.backtrace
      PP.pp prog, STDERR
      raise
    end
  end
  info.size
end
96
+
97
# Converts the records collected by grab_channel (@channel_info) into
# programme hashes obtained from proghash.
#
# For every record the 'datum', 'start' and 'stop' strings are split into
# their numeric parts, turned into local times and shifted by TimeDiff; a stop
# time that ends up before the start time is moved forward by Dag. The start
# time is reported to date_stats. Director, cast, picture, subtitle, category
# and star-rating details are copied when present in the record.
#
# chan_id:: channel identifier passed on to proghash and date_stats
#
# Returns an Array with one programme hash per record.
def transform(chan_id)
  @channel_info.map do |rec|
    prog = proghash(rec, chan_id)
    day, month, year = rec['datum'].scan(/\d+/).map { |n| n.to_i }
    from_hour, from_min = rec['start'].scan(/\d+/).map { |n| n.to_i }
    to_hour, to_min = rec['stop'].scan(/\d+/).map { |n| n.to_i }

    prog['start'] = Time.local(year, month, day, from_hour, from_min) + TimeDiff
    date_stats(chan_id, prog['start'])
    prog['stop'] = Time.local(year, month, day, to_hour, to_min) + TimeDiff
    # A programme that runs past midnight has a stop clock time before its start.
    if prog['stop'] < prog['start']
      prog['stop'] += Dag
    end

    prog['credits']['director'] = rec['director'] if rec['director']
    prog['video']['colour'] = 'no' if rec['black_and_white']
    prog['video']['aspect'] = '16:9' if rec['widescreen']
    prog['sub-title'] = rec['episode'] || rec['sub_title'] || ''
    prog['category'] = 'Film' if rec['film']
    prog['subtitles']['type'] = 'teletext' if rec['subtitles']
    prog['star-rating']['value'] = "#{rec['star_rating']}/5" if rec['star_rating']

    cast = rec['cast']
    if cast
      actors =
        if cast.index('|')
          # 'x*Name|y*Name' form: drop everything up to the '*' of each item.
          cast.split('|').map { |who| who.gsub(/^.*[*]/, '') }
        else
          cast.split(',')
        end
      prog['credits']['actor'] = actors
    end

    prog
  end
end
130
+ end
131
+ end
132
+ XMLTV::RtGrabber.new.run
133
+
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+
5
+
6
+ module XMLTV
7
+
8
+ class TrivialGrabber < Grabber
9
# A grabber must either implement fetch_all_channels or define channel_list
# as nil. The value returned here must respond to []; this minimal example
# returns a one-entry Hash of channel id => channel name.
def fetch_all_channels
  { '1' => 'Channel' }
end
14
# grab_channel returns the number of programmes found for the channel.
# This minimal example fetches nothing and always reports one programme.
def grab_channel(chan_id)
  1
end
18
# transform returns an Array of the special hashes obtained with proghash.
# This minimal example produces a single programme titled 'Title' that starts
# at the current time and stops about one hour later.
def transform(chan_id)
  programme = proghash({}, chan_id)
  programme['start'] = Time.new
  programme['stop'] = Time.new + 60 * 60
  programme['title'] = 'Title'
  [programme]
end
30
+ end
31
+
32
+ end
33
+ XMLTV::TrivialGrabber.new.run
@@ -0,0 +1,224 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'cgi'
5
+
6
+
7
+
8
+ module XMLTV
9
+
10
+ class TvgidsGrabber < Grabber
11
# Maps the site's (Dutch) genre names, lower-cased, to the category names
# written to the output. transform looks entries up with the downcased
# 'genre' value of a programme.
#
# 'muziek' used to be listed twice ('Art/Music' and 'Music'); in a Hash
# literal the later value wins, so only the effective 'Music' mapping is kept.
Cattrans = {
  'amusement' => 'Talk',
  'animatie' => 'Animated',
  'comedy' => 'Comedy',
  'documentaire' => 'Documentary',
  'educatief' => 'Educational',
  'erotiek' => 'Adult',
  'film' => 'Movies',
  'informatief' => 'Educational',
  'jeugd' => 'Children',
  'kunst/cultuur' => 'Arts/Culture',
  'misdaad' => 'Crime/Mystery',
  'muziek' => 'Music',
  'natuur' => 'Science/Nature',
  'nieuws/actualiteiten' => 'News',
  'overige' => 'Unknown',
  'religieus' => 'Religion',
  'serie/soap' => 'Drama',
  'sport' => 'Sports',
  'theater' => 'Arts/Culture',
  'wetenschap' => 'Science/Nature'
}

# Maps the detail-page role labels to the credit keys used in the output.
Roletrans = {
  'regie' => 'director',
  'acteurs' => 'actor',
  'presentatie' => 'presenter',
  'scenario' => 'writer'
}

# Maps detail-page labels to the programme keys they are copied to.
Titeltrans = {
  'titel aflevering' => 'sub-title',
  'jaar van premiere' => 'date',
  'aflevering' => 'episode-num'
}
46
+
47
# Fetches the detail page of one programme and extracts its description and
# property table.
#
# A href starting with '/' is prefixed with base_url. The description is the
# space-joined text of the non-empty paragraphs in table#progDetail, stopping
# at the first paragraph with class 'meerLinks'. Every header cell found under
# div#progPropt becomes a key (stripped, colons removed, lower-cased); its
# value is the text of the neighbouring cell, or for a cell of class
# 'personen' the list of non-empty text nodes inside its div.
#
# href:: relative or absolute URL of the detail page
#
# Returns a Hash with 'desc' plus one entry per property row.
def grab_detail(href)
  href = "#{base_url}#{href}" if href[0] == ?/
  STDERR.puts "#{Time.now}: #{href} #{@channelhash.size}" if XmltvOptions.verbose
  details = fetch(href)
  program = {}

  paragraphs = []
  details.at('//table#progDetail').search('//tr//p').each do |para|
    break if para['class'] == 'meerLinks'
    text = para.inner_text.strip
    paragraphs << text unless text.empty?
  end
  program['desc'] = paragraphs.join(' ').to_utf

  details.search('//div#progPropt//tr/th').each do |heading|
    cell = heading.at('../td')
    value =
      if cell['class'] == 'personen'
        text_nodes = cell.at('div').search('.').find_all { |node| node.text? }
        names = text_nodes.map { |node| node.to_s.strip.to_utf }
        names.find_all { |name| !name.empty? }
      else
        cell.inner_text.strip.to_utf
      end
    label = heading.inner_text.strip.gsub(':', '').downcase
    program[label] = value
  end
  program
end
73
+
74
# Builds the search URL that lists the programmes of one station.
#
# chan_id:: station identifier placed in the 'station' query parameter
def channel_url(chan_id)
  [base_url, '//zoeken/?periode=9&station=', chan_id].join
end
77
+
78
# Reads the station selector on the search page of station 1 and builds the
# channel table from the options of its first two option groups.
#
# The table is handed to save_object together with channel_list before it is
# returned.
#
# Returns a Hash mapping each option's 'value' attribute to its text.
def fetch_all_channels
  page = fetch(channel_url(1))
  channels = {}
  groups = page.search('//optgroup')[0..1]
  groups.each do |group|
    group.search('/option').each do |option|
      channels[option['value']] = option.inner_text
    end
  end
  save_object(channels, channel_list)
  channels
end
89
+
90
# Removes every cached programme whose 'datum' (parsed with Date.dutch) lies
# before Vandaag.
#
# cache:: Hash of cached programme entries; modified in place
#
# Returns the number of entries that were removed.
def clean_cache(cache)
  size_before = cache.size
  cache.delete_if { |_key, entry| Date.dutch(entry['datum']) < Vandaag }
  size_before - cache.size
end
100
+
101
# Scrapes the search-result page of one station and fetches the detail page
# of every programme that is not in the cache yet.
#
# The cache is loaded into @channelhash with load_cachefile. The number shown
# in the result table's <strong> cell is taken as the number of programmes on
# the site; when that cell cannot be read the text of the result block is
# printed to STDERR and the method returns nil. Rows with a 'bloktitel' cell
# update the current period (h5) and date (h4); rows whose header cell holds a
# hh:mm time are programmes, identified by the ID= number in their link.
# Newly fetched programmes get 'title', 'period', 'datum', 'tijd' and, for
# rows of class 'progTip', a 'progtip' rating. The cache is written back with
# save_object when at least one detail page was fetched.
#
# chan_id:: station identifier
#
# Returns the programme count reported by the site, or nil when the page has
# no result table.
def grab_channel(chan_id)
  url = channel_url(chan_id)
  page = fetch(url)
  @channelhash = load_cachefile(chan_id)
  block_period = nil
  block_date = nil
  fetched = 0

  begin
    found = page.at("//table.overzicht//tr//td/strong").inner_text.to_i
  rescue NoMethodError
    # No result table: report what the page says instead.
    message = page.at("//div#resultaten").at("//td").inner_text
    STDERR.puts url, message
    return
  end
  remaining = found

  page.search("//table.overzicht//tr").each do |row|
    cell = row.at('td')
    next if cell.nil? || row['class'] == 'zoekstring'

    if cell['class'] == 'bloktitel'
      # Keep the previous value when the heading is missing.
      block_period = cell.at('h5').inner_text rescue block_period
      block_date = cell.at('h4').inner_text rescue block_date
      next
    end

    tijd = row.at('/th').inner_text
    next unless tijd =~ /\d\d:\d\d/

    link = row.at('/td//a')
    href = link['href']
    id = href[/ID=(\d+)/, 1]
    remaining -= 1
    next if @channelhash[id]

    fetched += 1
    begin
      program = grab_detail(href)
      @channelhash[id] = program
    rescue
      STDERR.puts href, row, '====='
      raise
    end
    program['title'] = link.inner_text.strip.to_utf
    program['period'] = block_period
    program['datum'] = block_date
    program['tijd'] = tijd
    program['progtip'] = '4/5' if row['class'] == 'progTip'
  end

  STDERR.puts "Something wrong remaining: #{remaining}" if remaining != 0
  save_object(@channelhash, cachefile(chan_id)) if fetched > 0
  found
end
150
+
151
# Extracts date and time numbers from a string of the form
# "<day> <month name> <year>, <hh>:<mm>", optionally followed later in the
# string by a second "<hh>:<mm>".
#
# Numeric parts are converted with to_i; the month name is replaced by its
# index in Date::Maanden.
#
# str:: text to parse
#
# Returns [day, month, year, start_hour, start_min] with stop_hour and
# stop_min appended when a second time is present, or nil when the string
# does not match.
def parse_times(str)
  head = /(\d+)\s(\w+)\s(\d+),\s(\d+):(\d+)/.match(str)
  return nil unless head

  parts = head.captures.map do |token|
    token =~ /\d/ ? token.to_i : Date::Maanden.index(token)
  end
  # Look for the stop time only in the text after the first match.
  tail = /(\d+):(\d+)/.match(str[head.offset(0)[1]..-1])
  parts.concat(tail.captures.map { |token| token.to_i }) if tail
  parts
end
166
# Converts the cached entries in @channelhash into programme hashes obtained
# from proghash.
#
# The 'datum en tijdstip' text is parsed with parse_times; entries for which
# no date can be parsed are skipped. Start and stop are local times, shifted
# by Dag for entries of the 'Nacht' period; a stop time that falls before a
# late-evening start (start hour >= 21, stop hour <= 7) is moved to the next
# day. The start time is reported to date_stats. Picture, subtitle and audio
# flags come from 'bijzonderheden', credits are translated with Roletrans,
# the genre with Cattrans, and the labels listed in Titeltrans are copied to
# their output keys.
#
# chan_id:: channel identifier passed on to proghash and date_stats
#
# Returns an Array of programme hashes. Any StandardError is dumped to STDERR
# together with the offending entry and then re-raised.
def transform(chan_id)
  progdata_array = []
  @channelhash.each_pair do |_id, entry|
    begin
      progdata = proghash(entry, chan_id)
      shift = entry['period'] == 'Nacht' ? Dag : 0
      dag, maand, jaar, startuur, startmin, stopuur, stopmin =
        parse_times(entry['datum en tijdstip'])
      next if dag.nil?

      progdata['start'] = start = Time.local(jaar, maand, dag, startuur, startmin) + shift
      if stopuur
        stop = Time.local(jaar, maand, dag, stopuur, stopmin) + shift
        stop += Dag if start > stop && start.hour >= 21 && stop.hour <= 7
        progdata['stop'] = stop
      end
      date_stats(chan_id, progdata['start'])

      if (b = entry['bijzonderheden'])
        b.downcase.split(',').each do |bijz|
          case bijz
          when /breedbeeld/
            progdata['video']['aspect'] = '16:9'
          when /zwart/
            progdata['video']['colour'] = 'no'
          when /teletekst/
            progdata['subtitles']['type'] = 'teletext'
          when /stereo/
            progdata['audio']['stereo'] = 'stereo'
          end
        end
      end

      %w{ regie acteurs scenario presentatie }.each do |role|
        progdata['credits'][Roletrans[role]] = entry[role] if entry[role]
      end
      progdata['category'] = Cattrans[entry['genre'].downcase] if entry['genre']
      progdata['star-rating']['value'] = entry['progtip'] if entry['progtip']

      # Iterate key AND value: with a single block parameter Hash#each yields
      # [key, value] pairs, so the lookups never matched and nothing was copied.
      Titeltrans.each_pair do |source, target|
        progdata[target] = entry[source] if entry[source]
      end
      progdata_array << progdata
    rescue StandardError => exc
      STDERR.puts exc.class, exc.message, exc.backtrace
      PP.pp(entry, STDERR)
      raise
    end
  end
  progdata_array
end
221
+ end
222
+
223
+ end
224
+ XMLTV::TvgidsGrabber.new.run
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ autoload :XMLTV, 'xmltv/xmltv'
4
+ require 'date'
5
+
6
class Date
  # Formats the date as day.month.year without zero padding, e.g. "5.3.2007".
  def german
    [day, month, year].join('.')
  end
end
11
+
12
+ module XMLTV
13
+ class TvtodayGrabber < Grabber
14
+ # Must implement fetch_all_channels or define channel_list as nil
15
+ # Must return something that responds to []
16
# Language code reported for this grabber's listings.
def lang
  'de'
end
19
# Reads the channel selector on the start page and builds the channel table.
#
# Options without a value, with an empty value, or whose value contains
# 'gruppeId' are skipped. The page is requested through base_url, like every
# other request in this grabber, and the URL is logged to STDERR rather than
# to standard output.
#
# The table is handed to save_object together with channel_list before it is
# returned.
#
# Returns a Hash mapping each option value to its stripped, converted text.
def fetch_all_channels
  url = base_url
  STDERR.puts url
  page = fetch(url)
  channels = {}
  page.at('//select[@name=channel]').search('//option').each do |op|
    val = op['value']
    # An <option> without a value attribute yields nil here.
    next if val.nil? || val.empty? || val.index('gruppeId')
    channels[val] = op.inner_text.strip.to_utf
  end
  save_object(channels, channel_list)
  channels
end
32
# Fetches the detail page of one programme and collects its raw properties.
#
# A href starting with '/' is prefixed with base_url. The result holds:
# 'desc-s':: text of span.tv-vorspann-popup
# 'desc'::   text of the first Hpricot::Text child of td.tv-sendung-info
# 'info'::   text of the parent of span.tv-credits, when that span exists
# 'kind'::   text of span.tv-auszeichnung, when that span exists
# plus one entry set to true for the title attribute of every image found in
# the parent of span.tv-titel-popup.
#
# href:: relative or absolute URL of the detail page
#
# Returns the Hash described above.
def grab_detail(href)
  href = "#{base_url}#{href}" if href[0] == ?/
  program = {}
  page = fetch(href)
  info = page.at('span.tv-titel-popup').parent
  program['desc-s'] = info.at('span.tv-vorspann-popup').inner_text.to_utf

  page.at('td.tv-sendung-info').each_child do |child|
    next unless child.is_a? Hpricot::Text
    program['desc'] = child.inner_text.to_utf
    break
  end

  credits = page.at('span.tv-credits')
  program['info'] = credits.parent.inner_text.to_utf if credits

  award = info.at('span.tv-auszeichnung')
  program['kind'] = award.inner_text.to_utf if award

  info.search('img').each { |img| program[img['title']] = true }
  program
end
61
+
62
# Removes every cached programme whose 'datum' (parsed with Date.parse) lies
# before Vandaag.
#
# cache:: Hash of cached programme entries; modified in place
#
# Returns the number of entries that were removed.
def clean_cache(cache)
  size_before = cache.size
  cache.delete_if { |_key, entry| Date.parse(entry['datum']) < Vandaag }
  size_before - cache.size
end
71
+
72
# Builds the URL of the full-day listing of one channel.
#
# chan_id:: channel identifier placed in the 'channel' query parameter
# day::     object responding to german; its result fills the 'date' parameter
def day_url(chan_id, day)
  query = [
    'format=genre',
    'offset=0',
    "date=#{day.german}",
    'slotIndex=all',
    "channel=#{chan_id}",
    'order=time'
  ].join('&')
  "#{base_url}/program2007?#{query}"
end
75
# grab_channel returns the number of programmes found.
#
# Walks the day listings of one channel from today up to 14 days ahead and
# fetches the detail page of every programme that is not in the cache yet.
# The cache is loaded into @channelhash with load_cachefile and written back
# with save_object when at least one detail page was fetched. Newly fetched
# programmes get 'times', 'title', 'datum' (the listing's date) and 'evening'.
#
# 'evening' records whether a row with a start hour above 17 has already been
# seen on the same day page; transform uses it to move early-morning rows to
# the next date. The flag is updated for every row, including rows that are
# already cached, so a new after-midnight programme is still marked correctly
# when the evening programmes before it came from the cache.
#
# chan_id:: channel identifier
def grab_channel(chan_id)
  fetched = found = 0
  @channelhash = load_cachefile(chan_id)
  now = Date.today
  (now..now + 14).each do |date|
    page = fetch(day_url(chan_id, date))
    avond = false
    page.search('div#program_complete//tr').each do |prog|
      times = prog.at('span.tv-sendung-uhrzeit')
      next unless times
      times = times.inner_text
      # to_i reads the leading hour of the time text.
      avond = true if times.to_i > 17

      titel = prog.at("td/a.tv-sendung-titel")
      id = titel['href'][/detailPopup\('(\d+)'/, 1]
      href = "/program2007?format=detail&sid=#{id}"
      found += 1
      next if @channelhash[id]

      begin
        @channelhash[id] = program = grab_detail(href)
        fetched += 1
      rescue
        STDERR.puts href, page, '====='
        raise
      end
      program['times'] = times
      program['title'] = titel.inner_text.to_utf
      program['datum'] = date.to_s
      program['evening'] = avond
    end
  end
  save_object(@channelhash, cachefile(chan_id)) if fetched > 0
  found
end
119
# transform returns an Array of the special hashes obtained with proghash.
#
# Every cached entry in @channelhash is converted: 'times' ("hh.mm-hh.mm") and
# 'datum' give the local start and stop times. Entries flagged 'evening' with
# a start hour before 7 are moved to the next date, and a stop before the
# start is moved forward by Dag. The start time is reported to date_stats.
# The short description 'desc-s' replaces the description when the long one
# contains the "\t\t\t\t>" marker, or is missing/empty while a short one
# exists. The ';'-separated 'info' text supplies directors ('R:'), actors
# ('D:') and a production year; role lists continue across items until the
# next prefix. Stereo, teletext subtitles and 16:9 are set from the
# 'Stereo', 'Untertitel' and 'Breitbild' entries whether or not 'info' is
# present.
#
# chan_id:: channel identifier passed on to proghash and date_stats
#
# Any StandardError is dumped to STDERR together with the offending entry and
# then re-raised.
def transform(chan_id)
  progdata_array = []
  @channelhash.each_pair do |_id, entry|
    begin
      progdata = proghash(entry, chan_id)
      date = Date.parse(entry['datum'])
      startuur, startmin, stopuur, stopmin = entry['times'].split(/[-.]/).map { |x| x.to_i }
      date += 1 if startuur < 7 && entry['evening']
      progdata['start'] = Time.local(date.year, date.month, date.day, startuur, startmin)
      progdata['stop'] = Time.local(date.year, date.month, date.day, stopuur, stopmin)
      progdata['stop'] += Dag if progdata['stop'] < progdata['start']
      date_stats(chan_id, progdata['start'])

      # Either description may be absent; treat a missing one as empty.
      desc = entry['desc'].to_s
      short_desc = entry['desc-s'].to_s
      if desc.index("\t\t\t\t>") || (desc.empty? && !short_desc.empty?)
        progdata['desc'] = entry['desc-s']
      end

      if entry['info']
        role = nil
        entry['info'].split(';').map { |x| x.strip }.each do |det|
          year = nil
          case det[0, 2]
          when 'R:'
            role = 'director'
            det = det[2..-1].strip
          when 'D:'
            role = 'actor'
            det = det[2..-1]
          else
            # 'O:' items and unprefixed items may end in a year.
            year = det[-5..-1].to_i
          end
          if year && year > 1900
            progdata['date'] = year.to_s
          elsif role
            (progdata['credits'][role] ||= []) << det.gsub(/\(.*\)/, '').strip
          end
        end
      end

      # These flags do not depend on the credits text.
      progdata['audio']['stereo'] = 'stereo' if entry['Stereo']
      progdata['subtitles']['type'] = 'teletext' if entry['Untertitel']
      progdata['video']['aspect'] = '16:9' if entry['Breitbild']

      progdata_array << progdata
    rescue StandardError => exc
      STDERR.puts exc.class, exc.message, exc.backtrace
      PP.pp(entry, STDERR)
      raise
    end
  end
  progdata_array
end
182
+ end
183
+
184
+ end
185
+ XMLTV::TvtodayGrabber.new.run