xmltv 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/Manifest.txt +17 -0
- data/README.txt +123 -0
- data/Rakefile +18 -0
- data/bin/xmltv +13 -0
- data/lib/xmltv/sample/dumpids.rb +15 -0
- data/lib/xmltv/sample/mythtv_chns.yaml +183 -0
- data/lib/xmltv/sample/sample_output +73 -0
- data/lib/xmltv/sample/tvcat_spoolfiles.rb +29 -0
- data/lib/xmltv/sites/film1.rb +100 -0
- data/lib/xmltv/sites/rt.rb +133 -0
- data/lib/xmltv/sites/trivial.rb +33 -0
- data/lib/xmltv/sites/tvgids.rb +224 -0
- data/lib/xmltv/sites/tvtoday.rb +185 -0
- data/lib/xmltv/sites/upc.rb +157 -0
- data/lib/xmltv/sites/vpro.rb +122 -0
- data/lib/xmltv/xmltv.rb +737 -0
- metadata +89 -0
| @@ -0,0 +1,133 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby -w
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            autoload :XMLTV, 'xmltv/xmltv'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
             | 
| 6 | 
            +
            module XMLTV
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              # Grabber for the Radio Times data files served under base_url.
              #
              # Every channel is published as "<chan_id>.dat": one programme per line,
              # with the columns named in Fields separated by '~'.
              class RtGrabber < Grabber
                # Column names of one '~'-separated programme record, in file order.
                Fields = %w{
                  title sub_title episode date director cast
                  premiere film repeat subtitles widescreen
                  new_series deaf_signed black_and_white star_rating
                  certificate category desc choice datum start stop
                  duration_mins
                }

                # Seconds (one hour) added to every parsed start/stop time in transform.
                # NOTE(review): presumably the offset between the feed's clock and local
                # time -- confirm.
                TimeDiff = 60 * 60

                # Byte sequences known to break the UTF-8 check, mapped to their
                # replacement. grab_channel adds further entries (mapped to '?') at
                # run time whenever it meets a sequence that is not listed here.
                EncodingErrors = {
                  /\303[\306\346\341]/ => "'",
                  /\310\355/ => "\303\241",
                  /\310\321/ => "\303\245"
                }

                # Returns this grabber's version line followed by the superclass's.
                def version
                  [ "RtGrabber 0.9", super].join("\n")
                end

                # Root URL below which channels.dat and the per-channel files live.
                def base_url
                  'http://xmltv.radiotimes.com/xmltv'
                end

                # Downloads channels.dat ("id|name" per line), stores the resulting
                # id => name hash through save_object and returns it.
                def fetch_all_channels
                  lines = open("#{base_url}/channels.dat") { |h| h.readlines }
                  channels = {}
                  lines.each do |line|
                    chan_id, name = line.chomp.split('|')
                    channels[chan_id] = name
                  end
                  save_object(channels, channel_list)
                  channels
                end

                # Language code of this source.
                def lang
                  'en'
                end

                ###  Must Implement

                # Downloads the data file of +chan_id+ and parses every line into a
                # hash keyed by the names in Fields; the hashes are collected in
                # @channel_info.
                #
                # Columns that are missing, empty or the literal 'false' are left out
                # of the hash. Text failing the ck_utf check is repaired using
                # EncodingErrors; an unknown offending prefix is replaced by '?' and
                # remembered for later records.
                #
                # Returns the number of lines read. Any error is reported on STDERR
                # together with the offending record and then re-raised.
                def grab_channel(chan_id)
                  uri = "#{base_url}/#{chan_id}.dat"
                  info = open(uri) { |h| h.readlines }.map { |x| x.chomp }
                  @hits += 1
                  @channel_info = []
                  info.each do |program|
                    begin
                      prog = program.split('~')
                      entry = {}
                      Fields.each_with_index do |el, i|
                        item = prog[i]
                        # split drops trailing empty columns, so short records yield nil
                        next if item.nil? || item == 'false' || item.empty?
                        begin
                          entry[el] = item.ck_utf
                        rescue Iconv::IllegalSequence => e
                          # First try the known repairs; stop at the first one that applies.
                          retry if EncodingErrors.any? { |k, v| item.gsub!(k, v) }
                          # Unknown sequence: everything up to the first printable character
                          # (or the whole remainder when there is none) becomes '?'.
                          prt = (e.failed =~ /[[:print:]]/) || e.failed.size
                          failstring = e.failed[0...prt]
                          # An empty pattern would match everywhere and retry forever.
                          raise "Encoding error: #{e.failed}\n" if failstring.empty?
                          STDERR.puts "Repl: #{failstring.dump}"
                          EncodingErrors[failstring] = '?'
                          retry if item.gsub!(failstring, '?')
                          raise "Encoding error: #{e.failed}\n"
                        end
                      end
                      @channel_info << entry
                    rescue StandardError => exc
                      STDERR.puts exc.class, exc.message, exc.backtrace
                      PP.pp prog, STDERR
                      raise
                    end
                  end
                  info.size
                end

                # Converts the records collected by grab_channel into programme hashes
                # obtained from proghash and returns them as an array.
                #
                # Start and stop are built from the 'datum', 'start' and 'stop' columns
                # and shifted by TimeDiff; a stop time earlier than the start time is
                # moved forward by Dag (defined outside this file; presumably one day in
                # seconds -- confirm).
                def transform(chan_id)
                  progdata_array = []
                  @channel_info.each do |prog|
                    progdata = proghash(prog, chan_id)
                    progdata_array << progdata
                    dag, maand, jaar = prog['datum'].scan(/\d+/).map { |x| x.to_i }
                    startuur, startmin = prog['start'].scan(/\d+/).map { |x| x.to_i }
                    stopuur, stopmin = prog['stop'].scan(/\d+/).map { |x| x.to_i }
                    progdata['start'] = Time.local(jaar, maand, dag, startuur, startmin) + TimeDiff
                    date_stats(chan_id, progdata['start'])
                    progdata['stop'] = Time.local(jaar, maand, dag, stopuur, stopmin) + TimeDiff
                    # The record carries a single date, so a programme running past
                    # midnight shows up with stop < start.
                    progdata['stop'] += Dag if progdata['stop'] < progdata['start']
                    progdata['credits']['director'] = prog['director'] if prog['director']
                    progdata['video']['colour'] = 'no' if prog['black_and_white']
                    progdata['video']['aspect'] = '16:9' if prog['widescreen']
                    progdata['sub-title'] = prog['episode'] || prog['sub_title'] || ''
                    progdata['category'] = 'Film' if prog['film']
                    progdata['subtitles']['type'] = 'teletext' if prog['subtitles']
                    progdata['star-rating']['value'] = "#{prog['star_rating']}/5" if prog['star_rating']
                    if (cast = prog['cast'])
                      # Cast is either "role*actor|role*actor" or a plain comma list.
                      progdata['credits']['actor'] =
                        if cast.index('|')
                          cast.split('|').map { |x| x.gsub(/^.*[*]/, '') }
                        else
                          cast.split(',')
                        end
                    end
                  end
                  progdata_array
                end
              end
         | 
| 131 | 
            +
            end
         | 
| 132 | 
            +
            # Instantiate the grabber and call its run method as soon as this file is loaded.
            XMLTV::RtGrabber.new.run
         | 
| 133 | 
            +
             | 
| @@ -0,0 +1,33 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby -w
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            autoload :XMLTV, 'xmltv/xmltv'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
             | 
| 6 | 
            +
            module XMLTV
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              # Minimal grabber showing the three methods a Grabber subclass provides.
              # It serves one fixed channel carrying one fixed programme.
              class TrivialGrabber < Grabber
                # A subclass must implement fetch_all_channels or define channel_list
                # as nil. The return value must respond to [].
                def fetch_all_channels
                  { "1" => "Channel" }
                end

                # grab_channel returns the number of programmes found.
                def grab_channel(chan_id)
                  1
                end

                # transform returns an array of the special hashes obtained with
                # proghash; here a single programme starting now and lasting one hour.
                def transform(chan_id)
                  programme = proghash({}, chan_id)
                  programme['start'] = Time.new
                  programme['stop'] = Time.new + 60 * 60
                  programme['title'] = 'Title'
                  [programme]
                end
              end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            end
         | 
| 33 | 
            +
            # Instantiate the grabber and call its run method as soon as this file is loaded.
            XMLTV::TrivialGrabber.new.run
         | 
| @@ -0,0 +1,224 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby -w
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            autoload :XMLTV, 'xmltv/xmltv'
         | 
| 4 | 
            +
            require 'cgi'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
             | 
| 7 | 
            +
             | 
| 8 | 
            +
            module XMLTV
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              # Grabber that scrapes a channel's search-result page and the detail
              # page of every programme listed on it. Fetched details are kept in a
              # per-channel cache (@channelhash) keyed by the programme ID found in
              # the detail link.
              class TvgidsGrabber < Grabber
                # Lower-cased genre label as found on the detail page => category
                # name written to the output.
                # ('muziek' used to be listed twice; only the later 'Music' value ever
                # took effect, so that is the one kept.)
                Cattrans = {
                  'amusement'        => 'Talk',
                  'animatie'         => 'Animated',
                  'comedy'           => 'Comedy',
                  'documentaire'     => 'Documentary',
                  'educatief'        => 'Educational',
                  'erotiek'          => 'Adult',
                  'film'             => 'Movies',
                  'informatief'      => 'Educational',
                  'jeugd'            => 'Children',
                  'kunst/cultuur'    => 'Arts/Culture',
                  'misdaad'          => 'Crime/Mystery',
                  'muziek'           => 'Music',
                  'natuur'           => 'Science/Nature',
                  'nieuws/actualiteiten' => 'News',
                  'overige'          => 'Unknown',
                  'religieus'        => 'Religion',
                  'serie/soap'       => 'Drama',
                  'sport'            => 'Sports',
                  'theater'          => 'Arts/Culture',
                  'wetenschap'       => 'Science/Nature'
                }

                # Detail-page role label => key used inside progdata['credits'].
                Roletrans = {
                  'regie'             => 'director',
                  'acteurs'           => 'actor',
                  'presentatie'       => 'presenter',
                  'scenario'          => 'writer'
                }

                # Detail-page property label => top-level progdata key.
                Titeltrans = {
                  'titel aflevering' => 'sub-title',
                  'jaar van premiere' => 'date',
                  'aflevering' => 'episode-num'
                }

                # Fetches the detail page at +href+ (site-relative links are prefixed
                # with base_url) and returns a hash holding 'desc' plus one entry per
                # property row, keyed by the row's lower-cased label without colons.
                # Rows whose cell has class 'personen' yield an array of names, all
                # other rows a string.
                def grab_detail(href)
                  href = "#{base_url}#{href}" if href[0] == ?/
                  STDERR.puts "#{Time.now}: #{href} #{@channelhash.size}" if XmltvOptions.verbose
                  program = {}
                  details = fetch(href)
                  desc = []
                  details.at('//table#progDetail').search('//tr//p').each do |p|
                    # Paragraphs from the 'meerLinks' one onwards are not description text.
                    break if p['class'] == 'meerLinks'
                    line = p.inner_text.strip
                    desc << line unless line.empty?
                  end
                  program['desc'] = desc.join(' ').to_utf
                  details.search('//div#progPropt//tr/th').each do |pg|
                    content = pg.at('../td')
                    rsl =
                      if content['class'] == 'personen'
                        texts = content.at('div').search('.').find_all { |x| x.text? }
                        texts.map { |x| x.to_s.strip.to_utf }.reject { |x| x.empty? }
                      else
                        content.inner_text.strip.to_utf
                      end
                    program[pg.inner_text.strip.gsub(':', '').downcase] = rsl
                  end
                  program
                end

                # URL of the search-result page listing the programmes of +chan_id+.
                def channel_url(chan_id)
                  "#{base_url}//zoeken/?periode=9&station=#{chan_id}"
                end

                # Reads the channel options of the first two <optgroup> elements on a
                # search page, stores the value => name hash through save_object and
                # returns it.
                def fetch_all_channels
                  page = fetch(channel_url(1))
                  channels = {}
                  page.search('//optgroup')[0..1].each do |og|
                    og.search('/option').each do |g|
                      channels[g['value']] = g.inner_text
                    end
                  end
                  save_object(channels, channel_list)
                  channels
                end

                # Deletes every cache entry whose 'datum' lies before Vandaag and
                # returns the number of entries removed.
                def clean_cache(cache)
                  count = 0
                  cache.delete_if do |dt, en|
                    expired = (Date.dutch(en['datum']) < Vandaag)
                    count += 1 if expired
                    expired
                  end
                  count
                end

                # Walks the overview table of +chan_id+ and fetches the detail page of
                # every programme that is not yet in the cache; the cache is written
                # back when anything new was fetched.
                #
                # Returns the programme count announced by the page, or nil (after
                # logging the page's message on STDERR) when no overview is present.
                def grab_channel(chan_id)
                  url = channel_url(chan_id)
                  page = fetch(url)
                  @channelhash = load_cachefile(chan_id)
                  period = datum = nil
                  fetched = 0
                  begin
                    found = remaining = page.at("//table.overzicht//tr//td/strong").inner_text.to_i
                  rescue NoMethodError
                    # No overview table: report what the page says instead.
                    niks = page.at("//div#resultaten").at("//td").inner_text
                    STDERR.puts url, niks
                    return
                  end
                  page.search("//table.overzicht//tr").each do |pg|
                    td = pg.at('td')
                    next if td.nil? || pg['class'] == 'zoekstring'
                    if td['class'] == 'bloktitel'
                      # Block header row: remember part of day / date for the rows below;
                      # a header lacking one of them keeps the previous value.
                      period = td.at('h5').inner_text rescue period
                      datum = td.at('h4').inner_text rescue datum
                      next
                    end
                    if (tijd = pg.at('/th').inner_text) =~ /\d\d:\d\d/
                      det = pg.at('/td//a')
                      href = det['href']
                      id = href[/ID=(\d+)/, 1]
                      remaining -= 1
                      next if @channelhash[id]
                      fetched += 1
                      begin
                        @channelhash[id] = program = grab_detail(href)
                      rescue
                        STDERR.puts href, pg, '====='
                        raise
                      end
                      program['title'] = det.inner_text.strip.to_utf
                      program['period'] = period
                      program['datum'] = datum
                      program['tijd'] = tijd
                      program['progtip'] = '4/5' if pg['class'] == 'progTip'
                    end
                  end
                  STDERR.puts "Something wrong remaining: #{remaining}" if remaining != 0
                  save_object(@channelhash, cachefile(chan_id)) if fetched > 0
                  found
                end

                # Parses a string shaped like "<day> <month> <year>, <hh>:<mm>" that may
                # be followed by a stop time "<hh>:<mm>".
                #
                # Returns [day, month, year, start_hour, start_min] with the stop hour
                # and minute appended when present, or nil when +str+ does not match.
                # A month given by name is translated via Date::Maanden.index.
                def parse_times(str)
                  md = /(\d+)\s(\w+)\s(\d+),\s(\d+):(\d+)/.match(str)
                  return nil unless md
                  rsl = md.captures.map do |x|
                    x =~ /\d/ ? x.to_i : Date::Maanden.index(x)
                  end
                  # Look for the stop time only in the text after the start match.
                  mdstop = /(\d+):(\d+)/.match(str[md.offset(0)[1]..-1])
                  rsl.concat(mdstop.captures.map { |x| x.to_i }) if mdstop
                  rsl
                end

                # Converts the cached entries of the channel last handled by
                # grab_channel into programme hashes obtained from proghash and returns
                # them as an array. Entries whose 'datum en tijdstip' cannot be parsed
                # are skipped; any other error is logged with the entry and re-raised.
                def transform(chan_id)
                  progdata_array = []
                  @channelhash.each_pair do |id, entry|
                    begin
                      progdata = proghash(entry, chan_id)
                      # Entries listed under 'Nacht' get both times moved forward by Dag
                      # (defined outside this file; presumably one day in seconds -- confirm).
                      shift = entry['period'] == 'Nacht' ? Dag : 0
                      dag, maand, jaar, startuur, startmin, stopuur, stopmin =
                        parse_times(entry['datum en tijdstip'])
                      next if dag.nil?
                      progdata['start'] = start = Time.local(jaar, maand, dag, startuur, startmin) + shift

                      if stopuur
                        stop = Time.local(jaar, maand, dag, stopuur, stopmin) + shift
                        # Late-evening programme ending after midnight.
                        stop += Dag if start > stop && start.hour >= 21 && stop.hour <= 7
                        progdata['stop'] = stop
                      end
                      date_stats(chan_id, progdata['start'])
                      if (b = entry['bijzonderheden'])
                        b.downcase.split(',').each do |bijz|
                          case bijz
                          when /breedbeeld/
                            progdata['video']['aspect'] = '16:9'
                          when /zwart/
                            progdata['video']['colour'] = 'no'
                          when /teletekst/
                            progdata['subtitles']['type'] = 'teletext'
                          when /stereo/
                            progdata['audio']['stereo'] = 'stereo'
                          end
                        end
                      end
                      Roletrans.each_pair do |role, credit|
                        progdata['credits'][credit] = entry[role] if entry[role]
                      end
                      progdata['category'] = Cattrans[entry['genre'].downcase] if entry['genre']
                      progdata['star-rating']['value'] = entry['progtip'] if entry['progtip']
                      # Iterate key AND value: a one-argument block would receive the
                      # [key, value] pair and never find anything in entry.
                      Titeltrans.each_pair do |label, target|
                        progdata[target] = entry[label] if entry[label]
                      end
                      progdata_array << progdata
                    rescue StandardError => exc
                      STDERR.puts exc.class, exc.message, exc.backtrace
                      PP.pp(entry, STDERR)
                      raise
                    end
                  end
                  progdata_array
                end
              end
         | 
| 222 | 
            +
              
         | 
| 223 | 
            +
            end
         | 
| 224 | 
            +
            # Instantiate the grabber and call its run method as soon as this file is loaded.
            XMLTV::TvgidsGrabber.new.run
         | 
| @@ -0,0 +1,185 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby -w
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            autoload :XMLTV, 'xmltv/xmltv'
         | 
| 4 | 
            +
            require 'date'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Extension of the standard Date class.
            class Date
              # Renders the date as "day.month.year" without zero padding,
              # e.g. 5 March 2008 => "5.3.2008".
              def german
                [day, month, year].join('.')
              end
            end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            module XMLTV
         | 
| 13 | 
            +
              class TvtodayGrabber < Grabber
         | 
| 14 | 
            +
                # Must implement fetch_all_channels or define channel_list as nil
         | 
| 15 | 
            +
                # Must return something that responds to []
         | 
| 16 | 
            +
                # Language code of this source.
                def lang
                  "de"
                end
         | 
| 19 | 
            +
                # Builds the channel list from the site's start page.
                #
                # Fetches the page at base_url, walks the <option> elements below the
                # <select name="channel"> element and maps every option value to its
                # stripped label (converted with to_utf). Options that have no value,
                # an empty value, or a value containing 'gruppeId' are skipped.
                #
                # The resulting hash is handed to save_object together with
                # channel_list and is also returned.
                #
                # Raises NoMethodError when the page has no channel <select>
                # (unchanged behaviour).
                def fetch_all_channels
                  url = base_url
                  # NOTE(review): looks like leftover debug output on stdout - confirm before removing.
                  puts url
                  page = fetch(url)
                  channels = {}
                  page.at('//select[@name=channel]').search('//option').each do |option|
                    value = option['value']
                    # An <option> without a value attribute may yield nil here; skip it
                    # instead of failing on nil.index.
                    next if value.nil? || value.empty? || value.index('gruppeId')
                    channels[value] = option.inner_text.strip.to_utf
                  end
                  save_object(channels, channel_list)
                  channels
                end
         | 
| 32 | 
            +
                # Fetches the detail page of one programme and extracts its data.
                #
                # href - address of the detail page; a value starting with '/' is
                #        prefixed with base_url.
                #
                # Returns a hash with these keys:
                #   'desc-s' - text of span.tv-vorspann-popup ('' when the span is absent)
                #   'desc'   - first text node directly below td.tv-sendung-info
                #              ('' when there is none)
                #   'info'   - text of the parent of span.tv-credits (only when present)
                #   'kind'   - text of span.tv-auszeichnung (only when present)
                # plus one `title => true` pair for every <img> carrying a title
                # attribute inside the parent element of span.tv-titel-popup.
                #
                # Raises NoMethodError when the page lacks span.tv-titel-popup
                # (unchanged behaviour).
                def grab_detail(href)
                  href = "#{base_url}#{href}" if href[0, 1] == '/'
                  page = fetch(href)
                  info = page.at('span.tv-titel-popup').parent
                  program = {}

                  teaser = info.at('span.tv-vorspann-popup')
                  program['desc-s'] = teaser ? teaser.inner_text.to_utf : ''

                  # Always store a string: transform calls String methods on 'desc'
                  # and used to fail on nil when the cell held no text node.
                  program['desc'] = ''
                  cell = page.at('td.tv-sendung-info')
                  if cell
                    cell.each_child do |el|
                      next unless el.is_a? Hpricot::Text
                      program['desc'] = el.inner_text.to_utf
                      break
                    end
                  end

                  credits = page.at('span.tv-credits')
                  program['info'] = credits.parent.inner_text.to_utf if credits

                  categ = info.at('span.tv-auszeichnung')
                  program['kind'] = categ.inner_text.to_utf if categ

                  # Image titles act as boolean flags (transform looks for 'Stereo',
                  # 'Untertitel' and 'Breitbild'); images without a title are ignored
                  # so no nil key ends up in the hash.
                  info.search('img').each do |img|
                    flag = img['title']
                    program[flag] = true if flag
                  end
                  program
                end
         | 
| 61 | 
            +
                
         | 
| 62 | 
            +
                # Removes outdated entries from a programme cache.
                #
                # cache - hash whose values are entry hashes holding a 'datum' string
                #         that Date.parse understands; it is modified in place.
                #
                # Every entry whose 'datum' lies before Vandaag (defined outside this
                # file; presumably today's date - confirm) is deleted.
                #
                # Returns the number of deleted entries.
                def clean_cache(cache)
                  size_before = cache.size
                  cache.delete_if { |_key, entry| Date.parse(entry['datum']) < Vandaag }
                  size_before - cache.size
                end
         | 
| 71 | 
            +
                
         | 
| 72 | 
            +
                # Builds the address of the listing page for one channel and one day.
                #
                # chan_id - channel identifier, inserted as the "channel" parameter
                # day     - object responding to german (see the Date extension above),
                #           inserted as the "date" parameter
                #
                # Returns the URL as a string, rooted at base_url.
                def day_url(chan_id, day)
                  root = base_url
                  datum = day.german
                  "#{root}/program2007?format=genre&offset=0&date=#{datum}&slotIndex=all&channel=#{chan_id}&order=time"
                end
         | 
| 75 | 
            +
                # grab_channel returns the number of programs found
         | 
| 76 | 
            +
                # Collects the programmes of one channel for today and the next 14 days.
                #
                # chan_id - channel identifier as used by day_url
                #
                # Loads the channel cache into @channelhash, fetches one listing page per
                # day and, for every programme row that is not cached yet, fetches its
                # detail page with grab_detail and adds 'times', 'title', 'datum' and
                # 'evening' to the resulting hash. The cache is written back with
                # save_object only when at least one detail page was fetched.
                #
                # Rows without a time span, without a title link or without a numeric
                # id in the link are skipped.
                #
                # Returns the number of programme rows found (cached ones included).
                # An error raised by grab_detail is reported on STDERR and re-raised.
                def grab_channel(chan_id)
                  fetched = found = 0
                  @channelhash = load_cachefile(chan_id)
                  today = Date.today
                  (today..today + 14).each do |date|
                    page = fetch(day_url(chan_id, date))
                    # Becomes true once a row of this page starts after 17h; transform
                    # uses it to move later rows that start before 7h to the next day.
                    evening = false
                    page.search('div#program_complete//tr').each do |row|
                      time_el = row.at('span.tv-sendung-uhrzeit')
                      next unless time_el
                      title_el = row.at("td/a.tv-sendung-titel")
                      next unless title_el
                      id = title_el['href'].to_s[/detailPopup\('(\d+)'/, 1]
                      next unless id

                      times = time_el.inner_text
                      # Track the evening flag for every row, cached or not. Doing this
                      # only for uncached rows left the flag false for a new
                      # after-midnight row that follows an already cached evening row.
                      evening = true if times.to_i > 17
                      found += 1
                      next if @channelhash[id]

                      href = "/program2007?format=detail&sid=#{id}"
                      begin
                        program = grab_detail(href)
                      rescue
                        STDERR.puts href, page, '====='
                        raise
                      end
                      program['times'] = times
                      program['title'] = title_el.inner_text.to_utf
                      program['datum'] = date.to_s
                      program['evening'] = evening
                      @channelhash[id] = program
                      fetched += 1
                    end
                  end
                  save_object(@channelhash, cachefile(chan_id)) if fetched > 0
                  found
                end
         | 
| 119 | 
            +
                # transform returns an array of special hashes (obtained with proghash)
         | 
| 120 | 
            +
                # 
         | 
| 121 | 
            +
                # Converts the cached entries in @channelhash into programme hashes.
                #
                # chan_id - channel identifier, passed on to proghash and date_stats
                #
                # For every entry a hash is obtained from proghash and completed with:
                #   'start'/'stop' - Time objects built from 'datum' and 'times'
                #                    ("h.mm-h.mm"); an entry flagged 'evening' that starts
                #                    before 7h is moved to the next day, and a stop time
                #                    before the start time is advanced by Dag
                #                    (presumably one day in seconds - confirm)
                #   'desc'         - replaced by 'desc-s' when 'desc' contains the
                #                    "\t\t\t\t>" marker, or is missing/empty while
                #                    'desc-s' is not
                #   'credits'      - directors ("R:") and actors ("D:") parsed from the
                #                    ';'-separated 'info' text, parenthesised parts removed
                #   'date'         - a year above 1900 found at the end of an 'info' part
                #   audio/subtitles/video flags from 'Stereo', 'Untertitel', 'Breitbild'
                #
                # Returns the array of programme hashes. An error while processing an
                # entry is reported on STDERR together with the entry and re-raised.
                def transform(chan_id)
                  progdata_array = []
                  @channelhash.each_pair do |_id, entry|
                    begin
                      progdata = proghash(entry, chan_id)
                      date = Date.parse(entry['datum'])
                      startuur, startmin, stopuur, stopmin = entry['times'].split(/[-.]/).map { |x| x.to_i }
                      date += 1 if startuur < 7 && entry['evening']
                      progdata['start'] = Time.local(date.year, date.month, date.day, startuur, startmin)
                      progdata['stop']  = Time.local(date.year, date.month, date.day, stopuur, stopmin)
                      progdata['stop'] += Dag if progdata['stop'] < progdata['start']
                      date_stats(chan_id, progdata['start'])

                      # 'desc' can be missing when the detail page had no text node;
                      # treat that like an empty description instead of failing on nil.
                      desc = entry['desc'].to_s
                      teaser = entry['desc-s'].to_s
                      if desc.index("\t\t\t\t>") || (desc.empty? && !teaser.empty?)
                        progdata['desc'] = entry['desc-s']
                      end

                      if entry['info']
                        role = nil
                        entry['info'].split(';').each do |part|
                          det = part.strip
                          year = nil
                          case det[0, 2]
                          when 'R:'
                            role = 'director'
                            det = det[2..-1]
                          when 'D:'
                            role = 'actor'
                            det = det[2..-1]
                          else
                            # Parts without a role prefix (including "O:") may end in a year.
                            year = det[-5..-1].to_i
                          end
                          if year && year > 1900
                            progdata['date'] = year.to_s
                          elsif role
                            # A part without prefix continues the list of the last role seen.
                            (progdata['credits'][role] ||= []) << det.gsub(/\(.*\)/, '').strip
                          end
                        end
                      end

                      # These flags come from image titles, not from the credits text,
                      # so they are applied whether or not 'info' is present.
                      progdata['audio']['stereo'] = 'stereo' if entry['Stereo']
                      progdata['subtitles']['type'] = 'teletext' if entry['Untertitel']
                      progdata['video']['aspect'] = '16:9' if entry['Breitbild']

                      progdata_array << progdata
                    rescue StandardError => exc
                      STDERR.puts exc, exc.message, exc.backtrace
                      PP.pp(entry, STDERR)
                      raise
                    end
                  end
                  progdata_array
                end
         | 
| 182 | 
            +
              end
         | 
| 183 | 
            +
             | 
| 184 | 
            +
            end
         | 
| 185 | 
            +
            # Executed when this file is loaded: creates a TvtodayGrabber and calls its run method.
            XMLTV::TvtodayGrabber.new.run
         |