RubyGems - serienrenamer - Versions diffs - 0.0.1 - Mend

serienrenamer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

data/.gemtest +0 -0
data/History.txt +4 -0
data/Manifest.txt +23 -0
data/README.rdoc +52 -0
data/Rakefile +27 -0
data/bin/serienrenamer +155 -0
data/lib/plugin/serienjunkies_de.rb +129 -0
data/lib/plugin/serienjunkies_feed.rb +105 -0
data/lib/plugin/textfile.rb +50 -0
data/lib/plugin/wikipedia.rb +362 -0
data/lib/plugin.rb +8 -0
data/lib/serienrenamer/episode.rb +313 -0
data/lib/serienrenamer.rb +28 -0
data/script/console +10 -0
data/script/destroy +14 -0
data/script/generate +14 -0
data/serienrenamer.gemspec +52 -0
data/test/serienjunkies_feed_sample.xml +14472 -0
data/test/test_episode.rb +188 -0
data/test/test_helper.rb +4 -0
data/test/test_plugin_serienjunkies_de.rb +95 -0
data/test/test_plugin_serienjunkies_feed.rb +75 -0
data/test/test_plugin_textfile.rb +38 -0
data/test/test_plugin_wikipedia.rb +178 -0
metadata +161 -0

data/.gemtest ADDED Viewed

File without changes

data/History.txt ADDED Viewed

@@ -0,0 +1,4 @@
+=== 0.0.1 2012-02-01
+* 1 major enhancement:
+  * Initial release

data/Manifest.txt ADDED Viewed

@@ -0,0 +1,23 @@
+History.txt
+Manifest.txt
+README.rdoc
+Rakefile
+bin/serienrenamer
+lib/plugin.rb
+lib/plugin/serienjunkies_de.rb
+lib/plugin/serienjunkies_feed.rb
+lib/plugin/textfile.rb
+lib/plugin/wikipedia.rb
+lib/serienrenamer.rb
+lib/serienrenamer/episode.rb
+script/console
+script/destroy
+script/generate
+serienrenamer.gemspec
+test/serienjunkies_feed_sample.xml
+test/test_episode.rb
+test/test_helper.rb
+test/test_plugin_serienjunkies_de.rb
+test/test_plugin_serienjunkies_feed.rb
+test/test_plugin_textfile.rb
+test/test_plugin_wikipedia.rb

data/README.rdoc ADDED Viewed

@@ -0,0 +1,52 @@
+= serienrenamer
+* http://github.com/pboehm/serienrenamer
+== DESCRIPTION:
+Ruby Script that brings your series into an appropriate format
+like "S01E01 - Episodename.avi"
+== FEATURES/PROBLEMS:
+* extract information from episode files in various formats
+* clean this information and bring it into a recommended format
+* search for information in specific textfiles like "filename.txt"
+* query the serienjunkies.org feed for episode information
+* query the serienjunkies.de Page for series specific data
+* query wikipedia for episode information
+* repair broken German umlauts if they occur in the episode title
+* rename these files
+== REQUIREMENTS:
+* ruby (>= 1.9)
+* wlapi
+* mediawiki_gateway
+* highline
+* nokogiri
+* mechanize
+== INSTALL:
+* FIX (sudo gem install, anything else)
+== LICENSE:
+(General Public License)
+Copyright (c) 2012 Philipp Böhm
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation in version 3 of the License.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+MA 02110-1301, USA.

data/Rakefile ADDED Viewed

@@ -0,0 +1,27 @@
+# encoding: UTF-8
+require 'rubygems'
+gem 'hoe', '>= 2.1.0'
+require 'hoe'
+require 'fileutils'
+require './lib/serienrenamer'
+require './lib/plugin'
+Hoe.plugin :newgem
+# Generate all the Rake tasks
+# Run 'rake -T' to see list of generated tasks (from gem root directory)
+$hoe = Hoe.spec 'serienrenamer' do
+  self.developer 'Philipp Boehm', 'philipp@i77i.de'
+  self.rubyforge_name       = self.name
+  self.dependency('wlapi', '>= 0.8.4')
+  self.dependency('mediawiki-gateway', '>= 0.4.4')
+  self.dependency('mechanize', '>= 2.3')
+  self.dependency('highline', '>= 1.6.11')
+end
+require 'newgem/tasks'
+Dir['tasks/**/*.rake'].each { |t| load t }
+# TODO - want other tests/tasks run by default? Add them to the list
+# remove_task :default
+# task :default => [:spec, :features]

data/bin/serienrenamer ADDED Viewed

@@ -0,0 +1,155 @@
+#!/usr/bin/env ruby
+# -*- ruby -*-
+# encoding: UTF-8
+$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
+require 'serienrenamer'
+require 'plugin'
+require 'optparse'
+require 'fileutils'
+require "highline/system_extensions"
+include HighLine::SystemExtensions
+###
+# Option definition and handling
+options = {}
+opts = OptionParser.new("Usage: #{$0} [OPTIONS] DIR")
+opts.separator("")
+opts.separator("Ruby Script that brings your series into an")
+opts.separator("appropriate format like 'S01E01 - Episodename.avi'")
+opts.separator("")
+opts.separator("  Options:")
+opts.on( "-p", "--plugin STRING", String,
+         "use only this plugin") do |opt|
+    options[:plugin] = opt
+end
+opts.on( "-s", "--series STRING", String,
+         "series name that will be set for all episodes") do |opt|
+    options[:series] = opt
+end
+opts.on( "-S", "--[no-]season",
+         "DIR contains episodes of one season of one series") do |opt|
+    options[:is_single_season] = opt
+end
+opts.on( "-i", "--[no-]ignore-filenamedata",
+         "Always ask plugins for episode information") do |opt|
+    options[:ignore_filenamedata] = opt
+end
+opts.on( "-a", "--[no-]all",
+         "Process all files (including right formatted files)") do |opt|
+    options[:process_all_files] = opt
+end
+opts.separator("")
+opts.separator("  Arguments:")
+opts.separator("     DIR      The path that includes the episodes")
+opts.separator("              defaults to ~/Downloads")
+opts.separator("")
+rest = opts.permute(ARGV)
+################
+# Load plugins #
+################
+Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
+	load plugin
+end
+Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }
+puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"
+puts ""
+# change into DIR
+episode_directory = rest.pop || File.join( File.expand_path('~'), "Downloads" )
+fail "'#{episode_directory}' does not exist or is not a directory" unless
+    Dir.exists?(episode_directory)
+Dir.chdir(episode_directory)
+#########################################
+# Iterate through all directory entries #
+#########################################
+begin
+    for entry in Dir.entries('.').sort do
+        next if entry.match(/^\./)
+        next unless Serienrenamer::Episode.determine_video_file(entry)
+        # skip files that already have the right format
+        unless options[:process_all_files]
+            next if entry.match(/^S\d+E\d+.-.\w+.*\.\w+$/)
+        end
+        begin
+            epi = Serienrenamer::Episode.new(entry)
+            if options[:series]
+                epi.series = options[:series]
+            end
+        rescue => e
+            next
+        end
+        puts "<<< #{entry}"
+        # if episodename is empty than query plugins
+        if epi.episodename.match(/\w+/).nil? || options[:ignore_filenamedata]
+            Serienrenamer::Pluginbase.registered_plugins.each do |plugin|
+                # skip plugins that are not feasable
+                next unless plugin.usable
+                next unless plugin.respond_to?(:generate_episode_information)
+                if options[:plugin]
+                    next unless plugin.plugin_name.match(/#{options[:plugin]}/i)
+                end
+                # configure cleanup
+                clean_data, extract_seriesname = false, false
+                case plugin.plugin_name
+                when "Textfile"
+                    clean_data, extract_seriesname = true, true
+                when "SerienjunkiesOrgFeed"
+                    clean_data = true
+                end
+                extract_seriesname = false if options[:series]
+                # ask plugin for information
+                epiname = plugin.generate_episode_information(epi)[0]
+                next if epiname == nil
+                puts "[#{plugin.plugin_name}] - #{epiname}"
+                epi.add_episode_information(epiname, clean_data, extract_seriesname)
+                next unless epi.episodename.match(/\w+/)
+                break
+            end
+        end
+        puts ">>> #{epi.to_s}"
+        print "Filename okay ([jy]/n): "
+        char = get_character
+        print char.chr
+        unless char.chr.match(/[jy\r]/i)
+            puts "\nwill be skipped ...\n\n"
+            next
+        end
+        puts "\n\n"
+        epi.rename()
+    end
+rescue Interrupt => e
+    puts
+end

data/lib/plugin/serienjunkies_de.rb ADDED Viewed

@@ -0,0 +1,129 @@
+#
+# Class that extracts information about episodes
+# from the serienjunkies.de-Page
+#
+require 'uri'
+require 'mechanize'
+module Plugin
+    class SerienjunkiesDe < Serienrenamer::Pluginbase
+        def self.plugin_name; "SerienjunkiesDe" end
+        def self.plugin_url; "http://serienjunkies.de" end
+        def self.usable; true end
+        def self.priority; 4 end
+        # this method will be called from the main program
+        # with an Serienrenamer::Episode instance as parameter
+        #
+        # if this is the first call to this method, it builds up
+        # a hash with all series and existing episodes, which can
+        # be used by all future method calls
+        #
+        def self.generate_episode_information(episode)
+            raise ArgumentError, "Serienrenamer::Episode instance needed" unless
+                episode.is_a?(Serienrenamer::Episode)
+            unless defined? @cached_data
+                @cached_data = Hash.new
+            end
+            if ! @cached_data.has_key?(episode.series)
+                if episode.series.match(/\w+/)
+                    # determine link to series
+                    seriespage_link = self.find_link_to_series_page(episode.series)
+                    if seriespage_link
+                        seriesdata = self.parse_seriespage(seriespage_link)
+                        @cached_data[episode.series] = seriesdata
+                    end
+                end
+            end
+            matched_episodes = []
+            # tries to find an episodename in cached_data
+            # otherwise returns empty array
+            begin
+                series = @cached_data[episode.series]
+                identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
+                episodename = series[identifier]
+                if episodename.match(/\w+/)
+                    matched_episodes.push(episodename)
+                end
+            rescue
+            end
+            return matched_episodes
+        end
+        # tries to find the link to the series page because there are
+        # plenty of different writings of some series
+        #   :seriesname:    -  name of the series
+        #
+        # TODO make this more intelligent so that it tries other forms
+        # of the name
+        #
+        # returns a link to a seriejunkies.de-page or nil if no page was found
+        def self.find_link_to_series_page(seriesname)
+            raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
+            self.build_agent unless defined? @agent
+            url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )
+            @agent.get(url).search("a.slink").each do |series|
+                if series.text.match(/#{seriesname}/i)
+                    return URI.join( plugin_url, series[:href]).to_s
+                end
+            end
+            return nil
+        end
+        # parses the supplied url and returns a hash with
+        # episode information indexed by episode identifier
+        #   :page_url:      -  url of the serienjunkies page
+        #   :german:        -  extract only german titles if true
+        def self.parse_seriespage(page_url, german=true)
+            self.build_agent unless defined? @agent
+            series = {}
+            seriesdoc  = @agent.get(page_url)
+            epidoc = @agent.click(seriesdoc.link_with(:text => /^Episoden$/i))
+            epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |episode|
+                next unless episode.search("td.thh").empty? # skip headings
+                firstchild = episode.search(":first-child")[0].text
+                md = firstchild.match(/(?<season>\d+)x(?<episode>\d+)/)
+                next unless md
+                # extract and save these information
+                identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]
+                german = episode.search("a")[1]
+                next unless german
+                series[identifier] = german.text.strip
+            end
+            return series
+        end
+        # build up a mechanize instance
+        def self.build_agent
+            @agent = Mechanize.new
+        end
+    end
+end

data/lib/plugin/serienjunkies_feed.rb ADDED Viewed

@@ -0,0 +1,105 @@
+#
+# Class that extracts information about episodes
+# from the serienjunkies.org-Feed
+#
+require 'rss'
+require 'open-uri'
+module Plugin
+    class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase
+        def self.plugin_name; "SerienjunkiesOrgFeed" end
+        def self.usable; true end
+        def self.priority; 10 end
+        @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'
+        # this method will be called from the main program
+        # with an Serienrenamer::Episode instance as parameter
+        #
+        # if this is the first call to this method, it builds up
+        # a hash with all series and existing episodes, which can
+        # be used by all future method calls
+        #
+        def self.generate_episode_information(episode)
+            raise ArgumentError, "Serienrenamer::Episode instance needed" unless
+                episode.is_a?(Serienrenamer::Episode)
+            unless defined? @feed_data
+                @feed_data = self.build_up_series_data
+            end
+            episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]
+            # search for all items that match the definition
+            # and save them uniquely in an array
+            matched_definitions = []
+            for epi in @feed_data.grep(/#{episode_definition}/)
+                serdef = epi.match(/(^.*S\d+E\d+)/)[0]
+                exist = matched_definitions.grep(/^#{serdef}/)[0]
+                if exist != nil && epi.length > exist.length
+                    matched_definitions.delete(exist)
+                elsif exist != nil && epi.length < exist.length
+                    next
+                end
+                matched_definitions.push(epi)
+            end
+            # find suitable episode string in the array of
+            # matched definitions
+            #
+            # start with a pattern that includes all words from
+            # Episode#series and if this does not match, it cuts
+            # off the first word and tries to match again
+            #
+            # if the pattern contains one word and if this
+            # still not match, the last word is splitted
+            # characterwise, so that:
+            #  crmi ==> Criminal Minds
+            #
+            matched_episodes = []
+            name_words = episode.series.split(/ /)
+            word_splitted = false
+            while ! name_words.empty?
+                pattern = name_words.join('.*')
+                matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
+                break if ! matched_episodes.empty?
+                # split characterwise if last word does not match
+                if name_words.length == 1 && ! word_splitted
+                    name_words = pattern.split(//)
+                    word_splitted = true
+                    next
+                end
+                name_words.delete_at(0)
+            end
+            return matched_episodes
+        end
+        # create a list of exisiting episodes
+        def self.build_up_series_data
+            feed_data = []
+            open(@feed_url) do |rss|
+                feed = RSS::Parser.parse(rss)
+                feed.items.each do |item|
+                    feed_data.push(item.title.split(/ /)[1])
+                end
+            end
+            return feed_data
+        end
+        # set the feed url (e.g for testing)
+        def self.feed_url=(feed)
+            @feed_url = feed
+        end
+    end
+end

data/lib/plugin/textfile.rb ADDED Viewed

@@ -0,0 +1,50 @@
+#
+# Class that searches for a file with
+# episode information in the directory
+# like "episode.txt"
+#
+module Plugin
+    class Textfile < Serienrenamer::Pluginbase
+        def self.plugin_name; "Textfile" end
+        def self.usable; true end
+        def self.priority; 100 end
+        # this method will be called from the main program
+        # with an Serienrenamer::Episode instance or a path
+        # to to a directory as parameter
+        #
+        # it returns an array of episode information
+        def self.generate_episode_information(episode)
+            sourcedir = ""
+            if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
+                sourcedir = episode.source_directory
+            elsif episode.is_a?(String) && File.directory?(episode)
+                sourcedir = episode
+            end
+            matched_episodes = []
+            if sourcedir != "" && Dir.exists?(sourcedir)
+                # search for files that are smaller than 128 Bytes
+                # an check if they contain episode information
+                Dir.new(sourcedir).each do |e|
+                    file = File.join(sourcedir, e)
+                    next if File.size(file) > 128 || File.zero?(file)
+                    data = File.open(file, "rb").read
+                    if data != nil && data.match(/\w+/) &&
+                            Serienrenamer::Episode.contains_episode_information?(data)
+                        matched_episodes.push(data)
+                    end
+                end
+            end
+            return matched_episodes
+        end
+    end
+end