RubyGems - serienrenamer - Versions diffs - 0.0.14 → 0.0.15 - Mend

serienrenamer 0.0.14 → 0.0.15

Files changed (25) hide show

data/bin/serienrenamer +0 -6
data/lib/serienrenamer/plugin/episode_identifier.rb +35 -0
data/lib/serienrenamer/plugin/serienjunkies_de.rb +131 -0
data/lib/serienrenamer/plugin/serienjunkies_feed.rb +112 -0
data/lib/serienrenamer/plugin/serienjunkies_org.rb +181 -0
data/lib/serienrenamer/plugin/textfile.rb +57 -0
data/lib/serienrenamer/plugin/wikipedia.rb +448 -0
data/lib/serienrenamer/plugin.rb +26 -0
data/lib/serienrenamer/version.rb +1 -1
data/lib/serienrenamer.rb +4 -25
data/serienrenamer.gemspec +1 -0
data/test/test_helper.rb +1 -1
data/test/test_plugin_episode_identifier.rb +1 -1
data/test/test_plugin_serienjunkies_de.rb +3 -3
data/test/test_plugin_serienjunkies_feed.rb +1 -1
data/test/{test_serienjunkies_org.rb → test_plugin_serienjunkies_org.rb} +3 -3
data/test/test_plugin_textfile.rb +3 -3
metadata +12 -12
data/lib/plugin/episode_identifier.rb +0 -33
data/lib/plugin/serienjunkies_de.rb +0 -129
data/lib/plugin/serienjunkies_feed.rb +0 -110
data/lib/plugin/serienjunkies_org.rb +0 -179
data/lib/plugin/textfile.rb +0 -54
data/lib/plugin/wikipedia.rb +0 -446
data/lib/plugin.rb +0 -8

data/bin/serienrenamer CHANGED Viewed

@@ -5,7 +5,6 @@
 $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
 require 'serienrenamer'
-require 'plugin'
 require 'optparse'
 require 'fileutils'
 require 'hashconfig'
@@ -85,11 +84,6 @@ opts.separator("")
 rest = opts.permute(ARGV)
-###
-# Load plugins #
-Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
-  load plugin
-end
 Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }
 puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"

data/lib/serienrenamer/plugin/episode_identifier.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# class that creates an episodename out of the episode identifier
+# for S02E04 the episodename would be "Episode 4"
+module Serienrenamer
+  module Plugin
+    class EpisodeIdentifier < Serienrenamer::Pluginbase
+      def self.plugin_name; "EpisodeIdentifier" end
+      def self.usable; true end
+      def self.priority; 1 end
+      # this method will be called from the main program
+      # with a Serienrenamer::Episode instance or a path
+      # to a directory as parameter
+      #
+      # it returns an array of episode information
+      def self.generate_episode_information(episode)
+        path = episode.episodepath
+        matched_episodes = []
+        if Serienrenamer::Episode.contains_episode_information?(path)
+          if md = Serienrenamer::Episode.extract_episode_information(path)
+            episodename = "Episode %d" % [ md[:episode].to_i ]
+            matched_episodes << episodename
+          end
+        end
+        return matched_episodes
+      end
+    end
+  end
+end

data/lib/serienrenamer/plugin/serienjunkies_de.rb ADDED Viewed

@@ -0,0 +1,131 @@
+#
+# Class that extracts information about episodes
+# from the serienjunkies.de-Page
+#
+require 'uri'
+require 'mechanize'
+module Serienrenamer
+  module Plugin
+    class SerienjunkiesDe < Serienrenamer::Pluginbase
+      def self.plugin_name; "SerienjunkiesDe" end
+      def self.plugin_url; "http://serienjunkies.de" end
+      def self.usable; true end
+      def self.priority; 50 end
+      # this method will be called from the main program
+      # with a Serienrenamer::Episode instance as parameter
+      #
+      # if this is the first call to this method, it builds up
+      # a hash with all series and existing episodes, which can
+      # be used by all future method calls
+      #
+      def self.generate_episode_information(episode)
+        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
+          episode.is_a?(Serienrenamer::Episode)
+        unless defined? @cached_data
+          @cached_data = Hash.new
+        end
+        if ! @cached_data.has_key?(episode.series)
+          if episode.series.match(/\w+/)
+            # determine link to series
+            seriespage_link = self.find_link_to_series_page(episode.series)
+            if seriespage_link
+              seriesdata = self.parse_seriespage(seriespage_link)
+              @cached_data[episode.series] = seriesdata
+            end
+          end
+        end
+        matched_episodes = []
+        # tries to find an episodename in cached_data
+        # otherwise returns empty array
+        begin
+          series = @cached_data[episode.series]
+          identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
+          episodename = series[identifier]
+          if episodename.match(/\w+/)
+            matched_episodes.push(episodename)
+          end
+        rescue
+        end
+        return matched_episodes
+      end
+      # tries to find the link to the series page because there are
+      # many different spellings of some series names
+      #   :seriesname:    -  name of the series
+      #
+      # TODO make this more intelligent so that it tries other forms
+      # of the name
+      #
+      # returns a link to a serienjunkies.de page or nil if no page was found
+      def self.find_link_to_series_page(seriesname)
+        raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
+        self.build_agent unless defined? @agent
+        url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )
+        @agent.get(url).search("a.slink").each do |series|
+          if series.text.match(/#{seriesname}/i)
+            return URI.join( plugin_url, series[:href]).to_s
+          end
+        end
+        return nil
+      end
+      # parses the supplied url and returns a hash with
+      # episode information indexed by episode identifier
+      #   :page_url:      -  url of the serienjunkies page
+      #   :german:        -  extract only german titles if true
+      def self.parse_seriespage(page_url, german=true)
+        self.build_agent unless defined? @agent
+        series = {}
+        seriesdoc  = @agent.get(page_url)
+        epidoc = @agent.click(seriesdoc.link_with(:text => /^Episoden$/i))
+        epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |episode|
+          next unless episode.search("td.thh").empty? # skip headings
+          firstchild = episode.search(":first-child")[0].text
+          md = firstchild.match(/(?<season>\d+)x(?<episode>\d+)/)
+          next unless md
+          # extract and save this information
+          identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]
+          german = episode.search("a")[1]
+          next unless german
+          series[identifier] = german.text.strip
+        end
+        return series
+      end
+      # build up a mechanize instance
+      def self.build_agent
+        @agent = Mechanize.new
+      end
+    end
+  end
+end

data/lib/serienrenamer/plugin/serienjunkies_feed.rb ADDED Viewed

@@ -0,0 +1,112 @@
+#
+# Class that extracts information about episodes
+# from the serienjunkies.org-Feed
+#
+require 'rss'
+require 'open-uri'
+module Serienrenamer
+  module Plugin
+    class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase
+      def self.plugin_name; "SerienjunkiesOrgFeed" end
+      def self.usable; true end
+      def self.priority; 80 end
+      @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'
+      # this method will be called from the main program
+      # with a Serienrenamer::Episode instance as parameter
+      #
+      # if this is the first call to this method, it builds up
+      # a hash with all series and existing episodes, which can
+      # be used by all future method calls
+      #
+      def self.generate_episode_information(episode, debug=false)
+        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
+        episode.is_a?(Serienrenamer::Episode)
+        unless defined? @feed_data
+          @feed_data = self.build_up_series_data
+        end
+        episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]
+        # search for all items that match the definition
+        # and save them uniquely in an array
+        matched_definitions = []
+        for epi in @feed_data.grep(/#{episode_definition}/)
+          serdef = epi.match(/(^.*S\d+E\d+)/)[0]
+          exist = matched_definitions.grep(/^#{serdef}/)[0]
+          if exist != nil && epi.length > exist.length
+            matched_definitions.delete(exist)
+          elsif exist != nil && epi.length < exist.length
+            next
+          end
+          matched_definitions.push(epi)
+        end
+        # find suitable episode string in the array of
+        # matched definitions
+        #
+        # start with a pattern that includes all words from
+        # Episode#series and if this does not match, it cuts
+        # off the first word and tries to match again
+        #
+        # if the pattern contains only one word and it still
+        # does not match, that last word is split
+        # character-wise, so that:
+        #  crmi ==> Criminal Minds
+        #
+        matched_episodes = []
+        name_words = episode.series.split(/ /)
+        word_splitted = false
+        while ! name_words.empty?
+          p name_words if debug
+          pattern = name_words.join('.*')
+          matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
+            break if ! matched_episodes.empty?
+          # split characterwise if last word does not match
+          if name_words.length == 1 && ! word_splitted
+            name_words = pattern.split(//)
+            word_splitted = true
+            next
+          end
+          # if the last word was already split and still does not match,
+          # then break and return an empty result set
+          break if word_splitted
+          name_words.delete_at(0)
+        end
+        return matched_episodes
+      end
+      # create a list of existing episodes
+      def self.build_up_series_data
+        feed_data = []
+        open(@feed_url) do |rss|
+          feed = RSS::Parser.parse(rss)
+          feed.items.each do |item|
+            feed_data.push(item.title.split(/ /)[1])
+          end
+        end
+        return feed_data
+      end
+      # set the feed url (e.g. for testing)
+      def self.feed_url=(feed)
+        @feed_url = File.absolute_path(feed)
+      end
+    end
+  end
+end

data/lib/serienrenamer/plugin/serienjunkies_org.rb ADDED Viewed

@@ -0,0 +1,181 @@
+#
+# Class that extracts information about episodes
+# from the serienjunkies.org-Page
+#
+require 'uri'
+require 'mechanize'
+require 'yaml'
+module Serienrenamer
+  module Plugin
+    class SerienjunkiesOrg < Serienrenamer::Pluginbase
+      def self.plugin_name; "SerienjunkiesOrg" end
+      def self.plugin_url; "http://serienjunkies.org" end
+      def self.usable; true end
+      def self.priority; 60 end
+      # Public: tries to search for an appropriate episodename
+      #
+      # if this is the first call to this method, it builds up
+      # a hash with all series and existing episodes, which can
+      # be used by all future method calls
+      #
+      # episode - Serienrenamer::Episode instance which holds the information
+      #
+      # Returns an array of possible episodenames
+      def self.generate_episode_information(episode)
+        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
+        episode.is_a?(Serienrenamer::Episode)
+        unless defined? @cached_data
+          @cached_data = Hash.new
+        end
+        if ! @cached_data.has_key?(episode.series)
+          if episode.series.match(/\w+/)
+            # determine link to series
+            seriespage_link = self.find_link_to_series_page(episode.series)
+            if seriespage_link
+              seriesdata = self.parse_seriespage(seriespage_link)
+              @cached_data[episode.series] = seriesdata
+            end
+          end
+        end
+        matched_episodes = []
+        # tries to find an episodename in cached_data
+        # otherwise returns empty array
+        begin
+          series = @cached_data[episode.series]
+          identifier = "%d_%d" % [ episode.season, episode.episode ]
+          episodename = series[identifier]
+          if episodename.match(/\w+/)
+            matched_episodes.push(episodename)
+          end
+        rescue
+        end
+        return matched_episodes
+      end
+      # Public: tries to find a link to the seriespage
+      #
+      # seriesname  - the series name for which the page is searched
+      #
+      # Returns the link or nil
+      def self.find_link_to_series_page(seriesname)
+        raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
+        self.build_agent unless defined? @agent
+        url = URI.join(plugin_url, "?cat=0&l=%s" % seriesname[0].downcase )
+        pattern = seriesname.gsub(/\s/, ".*")
+        @agent.get(url).search("div#sidebar > ul > li > a").each do |series|
+          if series.text.match(/#{pattern}/i)
+            return URI.join( plugin_url, series[:href]).to_s
+          end
+        end
+        nil
+      end
+      # Public: parses a series page and extracts the episode information
+      #
+      # page_url   - the url to the seriespage
+      # german     - if true it extracts only german data (Defaults to true)
+      #
+      # Returns a hash which contains the episode information or an empty
+      # hash if there aren't any episodes
+      def self.parse_seriespage(page_url, german=true, debug=false)
+        self.build_agent unless defined? @agent
+        series = {}
+        doc = @agent.get(page_url)
+        doc.search('div#sidebar > div#scb > div.bkname > a').each do |link|
+          if german
+            next unless link.content.match(/Staffel/i)
+          else
+            next unless link.content.match(/Season/i)
+          end
+          site = @agent.get(link[:href])
+          episodes = self.parse_season_subpage(site, german)
+          series.merge!(episodes)
+        end
+        puts series.to_yaml if debug
+        return series
+      end
+      # Public: extracts the episodes from one season
+      #
+      # page   - Mechanize page object which holds the season
+      # german - extracts german or international episodes
+      #
+      # Returns a hash with all episodes (unique)
+      def self.parse_season_subpage(page, german=true)
+        episodes = {}
+        page.search('div.post > div.post-content strong:nth-child(1)').each do |e|
+          content =  e.content
+          md = Serienrenamer::Episode.extract_episode_information(content)
+          next unless md
+          if german
+            next unless content.match(/German/i)
+            next if content.match(/Subbed/i)
+          else
+            next if content.match(/German/i)
+          end
+          episodename =
+            Serienrenamer::Episode.clean_episode_data(md[:episodename], true)
+          next unless episodename && episodename.match(/\w+/)
+          id = "%d_%d" % [ md[:season].to_i, md[:episode].to_i ]
+          next if episodes[id] && episodes[id].size > episodename.size
+          episodes[id] = episodename
+        end
+        return episodes
+      end
+      private
+      # Private: constructs a Mechanize instance and adds a fix that interprets
+      #          every response as html
+      #
+      # Returns the agent
+      def self.build_agent
+        @agent = Mechanize.new do |a|
+          a.post_connect_hooks << lambda do |_,_,response,_|
+            if response.content_type.nil? || response.content_type.empty?
+              response.content_type = 'text/html'
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/serienrenamer/plugin/textfile.rb ADDED Viewed

@@ -0,0 +1,57 @@
+#
+# Class that searches for a file with
+# episode information in the directory
+# like "episode.txt"
+#
+require 'serienrenamer'
+module Serienrenamer
+  module Plugin
+    class Textfile < Serienrenamer::Pluginbase
+      def self.plugin_name; "Textfile" end
+      def self.usable; true end
+      def self.priority; 100 end
+      # this method will be called from the main program
+      # with a Serienrenamer::Episode instance or a path
+      # to a directory as parameter
+      #
+      # it returns an array of episode information
+      def self.generate_episode_information(episode)
+        sourcedir = ""
+        if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
+          sourcedir = episode.source_directory
+        elsif episode.is_a?(String) && File.directory?(episode)
+          sourcedir = episode
+        end
+        matched_episodes = []
+        if sourcedir != "" && Dir.exists?(sourcedir)
+          # search for non-empty files that are at most 128 bytes in size
+          # and check if they contain episode information
+          Dir.new(sourcedir).each do |e|
+            file = File.join(sourcedir, e)
+            next if File.size(file) > 128 || File.zero?(file)
+            data = File.open(file, "rb").read
+            # only files with one line with the title are interesting
+            next if data.lines.to_a.size > 1
+            if data != nil && data.match(/\w+/) &&
+              Serienrenamer::Episode.contains_episode_information?(data)
+              matched_episodes.push(data)
+            end
+          end
+        end
+        return matched_episodes
+      end
+    end
+  end
+end