serienrenamer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2012-02-01
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,23 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ bin/serienrenamer
6
+ lib/plugin.rb
7
+ lib/plugin/serienjunkies_de.rb
8
+ lib/plugin/serienjunkies_feed.rb
9
+ lib/plugin/textfile.rb
10
+ lib/plugin/wikipedia.rb
11
+ lib/serienrenamer.rb
12
+ lib/serienrenamer/episode.rb
13
+ script/console
14
+ script/destroy
15
+ script/generate
16
+ serienrenamer.gemspec
17
+ test/serienjunkies_feed_sample.xml
18
+ test/test_episode.rb
19
+ test/test_helper.rb
20
+ test/test_plugin_serienjunkies_de.rb
21
+ test/test_plugin_serienjunkies_feed.rb
22
+ test/test_plugin_textfile.rb
23
+ test/test_plugin_wikipedia.rb
data/README.rdoc ADDED
@@ -0,0 +1,52 @@
1
+ = serienrenamer
2
+
3
+ * http://github.com/pboehm/serienrenamer
4
+
5
+ == DESCRIPTION:
6
+
7
+ Ruby Script that brings your series into an appropriate format
8
+ like "S01E01 - Episodename.avi"
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ * extract information from episode files in various formats
13
+ * clean this information and bring it into a recommended format
14
+ * search for information in specific textfiles like "filename.txt"
15
+ * query the serienjunkies.org feed for episode information
16
+ * query the serienjunkies.de page for series-specific data
17
+ * query wikipedia for episode information
18
+ * repair broken German umlauts if they occur in the episode title
19
+ * rename these files
20
+
21
+ == REQUIREMENTS:
22
+
23
+ * ruby (>= 1.9)
24
+ * wlapi
25
+ * mediawiki-gateway
26
+ * highline
27
+ * nokogiri
28
+ * mechanize
29
+
30
+ == INSTALL:
31
+
32
+ * sudo gem install serienrenamer
33
+
34
+ == LICENSE:
35
+
36
+ (General Public License)
37
+
38
+ Copyright (c) 2012 Philipp Böhm
39
+
40
+ This program is free software; you can redistribute it and/or modify
41
+ it under the terms of the GNU General Public License as published by
42
+ the Free Software Foundation in version 3 of the License.
43
+
44
+ This program is distributed in the hope that it will be useful,
45
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
46
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47
+ GNU General Public License for more details.
48
+
49
+ You should have received a copy of the GNU General Public License
50
+ along with this program; if not, write to the Free Software
51
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
52
+ MA 02110-1301, USA.
data/Rakefile ADDED
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+ require 'rubygems'
3
+ gem 'hoe', '>= 2.1.0'
4
+ require 'hoe'
5
+ require 'fileutils'
6
+ require './lib/serienrenamer'
7
+ require './lib/plugin'
8
+
9
+ Hoe.plugin :newgem
10
+
11
+ # Generate all the Rake tasks
12
+ # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
+ $hoe = Hoe.spec 'serienrenamer' do
14
+ self.developer 'Philipp Boehm', 'philipp@i77i.de'
15
+ self.rubyforge_name = self.name
16
+ self.dependency('wlapi', '>= 0.8.4')
17
+ self.dependency('mediawiki-gateway', '>= 0.4.4')
18
+ self.dependency('mechanize', '>= 2.3')
19
+ self.dependency('highline', '>= 1.6.11')
20
+ end
21
+
22
+ require 'newgem/tasks'
23
+ Dir['tasks/**/*.rake'].each { |t| load t }
24
+
25
+ # TODO - want other tests/tasks run by default? Add them to the list
26
+ # remove_task :default
27
+ # task :default => [:spec, :features]
data/bin/serienrenamer ADDED
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- ruby -*-
3
+ # encoding: UTF-8
4
+
5
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
6
+
7
+ require 'serienrenamer'
8
+ require 'plugin'
9
+ require 'optparse'
10
+ require 'fileutils'
11
+ require "highline/system_extensions"
12
+ include HighLine::SystemExtensions
13
+
14
+ ###
15
+ # Option definition and handling
16
+ options = {}
17
+ opts = OptionParser.new("Usage: #{$0} [OPTIONS] DIR")
18
+ opts.separator("")
19
+ opts.separator("Ruby Script that brings your series into an")
20
+ opts.separator("appropriate format like 'S01E01 - Episodename.avi'")
21
+ opts.separator("")
22
+ opts.separator(" Options:")
23
+
24
+ opts.on( "-p", "--plugin STRING", String,
25
+ "use only this plugin") do |opt|
26
+ options[:plugin] = opt
27
+ end
28
+
29
+ opts.on( "-s", "--series STRING", String,
30
+ "series name that will be set for all episodes") do |opt|
31
+ options[:series] = opt
32
+ end
33
+
34
+ opts.on( "-S", "--[no-]season",
35
+ "DIR contains episodes of one season of one series") do |opt|
36
+ options[:is_single_season] = opt
37
+ end
38
+
39
+ opts.on( "-i", "--[no-]ignore-filenamedata",
40
+ "Always ask plugins for episode information") do |opt|
41
+ options[:ignore_filenamedata] = opt
42
+ end
43
+
44
+ opts.on( "-a", "--[no-]all",
45
+ "Process all files (including right formatted files)") do |opt|
46
+ options[:process_all_files] = opt
47
+ end
48
+
49
+ opts.separator("")
50
+ opts.separator(" Arguments:")
51
+ opts.separator(" DIR The path that includes the episodes")
52
+ opts.separator(" defaults to ~/Downloads")
53
+ opts.separator("")
54
+
55
+ rest = opts.permute(ARGV)
56
+
57
+ ################
58
+ # Load plugins #
59
+ ################
60
+ Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
61
+ load plugin
62
+ end
63
+ Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }
64
+
65
+ puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"
66
+ puts ""
67
+
68
+ # change into DIR
69
+ episode_directory = rest.pop || File.join( File.expand_path('~'), "Downloads" )
70
+
71
+ fail "'#{episode_directory}' does not exist or is not a directory" unless
72
+ Dir.exists?(episode_directory)
73
+
74
+ Dir.chdir(episode_directory)
75
+
76
+ #########################################
77
+ # Iterate through all directory entries #
78
+ #########################################
79
+ begin
80
+
81
+ for entry in Dir.entries('.').sort do
82
+
83
+ next if entry.match(/^\./)
84
+ next unless Serienrenamer::Episode.determine_video_file(entry)
85
+
86
+ # skip files that already have the right format
87
+ unless options[:process_all_files]
88
+ next if entry.match(/^S\d+E\d+.-.\w+.*\.\w+$/)
89
+ end
90
+
91
+ begin
92
+ epi = Serienrenamer::Episode.new(entry)
93
+ if options[:series]
94
+ epi.series = options[:series]
95
+ end
96
+ rescue => e
97
+ next
98
+ end
99
+
100
+ puts "<<< #{entry}"
101
+
102
+ # if the episode name is empty, then query the plugins
103
+ if epi.episodename.match(/\w+/).nil? || options[:ignore_filenamedata]
104
+
105
+ Serienrenamer::Pluginbase.registered_plugins.each do |plugin|
106
+ # skip plugins that are not feasible
107
+ next unless plugin.usable
108
+ next unless plugin.respond_to?(:generate_episode_information)
109
+ if options[:plugin]
110
+ next unless plugin.plugin_name.match(/#{options[:plugin]}/i)
111
+ end
112
+
113
+ # configure cleanup
114
+ clean_data, extract_seriesname = false, false
115
+ case plugin.plugin_name
116
+ when "Textfile"
117
+ clean_data, extract_seriesname = true, true
118
+ when "SerienjunkiesOrgFeed"
119
+ clean_data = true
120
+ end
121
+
122
+ extract_seriesname = false if options[:series]
123
+
124
+ # ask plugin for information
125
+ epiname = plugin.generate_episode_information(epi)[0]
126
+ next if epiname == nil
127
+
128
+ puts "[#{plugin.plugin_name}] - #{epiname}"
129
+
130
+ epi.add_episode_information(epiname, clean_data, extract_seriesname)
131
+ next unless epi.episodename.match(/\w+/)
132
+
133
+ break
134
+ end
135
+ end
136
+
137
+ puts ">>> #{epi.to_s}"
138
+
139
+ print "Filename okay ([jy]/n): "
140
+ char = get_character
141
+ print char.chr
142
+
143
+ unless char.chr.match(/[jy\r]/i)
144
+ puts "\nwill be skipped ...\n\n"
145
+ next
146
+ end
147
+
148
+ puts "\n\n"
149
+
150
+ epi.rename()
151
+ end
152
+
153
+ rescue Interrupt => e
154
+ puts
155
+ end
@@ -0,0 +1,129 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.de-Page
4
+ #
5
+ require 'uri'
6
+ require 'mechanize'
7
+
8
+ module Plugin
9
+
10
+ class SerienjunkiesDe < Serienrenamer::Pluginbase
11
+
12
+ def self.plugin_name; "SerienjunkiesDe" end
13
+ def self.plugin_url; "http://serienjunkies.de" end
14
+ def self.usable; true end
15
+ def self.priority; 4 end
16
+
17
+ # this method will be called from the main program
18
+ # with a Serienrenamer::Episode instance as parameter
19
+ #
20
+ # if this is the first call to this method, it builds up
21
+ # a hash with all series and existing episodes, which can
22
+ # be used by all future method calls
23
+ #
24
+ def self.generate_episode_information(episode)
25
+
26
+ raise ArgumentError, "Serienrenamer::Episode instance needed" unless
27
+ episode.is_a?(Serienrenamer::Episode)
28
+
29
+ unless defined? @cached_data
30
+ @cached_data = Hash.new
31
+ end
32
+
33
+ if ! @cached_data.has_key?(episode.series)
34
+
35
+ if episode.series.match(/\w+/)
36
+
37
+ # determine link to series
38
+ seriespage_link = self.find_link_to_series_page(episode.series)
39
+
40
+ if seriespage_link
41
+ seriesdata = self.parse_seriespage(seriespage_link)
42
+
43
+ @cached_data[episode.series] = seriesdata
44
+ end
45
+ end
46
+ end
47
+
48
+ matched_episodes = []
49
+
50
+ # tries to find an episodename in cached_data
51
+ # otherwise returns empty array
52
+ begin
53
+ series = @cached_data[episode.series]
54
+ identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
55
+ episodename = series[identifier]
56
+
57
+ if episodename.match(/\w+/)
58
+ matched_episodes.push(episodename)
59
+ end
60
+ rescue
61
+ end
62
+
63
+ return matched_episodes
64
+ end
65
+
66
+ # tries to find the link to the series page because there are
67
+ # many different spellings of some series names
68
+ # :seriesname: - name of the series
69
+ #
70
+ # TODO make this more intelligent so that it tries other forms
71
+ # of the name
72
+ #
73
+ # returns a link to a serienjunkies.de page or nil if no page was found
74
+ def self.find_link_to_series_page(seriesname)
75
+ raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
76
+
77
+ self.build_agent unless defined? @agent
78
+
79
+ url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )
80
+
81
+ @agent.get(url).search("a.slink").each do |series|
82
+ if series.text.match(/#{seriesname}/i)
83
+ return URI.join( plugin_url, series[:href]).to_s
84
+ end
85
+ end
86
+
87
+ return nil
88
+ end
89
+
90
+ # parses the supplied url and returns a hash with
91
+ # episode information indexed by episode identifier
92
+ # :page_url: - url of the serienjunkies page
93
+ # :german: - meant to restrict extraction to German titles; NOTE: currently never read (the local is reassigned in the loop)
94
+ def self.parse_seriespage(page_url, german=true)
95
+
96
+ self.build_agent unless defined? @agent
97
+
98
+ series = {}
99
+
100
+ seriesdoc = @agent.get(page_url)
101
+ epidoc = @agent.click(seriesdoc.link_with(:text => /^Episoden$/i))
102
+
103
+ epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |episode|
104
+
105
+ next unless episode.search("td.thh").empty? # skip headings
106
+
107
+ firstchild = episode.search(":first-child")[0].text
108
+ md = firstchild.match(/(?<season>\d+)x(?<episode>\d+)/)
109
+
110
+ next unless md
111
+
112
+ # extract and save this information
113
+ identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]
114
+
115
+ german = episode.search("a")[1]
116
+ next unless german
117
+
118
+ series[identifier] = german.text.strip
119
+ end
120
+
121
+ return series
122
+ end
123
+
124
+ # build up a mechanize instance
125
+ def self.build_agent
126
+ @agent = Mechanize.new
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,105 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.org-Feed
4
+ #
5
+ require 'rss'
6
+ require 'open-uri'
7
+
8
+ module Plugin
9
+
10
+ class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase
11
+
12
+ def self.plugin_name; "SerienjunkiesOrgFeed" end
13
+ def self.usable; true end
14
+ def self.priority; 10 end
15
+
16
+ @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'
17
+
18
+ # this method will be called from the main program
19
+ # with a Serienrenamer::Episode instance as parameter
20
+ #
21
+ # if this is the first call to this method, it builds up
22
+ # a hash with all series and existing episodes, which can
23
+ # be used by all future method calls
24
+ #
25
+ def self.generate_episode_information(episode)
26
+
27
+ raise ArgumentError, "Serienrenamer::Episode instance needed" unless
28
+ episode.is_a?(Serienrenamer::Episode)
29
+
30
+ unless defined? @feed_data
31
+ @feed_data = self.build_up_series_data
32
+ end
33
+
34
+ episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]
35
+
36
+ # search for all items that match the definition
37
+ # and save them uniquely in an array
38
+ matched_definitions = []
39
+ for epi in @feed_data.grep(/#{episode_definition}/)
40
+ serdef = epi.match(/(^.*S\d+E\d+)/)[0]
41
+ exist = matched_definitions.grep(/^#{serdef}/)[0]
42
+
43
+ if exist != nil && epi.length > exist.length
44
+ matched_definitions.delete(exist)
45
+ elsif exist != nil && epi.length < exist.length
46
+ next
47
+ end
48
+
49
+ matched_definitions.push(epi)
50
+ end
51
+
52
+ # find suitable episode string in the array of
53
+ # matched definitions
54
+ #
55
+ # start with a pattern that includes all words from
56
+ # Episode#series and if this does not match, it cuts
57
+ # off the first word and tries to match again
58
+ #
59
+ # if the pattern contains one word and if this
60
+ # still does not match, the last word is split
61
+ # characterwise, so that:
62
+ # crmi ==> Criminal Minds
63
+ #
64
+ matched_episodes = []
65
+ name_words = episode.series.split(/ /)
66
+ word_splitted = false
67
+
68
+ while ! name_words.empty?
69
+
70
+ pattern = name_words.join('.*')
71
+ matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
72
+ break if ! matched_episodes.empty?
73
+
74
+ # split characterwise if last word does not match
75
+ if name_words.length == 1 && ! word_splitted
76
+ name_words = pattern.split(//)
77
+ word_splitted = true
78
+ next
79
+ end
80
+
81
+ name_words.delete_at(0)
82
+ end
83
+
84
+ return matched_episodes
85
+ end
86
+
87
+ # create a list of existing episodes
88
+ def self.build_up_series_data
89
+ feed_data = []
90
+
91
+ open(@feed_url) do |rss|
92
+ feed = RSS::Parser.parse(rss)
93
+ feed.items.each do |item|
94
+ feed_data.push(item.title.split(/ /)[1])
95
+ end
96
+ end
97
+ return feed_data
98
+ end
99
+
100
+ # set the feed url (e.g for testing)
101
+ def self.feed_url=(feed)
102
+ @feed_url = feed
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,50 @@
1
+ #
2
+ # Class that searches for a file with
3
+ # episode information in the directory
4
+ # like "episode.txt"
5
+ #
6
+
7
+ module Plugin
8
+
9
+ class Textfile < Serienrenamer::Pluginbase
10
+
11
+ def self.plugin_name; "Textfile" end
12
+ def self.usable; true end
13
+ def self.priority; 100 end
14
+
15
+ # this method will be called from the main program
16
+ # with an Serienrenamer::Episode instance or a path
17
+ # to a directory as parameter
18
+ #
19
+ # it returns an array of episode information
20
+ def self.generate_episode_information(episode)
21
+
22
+ sourcedir = ""
23
+ if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
24
+ sourcedir = episode.source_directory
25
+ elsif episode.is_a?(String) && File.directory?(episode)
26
+ sourcedir = episode
27
+ end
28
+
29
+ matched_episodes = []
30
+
31
+ if sourcedir != "" && Dir.exists?(sourcedir)
32
+
33
+ # search for files that are smaller than 128 Bytes
34
+ # and check whether they contain episode information
35
+ Dir.new(sourcedir).each do |e|
36
+ file = File.join(sourcedir, e)
37
+ next if File.size(file) > 128 || File.zero?(file)
38
+
39
+ data = File.open(file, "rb").read
40
+ if data != nil && data.match(/\w+/) &&
41
+ Serienrenamer::Episode.contains_episode_information?(data)
42
+ matched_episodes.push(data)
43
+ end
44
+ end
45
+ end
46
+
47
+ return matched_episodes
48
+ end
49
+ end
50
+ end