serienrenamer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2012-02-01
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,23 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ bin/serienrenamer
6
+ lib/plugin.rb
7
+ lib/plugin/serienjunkies_de.rb
8
+ lib/plugin/serienjunkies_feed.rb
9
+ lib/plugin/textfile.rb
10
+ lib/plugin/wikipedia.rb
11
+ lib/serienrenamer.rb
12
+ lib/serienrenamer/episode.rb
13
+ script/console
14
+ script/destroy
15
+ script/generate
16
+ serienrenamer.gemspec
17
+ test/serienjunkies_feed_sample.xml
18
+ test/test_episode.rb
19
+ test/test_helper.rb
20
+ test/test_plugin_serienjunkies_de.rb
21
+ test/test_plugin_serienjunkies_feed.rb
22
+ test/test_plugin_textfile.rb
23
+ test/test_plugin_wikipedia.rb
data/README.rdoc ADDED
@@ -0,0 +1,52 @@
1
+ = serienrenamer
2
+
3
+ * http://github.com/pboehm/serienrenamer
4
+
5
+ == DESCRIPTION:
6
+
7
+ Ruby Script that brings your series into an appropriate format
8
+ like "S01E01 - Episodename.avi"
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ * extract information from episode files in various formats
13
+ * clean up this information and bring it into a recommended format
14
+ * search for information in specific textfiles like "filename.txt"
15
+ * query the serienjunkies.org feed for episode information
16
+ * query the serienjunkies.de page for series-specific data
17
+ * query wikipedia for episode information
18
+ * repair broken German umlauts if they occur in the episode title
19
+ * rename these files
20
+
21
+ == REQUIREMENTS:
22
+
23
+ * ruby (>= 1.9)
24
+ * wlapi
25
+ * mediawiki-gateway
26
+ * highline
27
+ * nokogiri
28
+ * mechanize
29
+
30
+ == INSTALL:
31
+
32
+ * sudo gem install serienrenamer
33
+
34
+ == LICENSE:
35
+
36
+ (General Public License)
37
+
38
+ Copyright (c) 2012 Philipp Böhm
39
+
40
+ This program is free software; you can redistribute it and/or modify
41
+ it under the terms of the GNU General Public License as published by
42
+ the Free Software Foundation in version 3 of the License.
43
+
44
+ This program is distributed in the hope that it will be useful,
45
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
46
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47
+ GNU General Public License for more details.
48
+
49
+ You should have received a copy of the GNU General Public License
50
+ along with this program; if not, write to the Free Software
51
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
52
+ MA 02110-1301, USA.
data/Rakefile ADDED
@@ -0,0 +1,27 @@
# encoding: UTF-8
#
# Build script of the serienrenamer gem. It loads Hoe together with its
# newgem plugin, declares the gem specification and finally pulls in the
# task definitions that ship with newgem and with this project.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/serienrenamer'
require './lib/plugin'

Hoe.plugin(:newgem)

# Declares the gem and thereby generates the Rake tasks.
# 'rake -T' (run from the gem root directory) lists the generated tasks.
$hoe = Hoe.spec('serienrenamer') do
  developer('Philipp Boehm', 'philipp@i77i.de')
  self.rubyforge_name = name

  # runtime dependencies as [gem name, version requirement] pairs
  [
    ['wlapi',             '>= 0.8.4'],
    ['mediawiki-gateway', '>= 0.4.4'],
    ['mechanize',         '>= 2.3'],
    ['highline',          '>= 1.6.11']
  ].each do |gem_name, requirement|
    dependency(gem_name, requirement)
  end
end

require 'newgem/tasks'

# load every project specific task definition below tasks/
Dir.glob('tasks/**/*.rake').each do |rake_file|
  load rake_file
end

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
data/bin/serienrenamer ADDED
@@ -0,0 +1,155 @@
#!/usr/bin/env ruby
# -*- mode: ruby; coding: utf-8 -*-
#
# Command line front end of serienrenamer.
#
# Walks through the video files of a directory, asks the registered
# plugins for an episode name where the filename itself does not supply
# one, shows the proposed new name and renames the file after the user
# confirmed it.
#
# The encoding is declared on line 2 together with the editor mode because
# Ruby only honours a magic comment on the first line, or on the second
# line when the first one is a shebang.

$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')

require 'serienrenamer'
require 'plugin'
require 'optparse'
require 'fileutils'
require "highline/system_extensions"
include HighLine::SystemExtensions

###
# Option definition and handling
options = {}
opts = OptionParser.new("Usage: #{$0} [OPTIONS] DIR")
opts.separator("")
opts.separator("Ruby Script that brings your series into an")
opts.separator("appropriate format like 'S01E01 - Episodename.avi'")
opts.separator("")
opts.separator(" Options:")

opts.on( "-p", "--plugin STRING", String,
         "use only this plugin") do |opt|
  options[:plugin] = opt
end

opts.on( "-s", "--series STRING", String,
         "series name that will be set for all episodes") do |opt|
  options[:series] = opt
end

opts.on( "-S", "--[no-]season",
         "DIR contains episodes of one season of one series") do |opt|
  options[:is_single_season] = opt
end

opts.on( "-i", "--[no-]ignore-filenamedata",
         "Always ask plugins for episode information") do |opt|
  options[:ignore_filenamedata] = opt
end

opts.on( "-a", "--[no-]all",
         "Process all files (including right formatted files)") do |opt|
  options[:process_all_files] = opt
end

opts.separator("")
opts.separator(" Arguments:")
opts.separator(" DIR The path that includes the episodes")
opts.separator(" defaults to ~/Downloads")
opts.separator("")

# everything that is not an option is left over as positional argument
rest = opts.permute(ARGV)

################
# Load plugins #
################
Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
  load plugin
end

# plugins with a higher priority are asked first
Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }

puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"
puts ""

# change into DIR (the last positional argument, ~/Downloads by default)
episode_directory = rest.pop || File.join( File.expand_path('~'), "Downloads" )

# Dir.exist? instead of Dir.exists?: the latter was removed in Ruby 3.2
fail "'#{episode_directory}' does not exist or is not a directory" unless
  Dir.exist?(episode_directory)

Dir.chdir(episode_directory)

#########################################
# Iterate through all directory entries #
#########################################
begin

  Dir.entries('.').sort.each do |entry|

    next if entry.match(/^\./)
    next unless Serienrenamer::Episode.determine_video_file(entry)

    # skip files that already have the right format
    unless options[:process_all_files]
      next if entry.match(/^S\d+E\d+.-.\w+.*\.\w+$/)
    end

    # entries for which no Episode can be built are skipped silently
    begin
      epi = Serienrenamer::Episode.new(entry)
      epi.series = options[:series] if options[:series]
    rescue StandardError
      next
    end

    puts "<<< #{entry}"

    # if the episode name is empty (or filename data should be ignored)
    # then query the plugins
    if epi.episodename.match(/\w+/).nil? || options[:ignore_filenamedata]

      Serienrenamer::Pluginbase.registered_plugins.each do |plugin|
        # skip plugins that are not feasible
        next unless plugin.usable
        next unless plugin.respond_to?(:generate_episode_information)
        if options[:plugin]
          next unless plugin.plugin_name.match(/#{options[:plugin]}/i)
        end

        # configure cleanup
        clean_data, extract_seriesname = false, false
        case plugin.plugin_name
        when "Textfile"
          clean_data, extract_seriesname = true, true
        when "SerienjunkiesOrgFeed"
          clean_data = true
        end

        # a series name given on the command line always wins
        extract_seriesname = false if options[:series]

        # ask plugin for information, only the first result is used
        epiname = plugin.generate_episode_information(epi)[0]
        next if epiname.nil?

        puts "[#{plugin.plugin_name}] - #{epiname}"

        epi.add_episode_information(epiname, clean_data, extract_seriesname)

        # try the next plugin as long as there is still no episode name
        next unless epi.episodename.match(/\w+/)

        break
      end
    end

    puts ">>> #{epi.to_s}"

    print "Filename okay ([jy]/n): "
    char = get_character
    print char.chr

    # 'j', 'y' or a carriage return confirm, everything else skips the file
    unless char.chr.match(/[jy\r]/i)
      puts "\nwill be skipped ...\n\n"
      next
    end

    puts "\n\n"

    epi.rename()
  end

rescue Interrupt
  # leave quietly on Ctrl-C, only terminate the current output line
  puts
end
@@ -0,0 +1,129 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.de-Page
4
+ #
5
+ require 'uri'
6
+ require 'mechanize'
7
+
module Plugin

  # Plugin that extracts episode titles from the serienjunkies.de page.
  #
  # The episode list of a series is fetched once and kept in a class level
  # cache (series name => { "S01E01" => title }), so that further episodes
  # of the same series do not cause additional requests.
  class SerienjunkiesDe < Serienrenamer::Pluginbase

    def self.plugin_name; "SerienjunkiesDe" end
    def self.plugin_url; "http://serienjunkies.de" end
    def self.usable; true end
    def self.priority; 4 end

    # this method will be called from the main program
    # with a Serienrenamer::Episode instance as parameter
    #
    # on the first call for a series it builds up a hash with all
    # existing episodes of that series, which is reused by all
    # future method calls
    #
    # :episode: - Serienrenamer::Episode that should get a title
    #
    # returns an array with the matching episode title or an empty
    # array if nothing was found
    # raises ArgumentError if no Serienrenamer::Episode is supplied
    def self.generate_episode_information(episode)

      raise ArgumentError, "Serienrenamer::Episode instance needed" unless
        episode.is_a?(Serienrenamer::Episode)

      @cached_data ||= {}

      if !@cached_data.has_key?(episode.series) && episode.series.match(/\w+/)
        # determine link to series
        seriespage_link = self.find_link_to_series_page(episode.series)

        if seriespage_link
          @cached_data[episode.series] = self.parse_seriespage(seriespage_link)
        end
      end

      # explicit lookups instead of a blanket rescue: an unknown series
      # or episode simply results in an empty array
      titles = @cached_data[episode.series]
      return [] unless titles

      # to_i keeps the lookup from raising on nil or string numbers
      identifier = "S%.2dE%.2d" % [ episode.season.to_i, episode.episode.to_i ]
      episodename = titles[identifier]

      (episodename && episodename.match(/\w+/)) ? [ episodename ] : []
    end

    # tries to find the link to the series page because there are
    # plenty of different writings of some series
    # :seriesname: - name of the series
    #
    # TODO make this more intelligent so that it tries other forms
    # of the name
    #
    # returns a link to a serienjunkies.de-page or nil if no page was found
    # raises ArgumentError if seriesname contains no word character
    def self.find_link_to_series_page(seriesname)
      raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)

      self.build_agent unless defined? @agent

      # the index page is addressed by the first word character of the
      # name, so that leading blanks or punctuation do not end up in the url
      initial = seriesname[/\w/].downcase
      url = URI.join(plugin_url, "serien/%s.html" % initial)

      # the name is matched literally, characters like '(' or '?' must
      # not be interpreted as regular expression syntax
      name_pattern = /#{Regexp.escape(seriesname)}/i

      @agent.get(url).search("a.slink").each do |series|
        if series.text.match(name_pattern)
          return URI.join( plugin_url, series[:href]).to_s
        end
      end

      return nil
    end

    # parses the supplied url and returns a hash with
    # episode information indexed by episode identifier ("S01E01")
    # :page_url: - url of the serienjunkies page
    # :german: - kept for interface compatibility
    #            NOTE(review): the flag is not evaluated, the text of the
    #            second link of each row is always used (the original code
    #            treated it as the german title) - confirm before relying
    #            on it
    #
    # returns an empty hash if the page does not link to an episode list
    def self.parse_seriespage(page_url, german=true)

      self.build_agent unless defined? @agent

      series = {}

      seriesdoc = @agent.get(page_url)

      episodes_link = seriesdoc.link_with(:text => /^Episoden$/i)
      return series unless episodes_link

      epidoc = @agent.click(episodes_link)

      epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |row|

        next unless row.search("td.thh").empty? # skip headings

        first_cell = row.search(":first-child")[0]
        next unless first_cell

        # the first cell holds the episode number in the form "1x05"
        md = first_cell.text.match(/(?<season>\d+)x(?<episode>\d+)/)
        next unless md

        # extract and save this information
        identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]

        title_link = row.search("a")[1]
        next unless title_link

        series[identifier] = title_link.text.strip
      end

      return series
    end

    # build up a mechanize instance
    def self.build_agent
      @agent = Mechanize.new
    end
  end
end
@@ -0,0 +1,105 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.org-Feed
4
+ #
5
+ require 'rss'
6
+ require 'open-uri'
7
+
module Plugin

  # Plugin that extracts episode information from the
  # serienjunkies.org feed.
  #
  # The feed is downloaded once and the second blank separated part of
  # every item title is kept in a class level cache, which is searched on
  # every call.
  class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase

    def self.plugin_name; "SerienjunkiesOrgFeed" end
    def self.usable; true end
    def self.priority; 10 end

    @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'

    # this method will be called from the main program
    # with a Serienrenamer::Episode instance as parameter
    #
    # if this is the first call to this method, it builds up
    # a list of all episodes from the feed, which is reused
    # by all future method calls
    #
    # returns an array of feed entries that match the episode,
    # which is empty if nothing was found
    # raises ArgumentError if no Serienrenamer::Episode is supplied
    def self.generate_episode_information(episode)

      raise ArgumentError, "Serienrenamer::Episode instance needed" unless
        episode.is_a?(Serienrenamer::Episode)

      @feed_data ||= self.build_up_series_data

      episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]

      # search for all items that match the definition; of several
      # entries with the same "Series.SxxExx" prefix the longer one
      # replaces the shorter one
      matched_definitions = []
      @feed_data.grep(/#{episode_definition}/).each do |epi|
        serdef = epi.match(/(^.*S\d+E\d+)/)[0]

        # plain prefix comparison, the feed text must not be
        # interpreted as a regular expression
        exist = matched_definitions.find { |known| known.start_with?(serdef) }

        if exist
          next if epi.length < exist.length
          matched_definitions.delete(exist) if epi.length > exist.length
        end

        matched_definitions.push(epi)
      end

      # find suitable episode string in the array of
      # matched definitions
      #
      # start with a pattern that includes all words from
      # Episode#series and if this does not match, cut
      # off the first word and try to match again
      #
      # if the pattern consists of one word that still does
      # not match, this word is split characterwise, so that:
      # crmi ==> Criminal Minds
      #
      # NOTE(review): the words are used as regular expression fragments
      # on purpose (unchanged behaviour); fragments that are no valid
      # expression are treated as "no match" instead of aborting
      matched_episodes = []
      name_words = episode.series.split(/ /)
      word_splitted = false

      until name_words.empty?

        pattern = name_words.join('.*')
        begin
          matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
        rescue RegexpError
          matched_episodes = []
        end
        break unless matched_episodes.empty?

        # split characterwise if last word does not match
        if name_words.length == 1 && !word_splitted
          name_words = pattern.split(//)
          word_splitted = true
          next
        end

        name_words.delete_at(0)
      end

      return matched_episodes
    end

    # create a list of existing episodes from the feed
    #
    # returns an array with the second blank separated part of every
    # item title; items without such a part are left out
    def self.build_up_series_data
      feed_data = []

      collect_titles = lambda do |rss|
        feed = RSS::Parser.parse(rss)

        # guards against a nil parse result, nothing to collect then
        next unless feed

        feed.items.each do |item|
          definition = item.title.to_s.split(/ /)[1]
          feed_data.push(definition) if definition
        end
      end

      # Kernel#open does not handle urls anymore on Ruby >= 3.0, URI.open
      # is the replacement where open-uri provides it; older rubies only
      # have the (private) Kernel#open, for which respond_to? is false
      if URI.respond_to?(:open)
        URI.open(@feed_url, &collect_titles)
      else
        open(@feed_url, &collect_titles)
      end

      return feed_data
    end

    # set the feed url (e.g for testing)
    #
    # data cached from a previously used feed is dropped, so that
    # the next lookup reads the new feed
    def self.feed_url=(feed)
      @feed_data = nil
      @feed_url = feed
    end
  end
end
@@ -0,0 +1,50 @@
1
+ #
2
+ # Class that searches for a file with
3
+ # episode information in the directory
4
+ # like "episode.txt"
5
+ #
6
+
module Plugin

  # Plugin that searches the directory of an episode for small
  # files (like "episode.txt") which contain episode information.
  class Textfile < Serienrenamer::Pluginbase

    def self.plugin_name; "Textfile" end
    def self.usable; true end
    def self.priority; 100 end

    # this method will be called from the main program
    # with a Serienrenamer::Episode instance or a path
    # to a directory as parameter
    #
    # it returns an array with the content of every matching file,
    # which is empty if there is no usable directory or no such file
    def self.generate_episode_information(episode)

      sourcedir =
        if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
          episode.source_directory
        elsif episode.is_a?(String) && File.directory?(episode)
          episode
        end

      matched_episodes = []

      # Dir.exist? instead of Dir.exists?: the latter was removed in Ruby 3.2
      return matched_episodes unless sourcedir && Dir.exist?(sourcedir)

      # search for non-empty files of at most 128 bytes
      # and check if they contain episode information
      Dir.foreach(sourcedir) do |entry|
        file = File.join(sourcedir, entry)

        # only regular files can be read; "." and ".." or subdirectories
        # may report a small size too and would raise on read
        next unless File.file?(file)
        next if File.size(file) > 128 || File.zero?(file)

        # block form closes the file handle again
        data = File.open(file, "rb") { |handle| handle.read }

        if data && data.match(/\w+/) &&
            Serienrenamer::Episode.contains_episode_information?(data)
          matched_episodes.push(data)
        end
      end

      return matched_episodes
    end
  end
end