serienrenamer 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
data/bin/serienrenamer CHANGED
@@ -5,7 +5,6 @@
5
5
  $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
6
6
 
7
7
  require 'serienrenamer'
8
- require 'plugin'
9
8
  require 'optparse'
10
9
  require 'fileutils'
11
10
  require 'hashconfig'
@@ -85,11 +84,6 @@ opts.separator("")
85
84
 
86
85
  rest = opts.permute(ARGV)
87
86
 
88
- ###
89
- # Load plugins #
90
- Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
91
- load plugin
92
- end
93
87
  Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }
94
88
 
95
89
  puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"
@@ -0,0 +1,35 @@
1
+ # class that creates an episodename out of the episode identifier
2
+ # for S02E04 the episodename would be "Episode 4"
3
+
4
module Serienrenamer
  module Plugin

    # Plugin that builds an episode name purely from the episode
    # identifier, e.g. for "S02E04" the episode name is "Episode 4".
    class EpisodeIdentifier < Serienrenamer::Pluginbase

      def self.plugin_name; "EpisodeIdentifier" end
      def self.usable; true end
      def self.priority; 1 end

      # Public: called from the main program to produce episode names.
      #
      # episode - a Serienrenamer::Episode instance (any object that
      #           responds to #episodepath) or a path String
      #
      # Returns an Array holding one "Episode <number>" String, or an
      # empty Array if no episode information is found in the path.
      def self.generate_episode_information(episode)
        # A plain path is documented as valid input; only ask for
        # #episodepath when the argument actually provides it.
        path = episode.respond_to?(:episodepath) ? episode.episodepath : episode

        return [] unless Serienrenamer::Episode.contains_episode_information?(path)

        md = Serienrenamer::Episode.extract_episode_information(path)
        return [] unless md

        [ "Episode %d" % [ md[:episode].to_i ] ]
      end
    end
  end
end
@@ -0,0 +1,131 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.de-Page
4
+ #
5
+ require 'uri'
6
+ require 'mechanize'
7
+
8
module Serienrenamer
  module Plugin

    # Plugin that extracts episode names from the serienjunkies.de pages.
    # Page data is kept per series name in a class-level cache so that a
    # series is fetched only once per process.
    class SerienjunkiesDe < Serienrenamer::Pluginbase

      def self.plugin_name; "SerienjunkiesDe" end
      def self.plugin_url; "http://serienjunkies.de" end
      def self.usable; true end
      def self.priority; 50 end

      # Public: called from the main program to produce episode names.
      #
      # On the first call for a series the series page is located and
      # parsed; the resulting hash is cached and reused by later calls.
      #
      # episode - Serienrenamer::Episode instance
      #
      # Returns an Array with the matching episode name, or an empty
      # Array if nothing is known for this episode.
      # Raises ArgumentError if episode is not a Serienrenamer::Episode.
      def self.generate_episode_information(episode)
        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
          episode.is_a?(Serienrenamer::Episode)

        @cached_data ||= {}

        if !@cached_data.has_key?(episode.series) && episode.series.match(/\w+/)
          # determine link to series
          seriespage_link = find_link_to_series_page(episode.series)

          if seriespage_link
            @cached_data[episode.series] = parse_seriespage(seriespage_link)
          end
        end

        series = @cached_data[episode.series]
        return [] unless series

        begin
          identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
        rescue TypeError, ArgumentError
          # season/episode are missing or not numeric: nothing to look up
          return []
        end

        episodename = series[identifier]
        return [] unless episodename.is_a?(String) && episodename.match(/\w+/)

        [ episodename ]
      end

      # Public: tries to find the link to the series page, because there
      # are plenty of different spellings for some series.
      #
      # seriesname - name of the series
      #
      # TODO make this more intelligent so that it tries other forms
      #      of the name
      #
      # Returns a link to a serienjunkies.de page as String, or nil if no
      # page was found.
      # Raises ArgumentError if seriesname contains no word characters.
      def self.find_link_to_series_page(seriesname)
        raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)

        build_agent unless defined? @agent

        # the index pages are split by the first letter of the series name
        url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase)

        # the name is data, not a pattern: escape it so that characters
        # such as "(", "+" or "?" neither raise RegexpError nor change
        # what is matched
        name_pattern = /#{Regexp.escape(seriesname)}/i

        @agent.get(url).search("a.slink").each do |series|
          if series.text.match(name_pattern)
            return URI.join(plugin_url, series[:href]).to_s
          end
        end

        nil
      end

      # Public: parses the supplied url and returns a hash with episode
      # names indexed by episode identifier ("S01E02").
      #
      # page_url - url of the serienjunkies page
      # german   - accepted for interface compatibility; it is not
      #            consulted. The second link of every table row is
      #            always used (the original code called that link
      #            `german`, so presumably it is the German title -
      #            TODO confirm against the page markup)
      #
      # Returns a Hash, empty if the page links no episode list.
      def self.parse_seriespage(page_url, german=true)
        build_agent unless defined? @agent

        series = {}

        seriesdoc = @agent.get(page_url)
        episodes_link = seriesdoc.link_with(:text => /^Episoden$/i)
        return series unless episodes_link

        epidoc = @agent.click(episodes_link)

        epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |row|
          next unless row.search("td.thh").empty? # skip headings

          first_cell = row.search(":first-child")[0]
          next unless first_cell

          md = first_cell.text.match(/(?<season>\d+)x(?<episode>\d+)/)
          next unless md

          # extract and save this information
          identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]

          title_link = row.search("a")[1]
          next unless title_link

          series[identifier] = title_link.text.strip
        end

        series
      end

      # Public: builds up a Mechanize instance and stores it in @agent.
      #
      # Returns the agent.
      def self.build_agent
        @agent = Mechanize.new
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.org-Feed
4
+ #
5
+ require 'rss'
6
+ require 'open-uri'
7
+
8
module Serienrenamer
  module Plugin

    # Plugin that extracts episode names from the serienjunkies.org feed.
    # The feed is read once and its entries are kept in a class-level
    # cache for all later calls.
    class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase

      def self.plugin_name; "SerienjunkiesOrgFeed" end
      def self.usable; true end
      def self.priority; 80 end

      @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'

      # Public: called from the main program to produce episode names.
      #
      # On the first call the feed is loaded and cached so that it can
      # be used by all future calls.
      #
      # episode - Serienrenamer::Episode instance
      # debug   - prints the current search words on every round if true
      #           (Defaults to false)
      #
      # Returns an Array of feed entries that match the series name and
      # the episode identifier; empty if nothing matches.
      # Raises ArgumentError if episode is not a Serienrenamer::Episode.
      def self.generate_episode_information(episode, debug=false)
        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
          episode.is_a?(Serienrenamer::Episode)

        @feed_data = build_up_series_data unless defined? @feed_data

        episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]

        # search for all items that match the definition and save them
        # uniquely: per "<series>.SxxEyy" prefix the longer entry wins
        matched_definitions = []
        @feed_data.grep(/#{episode_definition}/).each do |epi|
          # an identical entry adds nothing
          next if matched_definitions.include?(epi)

          serdef = epi.match(/(^.*S\d+E\d+)/)[0]
          # literal prefix comparison; feed titles are data, not patterns
          exist = matched_definitions.find { |known| known.start_with?(serdef) }

          if exist
            next if epi.length < exist.length
            matched_definitions.delete(exist) if epi.length > exist.length
          end

          matched_definitions.push(epi)
        end

        # find a suitable episode string in the array of matched
        # definitions
        #
        # start with a pattern that includes all words from
        # Episode#series and, if this does not match, cut off the first
        # word and try again
        #
        # if the pattern is down to one word that still does not match,
        # that word is split characterwise, so that:
        #     crmi ==> Criminal Minds
        #
        matched_episodes = []
        name_words = episode.series.split(/ /)
        word_splitted = false

        until name_words.empty?
          p name_words if debug

          # escape every word so that regex metacharacters in a series
          # name are matched literally and cannot raise RegexpError
          pattern = name_words.map { |word| Regexp.escape(word) }.join('.*')
          matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
          break unless matched_episodes.empty?

          # split characterwise if the last word does not match
          if name_words.length == 1 && !word_splitted
            name_words = name_words.first.split(//)
            word_splitted = true
            next
          end

          # if the last word was split and still does not match, give up
          # and return an empty result set
          break if word_splitted

          name_words.delete_at(0)
        end

        matched_episodes
      end

      # Public: creates a list of existing episodes from the feed.
      #
      # Returns an Array with the second space-separated token of every
      # feed item title (nil for titles without a space).
      def self.build_up_series_data
        feed_data = []

        # Kernel#open no longer opens URLs on Ruby >= 3.0; URI.open
        # (provided by open-uri on newer Rubies) handles URLs and falls
        # back to a regular open for local paths. Older Rubies only offer
        # the Kernel variant patched by open-uri.
        opener = URI.respond_to?(:open) ? URI : Kernel

        opener.open(@feed_url) do |rss|
          feed = RSS::Parser.parse(rss)
          feed.items.each do |item|
            feed_data.push(item.title.split(/ /)[1])
          end
        end

        feed_data
      end

      # Public: sets the feed url to a local file (e.g. for testing).
      #
      # feed - path to a feed file; stored as absolute path
      def self.feed_url=(feed)
        @feed_url = File.absolute_path(feed)
      end
    end
  end
end
@@ -0,0 +1,181 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.org-Page
4
+ #
5
+ require 'uri'
6
+ require 'mechanize'
7
+ require 'yaml'
8
+
9
module Serienrenamer
  module Plugin

    # Plugin that extracts episode names from the serienjunkies.org pages.
    # Page data is kept per series name in a class-level cache so that a
    # series is fetched only once per process.
    class SerienjunkiesOrg < Serienrenamer::Pluginbase

      def self.plugin_name; "SerienjunkiesOrg" end
      def self.plugin_url; "http://serienjunkies.org" end
      def self.usable; true end
      def self.priority; 60 end

      # Public: tries to search for an appropriate episodename
      #
      # if this is the first call for a series, it builds up a hash with
      # the existing episodes of that series, which is then used by all
      # future method calls
      #
      # episode - Serienrenamer::Episode instance which holds the information
      #
      # Returns an array of possible episodenames
      # Raises ArgumentError if episode is not a Serienrenamer::Episode
      def self.generate_episode_information(episode)
        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
          episode.is_a?(Serienrenamer::Episode)

        @cached_data ||= {}

        if !@cached_data.has_key?(episode.series) && episode.series.match(/\w+/)
          # determine link to series
          seriespage_link = find_link_to_series_page(episode.series)

          if seriespage_link
            @cached_data[episode.series] = parse_seriespage(seriespage_link)
          end
        end

        series = @cached_data[episode.series]
        return [] unless series

        begin
          identifier = "%d_%d" % [ episode.season, episode.episode ]
        rescue TypeError, ArgumentError
          # season/episode are missing or not numeric: nothing to look up
          return []
        end

        episodename = series[identifier]
        return [] unless episodename.is_a?(String) && episodename.match(/\w+/)

        [ episodename ]
      end

      # Public: tries to find a link to the seriespage
      #
      # seriesname - the series name for which the page is searched
      #
      # Returns the link or nil
      # Raises ArgumentError if seriesname contains no word characters
      def self.find_link_to_series_page(seriesname)
        raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)

        build_agent unless defined? @agent

        # the index is split by the first letter of the series name
        url = URI.join(plugin_url, "?cat=0&l=%s" % seriesname[0].downcase)

        # whitespace may match anything, every other character is matched
        # literally so that regex metacharacters in a name neither raise
        # RegexpError nor change what is matched
        pattern = seriesname.split(/\s/).map { |part| Regexp.escape(part) }.join(".*")
        name_pattern = /#{pattern}/i

        @agent.get(url).search("div#sidebar > ul > li > a").each do |series|
          if series.text.match(name_pattern)
            return URI.join(plugin_url, series[:href]).to_s
          end
        end

        nil
      end

      # Public: parses a series page and extracts the episode information
      #
      # page_url - the url to the seriespage
      # german   - if true it follows the "Staffel" links, otherwise the
      #            "Season" links (Defaults to true)
      # debug    - prints the collected data as YAML if true
      #            (Defaults to false)
      #
      # Returns a hash which contains the episode information or an empty
      # hash if there aren't any episodes
      def self.parse_seriespage(page_url, german=true, debug=false)
        build_agent unless defined? @agent

        series = {}
        doc = @agent.get(page_url)
        season_label = german ? /Staffel/i : /Season/i

        doc.search('div#sidebar > div#scb > div.bkname > a').each do |link|
          next unless link.content.match(season_label)

          site = @agent.get(link[:href])
          episodes = parse_season_subpage(site, german)

          series.merge!(episodes)
        end

        puts series.to_yaml if debug

        series
      end

      # Public: extracts the episodes from one season
      #
      # page   - Mechanize page object which holds the season
      # german - if true only entries marked "German" and not "Subbed"
      #          are used, otherwise only entries not marked "German"
      #
      # Returns a hash with all episodes (unique), indexed by
      # "<season>_<episode>"; for duplicates the longer name is kept
      def self.parse_season_subpage(page, german=true)
        episodes = {}

        page.search('div.post > div.post-content strong:nth-child(1)').each do |e|
          content = e.content
          md = Serienrenamer::Episode.extract_episode_information(content)
          next unless md

          if german
            next unless content.match(/German/i)
            next if content.match(/Subbed/i)
          else
            next if content.match(/German/i)
          end

          episodename =
            Serienrenamer::Episode.clean_episode_data(md[:episodename], true)
          next unless episodename && episodename.match(/\w+/)

          id = "%d_%d" % [ md[:season].to_i, md[:episode].to_i ]

          next if episodes[id] && episodes[id].size > episodename.size

          episodes[id] = episodename
        end

        episodes
      end

      # Internal: constructs a Mechanize instance and adds a fix that
      # interprets every response without content type as html
      #
      # NOTE: a bare `private` has no effect on `def self.` methods, so
      # this method has always been callable from outside; it is left
      # that way to stay compatible.
      #
      # Returns the agent
      def self.build_agent
        @agent = Mechanize.new do |a|
          a.post_connect_hooks << lambda do |_,_,response,_|
            if response.content_type.nil? || response.content_type.empty?
              response.content_type = 'text/html'
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ #
2
+ # Class that searches for a file with
3
+ # episode information in the directory
4
+ # like "episode.txt"
5
+ #
6
+ require 'serienrenamer'
7
+
8
module Serienrenamer
  module Plugin

    # Plugin that searches a directory for a small text file which
    # holds episode information, like "episode.txt".
    class Textfile < Serienrenamer::Pluginbase

      def self.plugin_name; "Textfile" end
      def self.usable; true end
      def self.priority; 100 end

      # Public: called from the main program to produce episode names.
      #
      # episode - a Serienrenamer::Episode instance (its #source_directory
      #           is searched) or a String path to a directory
      #
      # Returns an Array with the contents of every matching file; empty
      # if no directory is available or no file qualifies.
      def self.generate_episode_information(episode)
        sourcedir = ""
        if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
          sourcedir = episode.source_directory
        elsif episode.is_a?(String) && File.directory?(episode)
          sourcedir = episode
        end

        matched_episodes = []

        # File.directory? replaces Dir.exists?, which was removed in Ruby 3.2
        return matched_episodes unless sourcedir != "" && File.directory?(sourcedir)

        # search for files that are not larger than 128 bytes
        # and check if they contain episode information
        Dir.foreach(sourcedir) do |entry|
          file = File.join(sourcedir, entry)

          # ".", ".." and subdirectories cannot be read as files
          next unless File.file?(file)
          next if File.size(file) > 128 || File.zero?(file)

          # block form closes the file handle again
          data = File.open(file, "rb") { |f| f.read }

          # only files with one line with the title are interesting
          next if data.lines.to_a.size > 1

          if data.match(/\w+/) &&
              Serienrenamer::Episode.contains_episode_information?(data)
            matched_episodes.push(data)
          end
        end

        matched_episodes
      end
    end
  end
end