serienrenamer 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/serienrenamer CHANGED
@@ -5,7 +5,6 @@
5
5
  $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
6
6
 
7
7
  require 'serienrenamer'
8
- require 'plugin'
9
8
  require 'optparse'
10
9
  require 'fileutils'
11
10
  require 'hashconfig'
@@ -85,11 +84,6 @@ opts.separator("")
85
84
 
86
85
  rest = opts.permute(ARGV)
87
86
 
88
- ###
89
- # Load plugins #
90
- Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
91
- load plugin
92
- end
93
87
  Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }
94
88
 
95
89
  puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"
@@ -0,0 +1,35 @@
1
+ # class that creates an episodename out of the episode identifier
2
+ # for S02E04 the episodename would be "Episode 4"
3
+
4
+ module Serienrenamer
5
+ module Plugin
6
+
7
+ class EpisodeIdentifier < Serienrenamer::Pluginbase
8
+
9
# Name under which this plugin is presented.
def self.plugin_name
  "EpisodeIdentifier"
end

# Whether the plugin can be used; always true for this plugin.
def self.usable
  true
end

# Priority of the plugin; the main program sorts the registered
# plugins by this value in descending order.
def self.priority
  1
end
12
+
13
# Builds a generic episode name from the episode number found in the
# episode path, e.g. "Episode 4" for a path that contains S02E04.
#
# episode - Serienrenamer::Episode instance (its #episodepath is used)
#           or a String path that is inspected directly
#
# Returns an Array with the generated name, or an empty Array if the
# path carries no episode information.
def self.generate_episode_information(episode)
  # the documented contract also allows a plain path, so #episodepath
  # is only called on objects that provide it
  path = episode.respond_to?(:episodepath) ? episode.episodepath : episode.to_s

  return [] unless Serienrenamer::Episode.contains_episode_information?(path)

  md = Serienrenamer::Episode.extract_episode_information(path)
  return [] unless md

  ["Episode %d" % [md[:episode].to_i]]
end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,131 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.de-Page
4
+ #
5
+ require 'uri'
6
+ require 'mechanize'
7
+
8
+ module Serienrenamer
9
+ module Plugin
10
+
11
+ class SerienjunkiesDe < Serienrenamer::Pluginbase
12
+
13
# Name under which this plugin is presented.
def self.plugin_name
  "SerienjunkiesDe"
end

# Base URL of the site; all page links are resolved against it.
def self.plugin_url
  "http://serienjunkies.de"
end

# Whether the plugin can be used; always true for this plugin.
def self.usable
  true
end

# Priority of the plugin; the main program sorts the registered
# plugins by this value in descending order.
def self.priority
  50
end
17
+
18
# Looks up the title of the given episode on serienjunkies.de.
#
# On the first request for a series the series page is located and
# parsed; the resulting title table is kept in @cached_data and reused
# by later calls for the same series.
#
# episode - Serienrenamer::Episode instance (series, season and episode
#           are read)
#
# Returns an Array with the matching title, or an empty Array if the
# series or the episode is unknown.
# Raises ArgumentError if episode is not a Serienrenamer::Episode.
def self.generate_episode_information(episode)
  raise ArgumentError, "Serienrenamer::Episode instance needed" unless
    episode.is_a?(Serienrenamer::Episode)

  @cached_data ||= {}
  seriesname = episode.series.to_s

  if !@cached_data.key?(seriesname) && seriesname =~ /\w/
    seriespage_link = find_link_to_series_page(seriesname)
    @cached_data[seriesname] = parse_seriespage(seriespage_link) if seriespage_link
  end

  # explicit checks instead of a blanket rescue, so that real errors
  # are no longer hidden
  titles = @cached_data[seriesname]
  return [] unless titles

  begin
    identifier = "S%.2dE%.2d" % [episode.season, episode.episode]
  rescue ArgumentError, TypeError
    # season/episode values that are not numeric cannot match an entry
    return []
  end

  episodename = titles[identifier]
  episodename.to_s =~ /\w/ ? [episodename] : []
end
66
+
67
+ # tries to find the link to the series page because there are
68
+ # plenty of different writings of some series
69
+ # :seriesname: - name of the series
70
+ #
71
+ # TODO make this more intelligent so that it tries other forms
72
+ # of the name
73
+ #
74
+ # returns a link to a seriejunkies.de-page or nil if no page was found
75
+ def self.find_link_to_series_page(seriesname)
76
+ raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
77
+
78
+ self.build_agent unless defined? @agent
79
+
80
+ url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )
81
+
82
+ @agent.get(url).search("a.slink").each do |series|
83
+ if series.text.match(/#{seriesname}/i)
84
+ return URI.join( plugin_url, series[:href]).to_s
85
+ end
86
+ end
87
+
88
+ return nil
89
+ end
90
+
91
# Follows the "Episoden" link of a series page and collects the titles
# from the episode table found there.
#
# page_url - URL of the serienjunkies.de series page
# german   - kept for interface compatibility; the title is always
#            taken from the second link of a table row
#            (NOTE(review): presumably the German title - confirm)
#
# Returns a Hash that maps identifiers like "S01E02" to episode titles;
# it is empty if the page offers no "Episoden" link.
def self.parse_seriespage(page_url, german=true)
  build_agent unless defined? @agent

  series = {}

  seriesdoc = @agent.get(page_url)
  episodes_link = seriesdoc.link_with(:text => /^Episoden$/i)
  # without an episode list there is nothing to extract
  return series unless episodes_link

  epidoc = @agent.click(episodes_link)

  epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |row|
    next unless row.search("td.thh").empty? # skip headings

    first_cell = row.search(":first-child")[0]
    next unless first_cell

    md = first_cell.text.match(/(?<season>\d+)x(?<episode>\d+)/)
    next unless md

    # dedicated local, so that the german parameter is not overwritten
    title_link = row.search("a")[1]
    next unless title_link

    identifier = "S%.2dE%.2d" % [md[:season].to_i, md[:episode].to_i]
    series[identifier] = title_link.text.strip
  end

  series
end
124
+
125
# Creates a new Mechanize agent and keeps it in @agent, where the
# scraping helpers of this plugin pick it up.
#
# Returns the new agent.
def self.build_agent
  agent = Mechanize.new
  @agent = agent
end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,112 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.org-Feed
4
+ #
5
+ require 'rss'
6
+ require 'open-uri'
7
+
8
+ module Serienrenamer
9
+ module Plugin
10
+
11
+ class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase
12
+
13
# Name under which this plugin is presented.
def self.plugin_name
  "SerienjunkiesOrgFeed"
end

# Whether the plugin can be used; always true for this plugin.
def self.usable
  true
end

# Priority of the plugin; the main program sorts the registered
# plugins by this value in descending order.
def self.priority
  80
end

# location of the episode feed that build_up_series_data reads
@feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'
18
+
19
# Searches the serienjunkies.org episode feed for entries that belong
# to the given episode.
#
# The feed is loaded once through build_up_series_data and kept in
# @feed_data for all later calls.
#
# episode - Serienrenamer::Episode instance (series, season and episode
#           are read)
# debug   - if true, the word list of every matching attempt is
#           printed (default: false)
#
# Returns an Array of matching feed entries; it is empty if nothing
# matches.
# Raises ArgumentError if episode is not a Serienrenamer::Episode.
def self.generate_episode_information(episode, debug=false)
  raise ArgumentError, "Serienrenamer::Episode instance needed" unless
    episode.is_a?(Serienrenamer::Episode)

  @feed_data = build_up_series_data unless defined? @feed_data

  episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]

  # collect the entries for this season/episode and keep only one per
  # "<series>.SxxEyy" prefix, preferring the longest title; entries of
  # equal length are duplicates and are skipped as well
  matched_definitions = []
  @feed_data.grep(/#{episode_definition}/).each do |entry|
    prefix = entry[/^.*S\d+E\d+/]
    known = matched_definitions.find { |candidate| candidate.start_with?(prefix) }

    if known
      next if entry.length <= known.length
      matched_definitions.delete(known)
    end

    matched_definitions.push(entry)
  end

  # find suitable entries for the series name:
  #
  # start with a pattern that includes all words of Episode#series and,
  # if this does not match, cut off the first word and try again
  #
  # if the last remaining word still does not match, it is split
  # characterwise, so that:
  #   crmi ==> Criminal Minds
  #
  # the words are escaped because series names may contain regexp
  # metacharacters
  matched_episodes = []
  name_words = episode.series.split(/ /)
  word_splitted = false

  until name_words.empty?
    p name_words if debug

    pattern = name_words.map { |word| Regexp.escape(word) }.join('.*')
    matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
    break unless matched_episodes.empty?

    # split characterwise if the last word does not match
    if name_words.length == 1 && !word_splitted
      name_words = name_words.first.split(//)
      word_splitted = true
      next
    end

    # the characterwise attempt failed too: give up with an empty result
    break if word_splitted

    name_words.delete_at(0)
  end

  matched_episodes
end
92
+
93
# Creates the list of existing episodes from the feed at @feed_url.
#
# @feed_url may be a URL or the path of a local file. URLs are opened
# through open-uri, everything else through File.open. Kernel#open is
# avoided on purpose: it no longer opens URLs on Ruby 3 and would run
# a command for a string that starts with "|".
#
# Returns an Array with the second space-separated word of every item
# title; items whose title has no second word are left out.
def self.build_up_series_data
  location = @feed_url.to_s

  extract_titles = lambda do |io|
    feed = RSS::Parser.parse(io)
    feed ? feed.items.map { |item| item.title.to_s.split(/ /)[1] }.compact : []
  end

  if location =~ %r{\A[a-z][a-z0-9+.\-]*://}i
    URI.parse(location).open(&extract_titles)
  else
    File.open(location, &extract_titles)
  end
end
105
+
106
# Sets the location of the feed (e.g. for testing).
#
# feed - URL of the feed, or path of a local feed file; a path is
#        stored as absolute path, a URL is stored unchanged
def self.feed_url=(feed)
  location = feed.to_s
  # File.absolute_path would turn "http://..." into a local path
  is_url = location =~ %r{\A[a-z][a-z0-9+.\-]*://}i
  @feed_url = is_url ? location : File.absolute_path(location)
end
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,181 @@
1
+ #
2
+ # Class that extracts information about episodes
3
+ # from the serienjunkies.org-Page
4
+ #
5
+ require 'uri'
6
+ require 'mechanize'
7
+ require 'yaml'
8
+
9
+ module Serienrenamer
10
+ module Plugin
11
+
12
+ class SerienjunkiesOrg < Serienrenamer::Pluginbase
13
+
14
# Name under which this plugin is presented.
def self.plugin_name
  "SerienjunkiesOrg"
end

# Base URL of the site; all page links are resolved against it.
def self.plugin_url
  "http://serienjunkies.org"
end

# Whether the plugin can be used; always true for this plugin.
def self.usable
  true
end

# Priority of the plugin; the main program sorts the registered
# plugins by this value in descending order.
def self.priority
  60
end
18
+
19
# Public: tries to find the episode name on serienjunkies.org
#
# On the first request for a series the series page is located and
# parsed; the resulting title table is kept in @cached_data and reused
# by later calls for the same series.
#
# episode - Serienrenamer::Episode instance which holds the information
#           (series, season and episode are read)
#
# Returns an Array of possible episode names; it is empty if the series
# or the episode is unknown.
# Raises ArgumentError if episode is not a Serienrenamer::Episode.
def self.generate_episode_information(episode)
  raise ArgumentError, "Serienrenamer::Episode instance needed" unless
    episode.is_a?(Serienrenamer::Episode)

  @cached_data ||= {}
  seriesname = episode.series.to_s

  if !@cached_data.key?(seriesname) && seriesname =~ /\w/
    seriespage_link = find_link_to_series_page(seriesname)
    @cached_data[seriesname] = parse_seriespage(seriespage_link) if seriespage_link
  end

  # explicit checks instead of a blanket rescue, so that real errors
  # are no longer hidden
  titles = @cached_data[seriesname]
  return [] unless titles

  begin
    identifier = "%d_%d" % [episode.season, episode.episode]
  rescue ArgumentError, TypeError
    # season/episode values that are not numeric cannot match an entry
    return []
  end

  episodename = titles[identifier]
  episodename.to_s =~ /\w/ ? [episodename] : []
end
70
+
71
+ # Public: tries to find a link to the seriespage
72
+ #
73
+ # seriesname - the series name for which the page is searched
74
+ #
75
+ # Returns the link or nil
76
+ def self.find_link_to_series_page(seriesname)
77
+ raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
78
+
79
+ self.build_agent unless defined? @agent
80
+
81
+ url = URI.join(plugin_url, "?cat=0&l=%s" % seriesname[0].downcase )
82
+
83
+ pattern = seriesname.gsub(/\s/, ".*")
84
+
85
+ @agent.get(url).search("div#sidebar > ul > li > a").each do |series|
86
+ if series.text.match(/#{pattern}/i)
87
+ return URI.join( plugin_url, series[:href]).to_s
88
+ end
89
+ end
90
+
91
+ nil
92
+ end
93
+
94
# Public: collects the episode information of all season pages that
# are linked from a series page
#
# page_url - the url of the series page
# german   - if true only links whose text contains "Staffel" are
#            followed, otherwise only links whose text contains
#            "Season" (Defaults to true)
# debug    - if true the collected data is printed as YAML
#            (Defaults to false)
#
# Returns a Hash with the merged results of parse_season_subpage, or an
# empty Hash if no season link is found
def self.parse_seriespage(page_url, german=true, debug=false)
  self.build_agent unless defined? @agent

  wanted = german ? /Staffel/i : /Season/i
  season_links = @agent.get(page_url).search('div#sidebar > div#scb > div.bkname > a')

  series = season_links.each_with_object({}) do |link, collected|
    next unless link.content.match(wanted)

    season_page = @agent.get(link[:href])
    collected.merge!(self.parse_season_subpage(season_page, german))
  end

  puts series.to_yaml if debug

  series
end
125
+
126
# Public: extracts the episode names from one season page
#
# page   - page object of the season; its release titles are read
#          from 'div.post > div.post-content strong:nth-child(1)'
# german - if true only titles containing "German" but not "Subbed"
#          are used, otherwise only titles without "German"
#          (Defaults to true)
#
# Returns a Hash that maps "<season>_<episode>" to an episode name; a
# stored name is only replaced by a name that is at least as long
def self.parse_season_subpage(page, german=true)
  titles = page.search('div.post > div.post-content strong:nth-child(1)')

  titles.each_with_object({}) do |node, episodes|
    content = node.content
    md = Serienrenamer::Episode.extract_episode_information(content)
    next unless md

    is_german = content.match(/German/i)
    if german
      next if !is_german || content.match(/Subbed/i)
    elsif is_german
      next
    end

    episodename = Serienrenamer::Episode.clean_episode_data(md[:episodename], true)
    next unless episodename && episodename.match(/\w+/)

    id = "%d_%d" % [ md[:season].to_i, md[:episode].to_i ]

    current = episodes[id]
    episodes[id] = episodename unless current && current.size > episodename.size
  end
end
163
+
164
+ private
165
+
166
# Private: constructs a Mechanize instance whose post-connect hook
# sets the content type of a response to 'text/html' if the response
# has none, and stores the instance in @agent
#
# Note: a bare `private` does not apply to methods defined with
# `def self.`, so this method is callable from outside the class.
#
# Returns the agent
def self.build_agent
  force_html = lambda do |_agent, _uri, response, _body|
    type = response.content_type
    response.content_type = 'text/html' if type.nil? || type.empty?
  end

  @agent = Mechanize.new { |a| a.post_connect_hooks << force_html }
end
179
+ end
180
+ end
181
+ end
@@ -0,0 +1,57 @@
1
+ #
2
+ # Class that searches for a file with
3
+ # episode information in the directory
4
+ # like "episode.txt"
5
+ #
6
+ require 'serienrenamer'
7
+
8
+ module Serienrenamer
9
+ module Plugin
10
+
11
+ class Textfile < Serienrenamer::Pluginbase
12
+
13
# Name under which this plugin is presented.
def self.plugin_name
  "Textfile"
end

# Whether the plugin can be used; always true for this plugin.
def self.usable
  true
end

# Priority of the plugin; the main program sorts the registered
# plugins by this value in descending order.
def self.priority
  100
end
16
+
17
# Searches a directory for small text files (like "episode.txt") that
# hold the episode information.
#
# episode - Serienrenamer::Episode instance (its #source_directory is
#           searched) or a String with the path of a directory
#
# Only regular, non-empty files of at most 128 bytes with a single line
# are considered.
#
# Returns an Array with the content of every such file that contains
# episode information; it is empty if there is no usable directory.
def self.generate_episode_information(episode)
  sourcedir =
    if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
      episode.source_directory
    elsif episode.is_a?(String)
      episode
    end

  # File.directory? replaces Dir.exists?, which newer Ruby versions
  # no longer provide
  return [] unless sourcedir && File.directory?(sourcedir)

  matched_episodes = []

  Dir.foreach(sourcedir) do |entry|
    file = File.join(sourcedir, entry)

    # ".", ".." and sub-directories cannot be read as text files
    next unless File.file?(file)
    next if File.size(file) > 128 || File.zero?(file)

    # binread closes the file again, unlike File.open(...).read
    data = File.binread(file)

    # only files with one line with the title are interesting
    next if data.lines.to_a.size > 1

    if data.match(/\w+/) &&
        Serienrenamer::Episode.contains_episode_information?(data)
      matched_episodes.push(data)
    end
  end

  matched_episodes
end
55
+ end
56
+ end
57
+ end