serienrenamer 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/serienrenamer +0 -6
- data/lib/serienrenamer/plugin/episode_identifier.rb +35 -0
- data/lib/serienrenamer/plugin/serienjunkies_de.rb +131 -0
- data/lib/serienrenamer/plugin/serienjunkies_feed.rb +112 -0
- data/lib/serienrenamer/plugin/serienjunkies_org.rb +181 -0
- data/lib/serienrenamer/plugin/textfile.rb +57 -0
- data/lib/serienrenamer/plugin/wikipedia.rb +448 -0
- data/lib/serienrenamer/plugin.rb +26 -0
- data/lib/serienrenamer/version.rb +1 -1
- data/lib/serienrenamer.rb +4 -25
- data/serienrenamer.gemspec +1 -0
- data/test/test_helper.rb +1 -1
- data/test/test_plugin_episode_identifier.rb +1 -1
- data/test/test_plugin_serienjunkies_de.rb +3 -3
- data/test/test_plugin_serienjunkies_feed.rb +1 -1
- data/test/{test_serienjunkies_org.rb → test_plugin_serienjunkies_org.rb} +3 -3
- data/test/test_plugin_textfile.rb +3 -3
- metadata +12 -12
- data/lib/plugin/episode_identifier.rb +0 -33
- data/lib/plugin/serienjunkies_de.rb +0 -129
- data/lib/plugin/serienjunkies_feed.rb +0 -110
- data/lib/plugin/serienjunkies_org.rb +0 -179
- data/lib/plugin/textfile.rb +0 -54
- data/lib/plugin/wikipedia.rb +0 -446
- data/lib/plugin.rb +0 -8
data/bin/serienrenamer
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
|
6
6
|
|
7
7
|
require 'serienrenamer'
|
8
|
-
require 'plugin'
|
9
8
|
require 'optparse'
|
10
9
|
require 'fileutils'
|
11
10
|
require 'hashconfig'
|
@@ -85,11 +84,6 @@ opts.separator("")
|
|
85
84
|
|
86
85
|
rest = opts.permute(ARGV)
|
87
86
|
|
88
|
-
###
|
89
|
-
# Load plugins #
|
90
|
-
Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
|
91
|
-
load plugin
|
92
|
-
end
|
93
87
|
Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }
|
94
88
|
|
95
89
|
puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# class that creates an episodename out of the episode identifier
|
2
|
+
# for S02E04 the episodename would be "Episode 4"
|
3
|
+
|
4
|
+
module Serienrenamer
  module Plugin

    # Fallback plugin that derives an episode name from the episode
    # identifier alone, e.g. "Episode 4" for S02E04.
    class EpisodeIdentifier < Serienrenamer::Pluginbase

      def self.plugin_name; "EpisodeIdentifier" end
      def self.usable; true end
      def self.priority; 1 end

      # Called from the main program to build a generic episode name.
      #
      # episode - object whose #episodepath is inspected for episode
      #           information
      #
      # Returns an array with a single "Episode <n>" string, or an empty
      # array if the path carries no usable episode information.
      def self.generate_episode_information(episode)
        location = episode.episodepath

        # nothing to do when the path does not look like an episode
        return [] unless Serienrenamer::Episode.contains_episode_information?(location)

        info = Serienrenamer::Episode.extract_episode_information(location)
        return [] unless info

        [ "Episode %d" % [ info[:episode].to_i ] ]
      end
    end
  end
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
#
|
2
|
+
# Class that extracts information about episodes
|
3
|
+
# from the serienjunkies.de-Page
|
4
|
+
#
|
5
|
+
require 'uri'
|
6
|
+
require 'mechanize'
|
7
|
+
|
8
|
+
module Serienrenamer
  module Plugin

    # Plugin that looks up episode names on the serienjunkies.de website.
    #
    # Parsed series pages are kept in a class-level cache (@cached_data)
    # keyed by series name, so a series page is parsed only once as long
    # as a page could be found for it.
    class SerienjunkiesDe < Serienrenamer::Pluginbase

      def self.plugin_name; "SerienjunkiesDe" end
      def self.plugin_url; "http://serienjunkies.de" end
      def self.usable; true end
      def self.priority; 50 end

      # Called from the main program to look up the name of an episode.
      #
      # On the first call for a series the series page is located and
      # parsed; the resulting hash is cached for all later calls.
      #
      # episode - Serienrenamer::Episode instance; its series, season and
      #           episode values are used for the lookup
      #
      # Returns an array holding the episode name, or an empty array if
      # no name could be determined.
      # Raises ArgumentError if episode is not a Serienrenamer::Episode.
      def self.generate_episode_information(episode)

        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
          episode.is_a?(Serienrenamer::Episode)

        @cached_data ||= {}

        if ! @cached_data.has_key?(episode.series) && episode.series.match(/\w+/)

          # determine link to series
          seriespage_link = self.find_link_to_series_page(episode.series)

          if seriespage_link
            @cached_data[episode.series] = self.parse_seriespage(seriespage_link)
          end
        end

        # no page was found for this series => nothing to look up
        series = @cached_data[episode.series]
        return [] unless series

        begin
          identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
        rescue ArgumentError, TypeError
          # season/episode cannot be formatted as numbers; treat this as
          # "no match" instead of hiding every possible error
          return []
        end

        episodename = series[identifier]
        return [] unless episodename && episodename.match(/\w+/)

        [ episodename ]
      end

      # Tries to find the link to the series page because there are
      # plenty of different spellings of some series.
      #
      # seriesname - name of the series
      #
      # TODO make this more intelligent so that it tries other forms
      #      of the name
      #
      # Returns a link to a serienjunkies.de page or nil if no page was
      # found.
      # Raises ArgumentError if seriesname contains no word characters.
      def self.find_link_to_series_page(seriesname)
        raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)

        self.build_agent unless defined? @agent

        url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )

        # escape the name so that characters like "(", "?" or "." are
        # matched literally instead of being interpreted as regexp syntax
        name_pattern = /#{Regexp.escape(seriesname)}/i

        @agent.get(url).search("a.slink").each do |series|
          if series.text.match(name_pattern)
            return URI.join( plugin_url, series[:href]).to_s
          end
        end

        nil
      end

      # Parses the supplied url and returns a hash with episode
      # information indexed by episode identifier ("S01E02").
      #
      # page_url - url of the serienjunkies page
      # german   - kept for interface compatibility; the current
      #            implementation always reads the second link of every
      #            table row (presumably the German title) and does not
      #            evaluate this flag
      #
      # Returns a hash (identifier => title); it is empty if the page has
      # no "Episoden" link or no parsable rows.
      def self.parse_seriespage(page_url, german=true)

        self.build_agent unless defined? @agent

        series = {}

        seriesdoc = @agent.get(page_url)

        # without an episode overview there is nothing to parse
        episodes_link = seriesdoc.link_with(:text => /^Episoden$/i)
        return series unless episodes_link

        epidoc = @agent.click(episodes_link)

        epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |row|

          next unless row.search("td.thh").empty? # skip headings

          first_cell = row.search(":first-child")[0]
          next unless first_cell

          md = first_cell.text.match(/(?<season>\d+)x(?<episode>\d+)/)
          next unless md

          # extract and save this information
          identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]

          title_link = row.search("a")[1]
          next unless title_link

          series[identifier] = title_link.text.strip
        end

        series
      end

      # Builds up the Mechanize instance that is shared by all requests
      # of this plugin.
      def self.build_agent
        @agent = Mechanize.new
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
#
|
2
|
+
# Class that extracts information about episodes
|
3
|
+
# from the serienjunkies.org-Feed
|
4
|
+
#
|
5
|
+
require 'rss'
|
6
|
+
require 'open-uri'
|
7
|
+
|
8
|
+
module Serienrenamer
  module Plugin

    # Plugin that extracts episode information from the
    # serienjunkies.org RSS feed.
    #
    # The feed is downloaded and parsed once; the extracted release
    # names are cached in @feed_data for all later calls.
    class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase

      def self.plugin_name; "SerienjunkiesOrgFeed" end
      def self.usable; true end
      def self.priority; 80 end

      @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'

      # Called from the main program to find feed entries for an episode.
      #
      # If this is the first call to this method, it builds up the list
      # of feed entries, which is reused by all future calls.
      #
      # episode - Serienrenamer::Episode instance; its series, season and
      #           episode values are used for the lookup
      # debug   - if true, the currently tried name words are printed
      #
      # Returns an array of matching feed entries (possibly empty).
      # Raises ArgumentError if episode is not a Serienrenamer::Episode.
      def self.generate_episode_information(episode, debug=false)

        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
          episode.is_a?(Serienrenamer::Episode)

        unless defined? @feed_data
          @feed_data = self.build_up_series_data
        end

        episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]

        # search for all items that match the definition
        # and save them uniquely in an array; for entries that share the
        # same "<series>.SxxEyy" prefix only the longer one is kept
        matched_definitions = []
        @feed_data.grep(/#{episode_definition}/).each do |epi|
          serdef = epi.match(/(^.*S\d+E\d+)/)[0]

          # the prefix comes from the feed and is matched literally
          exist = matched_definitions.grep(/^#{Regexp.escape(serdef)}/)[0]

          if exist
            # identical or shorter entries add no information
            next if exist == epi || epi.length < exist.length
            matched_definitions.delete(exist) if epi.length > exist.length
          end

          matched_definitions.push(epi)
        end

        # find a suitable episode string in the array of
        # matched definitions
        #
        # start with a pattern that includes all words from
        # Episode#series and if this does not match, cut
        # off the first word and try to match again
        #
        # if the pattern contains one word and this still
        # does not match, the last word is split
        # characterwise, so that:
        #   crmi ==> Criminal Minds
        #
        matched_episodes = []
        name_words = episode.series.split(/ /)
        word_splitted = false

        until name_words.empty?
          p name_words if debug

          # every word is escaped so that regexp metacharacters in a
          # series name are matched literally
          pattern = name_words.map { |word| Regexp.escape(word) }.join('.*')
          matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
          break unless matched_episodes.empty?

          # split characterwise if the last word does not match
          if name_words.length == 1 && ! word_splitted
            name_words = name_words.first.split(//)
            word_splitted = true
            next
          end

          # if the last word was split and does not match then break
          # and return an empty result set
          break if word_splitted

          name_words.delete_at(0)
        end

        matched_episodes
      end

      # Creates a list of existing episodes from the feed at @feed_url.
      #
      # Every item title is split at spaces and the second part is
      # stored; items without a second part are skipped.
      #
      # Returns an array of strings.
      def self.build_up_series_data
        feed_data = []

        collect = lambda do |rss|
          feed = RSS::Parser.parse(rss)
          feed.items.each do |item|
            entry = item.title.split(/ /)[1]
            feed_data.push(entry) if entry
          end
        end

        # Kernel#open no longer opens URLs on Ruby >= 3.0; URI.open
        # (provided by open-uri on newer rubies) handles URLs and falls
        # back to Kernel#open for local paths
        if URI.respond_to?(:open)
          URI.open(@feed_url, &collect)
        else
          open(@feed_url, &collect)
        end

        feed_data
      end

      # Sets the feed url (e.g. for testing).
      #
      # feed - a URL (kept as it is) or a path to a local file, which is
      #        converted into an absolute path
      def self.feed_url=(feed)
        if feed =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
          @feed_url = feed
        else
          @feed_url = File.absolute_path(feed)
        end
      end
    end
  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
#
|
2
|
+
# Class that extracts information about episodes
|
3
|
+
# from the serienjunkies.org-Page
|
4
|
+
#
|
5
|
+
require 'uri'
|
6
|
+
require 'mechanize'
|
7
|
+
require 'yaml'
|
8
|
+
|
9
|
+
module Serienrenamer
  module Plugin

    # Plugin that extracts episode names from the season pages of
    # serienjunkies.org.
    #
    # Parsed series are kept in a class-level cache (@cached_data) keyed
    # by series name.
    class SerienjunkiesOrg < Serienrenamer::Pluginbase

      def self.plugin_name; "SerienjunkiesOrg" end
      def self.plugin_url; "http://serienjunkies.org" end
      def self.usable; true end
      def self.priority; 60 end

      # Public: tries to search for an appropriate episodename
      #
      # if this is the first call for a series, it locates and parses the
      # series page and caches the result, which is then used by all
      # future method calls
      #
      # episode - Serienrenamer::Episode instance which holds the information
      #
      # Returns an array of possible episodenames (possibly empty)
      # Raises ArgumentError if episode is not a Serienrenamer::Episode
      def self.generate_episode_information(episode)

        raise ArgumentError, "Serienrenamer::Episode instance needed" unless
          episode.is_a?(Serienrenamer::Episode)

        @cached_data ||= {}

        if ! @cached_data.has_key?(episode.series) && episode.series.match(/\w+/)

          # determine link to series
          seriespage_link = self.find_link_to_series_page(episode.series)

          if seriespage_link
            @cached_data[episode.series] = self.parse_seriespage(seriespage_link)
          end
        end

        # no page was found for this series => nothing to look up
        series = @cached_data[episode.series]
        return [] unless series

        begin
          identifier = "%d_%d" % [ episode.season, episode.episode ]
        rescue ArgumentError, TypeError
          # season/episode cannot be formatted as numbers; treat this as
          # "no match" instead of hiding every possible error
          return []
        end

        episodename = series[identifier]
        return [] unless episodename && episodename.match(/\w+/)

        [ episodename ]
      end

      # Public: tries to find a link to the seriespage
      #
      # seriesname - the series name for which the page is searched;
      #              whitespace in the name matches any characters
      #
      # Returns the link or nil
      # Raises ArgumentError if seriesname contains no word characters
      def self.find_link_to_series_page(seriesname)
        raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)

        self.build_agent unless defined? @agent

        url = URI.join(plugin_url, "?cat=0&l=%s" % seriesname[0].downcase )

        # every whitespace character becomes ".*"; the remaining parts
        # are escaped so that regexp metacharacters match literally
        pattern = seriesname.split(/\s/, -1).map { |part| Regexp.escape(part) }.join(".*")
        name_pattern = /#{pattern}/i

        @agent.get(url).search("div#sidebar > ul > li > a").each do |series|
          if series.text.match(name_pattern)
            return URI.join( plugin_url, series[:href]).to_s
          end
        end

        nil
      end

      # Public: parses a series page and extracts the episode information
      #
      # page_url - the url to the seriespage
      # german   - if true only links containing "Staffel" are followed,
      #            otherwise only links containing "Season"
      #            (Defaults to true)
      # debug    - if true the collected data is printed as YAML
      #
      # Returns a hash which contains the episode information or an empty
      # hash if there aren't any episodes
      def self.parse_seriespage(page_url, german=true, debug=false)

        self.build_agent unless defined? @agent

        series = {}
        doc = @agent.get(page_url)

        season_marker = german ? /Staffel/i : /Season/i

        doc.search('div#sidebar > div#scb > div.bkname > a').each do |link|
          next unless link.content.match(season_marker)

          site = @agent.get(link[:href])
          series.merge!(self.parse_season_subpage(site, german))
        end

        puts series.to_yaml if debug

        series
      end

      # Public: extracts the episodes from one season
      #
      # page   - Mechanize page object which holds the season
      # german - if true only entries containing "German" but not
      #          "Subbed" are used, otherwise only entries without
      #          "German"
      #
      # Returns a hash with all episodes (unique, "season_episode" =>
      # name); for duplicates the entry with the longer stored name is
      # kept
      def self.parse_season_subpage(page, german=true)

        episodes = {}

        page.search('div.post > div.post-content strong:nth-child(1)').each do |e|

          content = e.content
          md = Serienrenamer::Episode.extract_episode_information(content)
          next unless md

          if german
            next unless content.match(/German/i)
            next if content.match(/Subbed/i)
          else
            next if content.match(/German/i)
          end

          episodename =
            Serienrenamer::Episode.clean_episode_data(md[:episodename], true)
          next unless episodename && episodename.match(/\w+/)

          id = "%d_%d" % [ md[:season].to_i, md[:episode].to_i ]

          next if episodes[id] && episodes[id].size > episodename.size

          episodes[id] = episodename
        end

        episodes
      end

      # Constructs a Mechanize instance and adds a fix that interprets
      # every response without a content type as html.
      #
      # NOTE: a bare `private` keyword does not apply to methods defined
      # with `def self.`, so this method has always been publicly
      # callable; the ineffective keyword was dropped to avoid confusion.
      #
      # Returns the agent
      def self.build_agent
        @agent = Mechanize.new do |a|
          a.post_connect_hooks << lambda do |_,_,response,_|
            if response.content_type.nil? || response.content_type.empty?
              response.content_type = 'text/html'
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#
|
2
|
+
# Class that searches for a file with
|
3
|
+
# episode information in the directory
|
4
|
+
# like "episode.txt"
|
5
|
+
#
|
6
|
+
require 'serienrenamer'
|
7
|
+
|
8
|
+
module Serienrenamer
  module Plugin

    # Plugin that searches the episode directory for a small text file
    # with episode information, like "episode.txt".
    class Textfile < Serienrenamer::Pluginbase

      def self.plugin_name; "Textfile" end
      def self.usable; true end
      def self.priority; 100 end

      # Called from the main program to collect episode information from
      # text files.
      #
      # episode - a Serienrenamer::Episode instance (its source_directory
      #           is searched) or a String with the path to a directory
      #
      # Returns an array with the contents of all matching files; it is
      # empty if no directory could be determined or nothing matched.
      def self.generate_episode_information(episode)

        sourcedir = ""
        if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
          sourcedir = episode.source_directory
        elsif episode.is_a?(String) && File.directory?(episode)
          sourcedir = episode
        end

        matched_episodes = []

        # File.directory? replaces Dir.exists?, which is no longer
        # available on current rubies
        return matched_episodes if sourcedir == "" || ! File.directory?(sourcedir)

        # search for non-empty files of at most 128 bytes
        # and check if they contain episode information
        Dir.foreach(sourcedir) do |entry|
          file = File.join(sourcedir, entry)

          # ".", ".." and subdirectories cannot be read like a file
          next unless File.file?(file)
          next if File.size(file) > 128 || File.zero?(file)

          # block form closes the file handle again
          data = File.open(file, "rb") { |f| f.read }

          # only files with one line with the title are interesting
          next if data.lines.to_a.size > 1

          if data.match(/\w+/) &&
              Serienrenamer::Episode.contains_episode_information?(data)
            matched_episodes.push(data)
          end
        end

        matched_episodes
      end
    end
  end
end
|