serienrenamer 0.0.14 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/serienrenamer +0 -6
- data/lib/serienrenamer/plugin/episode_identifier.rb +35 -0
- data/lib/serienrenamer/plugin/serienjunkies_de.rb +131 -0
- data/lib/serienrenamer/plugin/serienjunkies_feed.rb +112 -0
- data/lib/serienrenamer/plugin/serienjunkies_org.rb +181 -0
- data/lib/serienrenamer/plugin/textfile.rb +57 -0
- data/lib/serienrenamer/plugin/wikipedia.rb +448 -0
- data/lib/serienrenamer/plugin.rb +26 -0
- data/lib/serienrenamer/version.rb +1 -1
- data/lib/serienrenamer.rb +4 -25
- data/serienrenamer.gemspec +1 -0
- data/test/test_helper.rb +1 -1
- data/test/test_plugin_episode_identifier.rb +1 -1
- data/test/test_plugin_serienjunkies_de.rb +3 -3
- data/test/test_plugin_serienjunkies_feed.rb +1 -1
- data/test/{test_serienjunkies_org.rb → test_plugin_serienjunkies_org.rb} +3 -3
- data/test/test_plugin_textfile.rb +3 -3
- metadata +12 -12
- data/lib/plugin/episode_identifier.rb +0 -33
- data/lib/plugin/serienjunkies_de.rb +0 -129
- data/lib/plugin/serienjunkies_feed.rb +0 -110
- data/lib/plugin/serienjunkies_org.rb +0 -179
- data/lib/plugin/textfile.rb +0 -54
- data/lib/plugin/wikipedia.rb +0 -446
- data/lib/plugin.rb +0 -8
data/bin/serienrenamer
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
|
6
6
|
|
7
7
|
require 'serienrenamer'
|
8
|
-
require 'plugin'
|
9
8
|
require 'optparse'
|
10
9
|
require 'fileutils'
|
11
10
|
require 'hashconfig'
|
@@ -85,11 +84,6 @@ opts.separator("")
|
|
85
84
|
|
86
85
|
rest = opts.permute(ARGV)
|
87
86
|
|
88
|
-
###
|
89
|
-
# Load plugins #
|
90
|
-
Dir[File.join(File.dirname(__FILE__),"../lib/plugin/*.rb")].each do |plugin|
|
91
|
-
load plugin
|
92
|
-
end
|
93
87
|
Serienrenamer::Pluginbase.registered_plugins.sort! {|x,y| y.priority <=> x.priority }
|
94
88
|
|
95
89
|
puts "Plugins loaded: #{Serienrenamer::Pluginbase.registered_plugins.inspect}"
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# class that creates an episodename out of the episode identifier
|
2
|
+
# for S02E04 the episodename would be "Episode 4"
|
3
|
+
|
4
|
+
module Serienrenamer
|
5
|
+
module Plugin
|
6
|
+
|
7
|
+
class EpisodeIdentifier < Serienrenamer::Pluginbase
|
8
|
+
|
9
|
+
# static plugin metadata; bin/serienrenamer sorts the registered
# plugins by descending priority
def self.plugin_name
  "EpisodeIdentifier"
end

def self.usable
  true
end

def self.priority
  1
end
|
12
|
+
|
13
|
+
# this method will be called from the main program
# with a Serienrenamer::Episode instance (anything that
# responds to #episodepath) or a plain path String as parameter
#
# the episode number found in the path is turned into a generic
# episodename, e.g. "S02E04" => "Episode 4"
#
# it returns an array holding the generated episodename or an
# empty array if the path carries no episode information
def self.generate_episode_information(episode)
  # honour the documented "or a path" contract instead of
  # failing on Strings that have no #episodepath
  path = episode.respond_to?(:episodepath) ? episode.episodepath : episode

  return [] unless Serienrenamer::Episode.contains_episode_information?(path)

  md = Serienrenamer::Episode.extract_episode_information(path)
  return [] unless md

  [ "Episode %d" % md[:episode].to_i ]
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
#
|
2
|
+
# Class that extracts information about episodes
|
3
|
+
# from the serienjunkies.de-Page
|
4
|
+
#
|
5
|
+
require 'uri'
|
6
|
+
require 'mechanize'
|
7
|
+
|
8
|
+
module Serienrenamer
|
9
|
+
module Plugin
|
10
|
+
|
11
|
+
class SerienjunkiesDe < Serienrenamer::Pluginbase
|
12
|
+
|
13
|
+
# static plugin metadata; plugin_url is the base all page links
# of this plugin are joined against
def self.plugin_name
  "SerienjunkiesDe"
end

def self.plugin_url
  "http://serienjunkies.de"
end

def self.usable
  true
end

def self.priority
  50
end
|
17
|
+
|
18
|
+
# this method will be called from the main program
# with a Serienrenamer::Episode instance as parameter
#
# the first call for a series looks up the series page and stores
# the parsed episodes in @cached_data (keyed by series name), so
# that later calls for the same series need no further requests
#
# raises ArgumentError if no Serienrenamer::Episode is supplied
#
# returns an array with the matching episodename or an empty
# array if the series or the episode is unknown
def self.generate_episode_information(episode)

  raise ArgumentError, "Serienrenamer::Episode instance needed" unless
    episode.is_a?(Serienrenamer::Episode)

  @cached_data ||= {}

  if ! @cached_data.has_key?(episode.series) && episode.series.match(/\w+/)
    # determine link to series
    seriespage_link = self.find_link_to_series_page(episode.series)

    if seriespage_link
      @cached_data[episode.series] = self.parse_seriespage(seriespage_link)
    end
  end

  # unknown series => nothing to offer
  series = @cached_data[episode.series]
  return [] unless series

  # season/episode that can not be formatted as numbers can not
  # match any cached identifier
  begin
    identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
  rescue ArgumentError, TypeError
    return []
  end

  episodename = series[identifier]
  return [] unless episodename.respond_to?(:match) && episodename.match(/\w+/)

  [ episodename ]
end
|
66
|
+
|
67
|
+
# tries to find the link to the series page because there are
# plenty of different writings of some series
# :seriesname: - name of the series
#
# the index page for the first character of the name is fetched
# and the first "a.slink" anchor whose text contains the name
# (case-insensitive, literal comparison) wins
#
# TODO make this more intelligent so that it tries other forms
# of the name
#
# raises ArgumentError if the name contains no word character
#
# returns a link to a serienjunkies.de-page or nil if no page was found
def self.find_link_to_series_page(seriesname)
  raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)

  self.build_agent unless defined? @agent

  url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )

  # escape the name so that characters like "(", "." or "+" are
  # compared literally instead of being read as regex syntax
  name_pattern = /#{Regexp.escape(seriesname)}/i

  @agent.get(url).search("a.slink").each do |series|
    if series.text.match(name_pattern)
      return URI.join( plugin_url, series[:href]).to_s
    end
  end

  nil
end
|
90
|
+
|
91
|
+
# parses the supplied url and returns a hash with
# episode information indexed by episode identifier ("S01E02")
# :page_url: - url of the serienjunkies page
# :german: - extract only german titles if true
#            NOTE: the flag is accepted for compatibility but the
#            extraction below does not consult it; the title is always
#            taken from the second anchor of a row (presumably the
#            german title - TODO confirm against the page layout)
#
# returns an empty hash if the page links no "Episoden" list
def self.parse_seriespage(page_url, german=true)

  self.build_agent unless defined? @agent

  series = {}

  seriesdoc = @agent.get(page_url)
  episodes_link = seriesdoc.link_with(:text => /^Episoden$/i)
  return series unless episodes_link # nothing to follow

  epidoc = @agent.click(episodes_link)

  epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |row|

    next unless row.search("td.thh").empty? # skip headings

    first_cell = row.search(":first-child")[0]
    next unless first_cell

    # the first cell holds the numbering in the form "<season>x<episode>"
    md = first_cell.text.match(/(?<season>\d+)x(?<episode>\d+)/)
    next unless md

    identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]

    # separate local so that the german parameter is not overwritten
    title_link = row.search("a")[1]
    next unless title_link

    series[identifier] = title_link.text.strip
  end

  series
end
|
124
|
+
|
125
|
+
# creates a fresh Mechanize instance and keeps it in @agent so
# that the page lookups of this plugin can share it
def self.build_agent
  @agent = Mechanize.new
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
#
|
2
|
+
# Class that extracts information about episodes
|
3
|
+
# from the serienjunkies.org-Feed
|
4
|
+
#
|
5
|
+
require 'rss'
|
6
|
+
require 'open-uri'
|
7
|
+
|
8
|
+
module Serienrenamer
|
9
|
+
module Plugin
|
10
|
+
|
11
|
+
class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase
|
12
|
+
|
13
|
+
# static plugin metadata
def self.plugin_name
  "SerienjunkiesOrgFeed"
end

def self.usable
  true
end

def self.priority
  80
end

# default location of the episode feed (can be replaced with feed_url=)
@feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'
|
18
|
+
|
19
|
+
# this method will be called from the main program
# with an Serienrenamer::Episode instance as parameter
#
# if this is the first call to this method, it loads the list of
# release names from the feed (@feed_data), which is then reused
# by all future method calls
#
# :debug: - prints the currently tried name words if true
#
# raises ArgumentError if no Serienrenamer::Episode is supplied
#
# returns an array of feed entries that match the series and the
# season/episode of the supplied episode (empty if none matches)
def self.generate_episode_information(episode, debug=false)

  raise ArgumentError, "Serienrenamer::Episode instance needed" unless
    episode.is_a?(Serienrenamer::Episode)

  @feed_data ||= self.build_up_series_data

  episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]

  # search for all items that match the definition
  # and save them uniquely in an array; for entries that share the
  # same "<series>.SxxEyy" prefix only the longest one is kept
  matched_definitions = []
  @feed_data.grep(/#{episode_definition}/).each do |epi|
    serdef = epi[/^.*S\d+E\d+/]
    exist = matched_definitions.find { |known| known.start_with?(serdef) }

    if exist
      # identical or shorter entries add no information
      next if epi == exist || epi.length < exist.length
      matched_definitions.delete(exist) if epi.length > exist.length
    end

    matched_definitions.push(epi)
  end

  # find suitable episode string in the array of
  # matched definitions
  #
  # start with a pattern that includes all words from
  # Episode#series and if this does not match, it cuts
  # off the first word and tries to match again
  #
  # if the pattern contains one word and if this
  # still not match, the last word is splitted
  # characterwise, so that:
  #     crmi  ==> Criminal Minds
  #
  # every word is escaped, so that characters like "(" or "+"
  # in a series name are compared literally
  matched_episodes = []
  name_words = episode.series.split(/ /)
  word_splitted = false

  until name_words.empty?
    p name_words if debug

    pattern = name_words.map { |word| Regexp.escape(word) }.join('.*')
    matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
    break unless matched_episodes.empty?

    # if last word was splitted and does not match than break
    # and return empty resultset
    break if word_splitted

    if name_words.length == 1
      # split characterwise if last word does not match
      name_words = name_words.first.split(//)
      word_splitted = true
    else
      name_words.shift
    end
  end

  matched_episodes
end
|
92
|
+
|
93
|
+
# create a list of existing episodes
#
# reads the RSS feed at @feed_url (remote url or local file) and
# collects the second space-separated token of every item title
# (presumably the release name - TODO confirm against the feed)
#
# returns an array of strings (empty if the feed holds no usable items)
def self.build_up_series_data
  feed_data = []

  collect = lambda do |rss|
    feed = RSS::Parser.parse(rss)
    next unless feed # source was not recognised as a feed

    feed.items.each do |item|
      name = item.title.to_s.split(/ /)[1]
      feed_data.push(name) if name # titles without a second token are skipped
    end
  end

  # open remote feeds explicitly through open-uri and local files
  # through File.open; the bare Kernel#open does not fetch urls on
  # current rubies and would execute strings that start with "|"
  source = @feed_url.to_s
  if source =~ %r{\A(?:https?|ftp)://}i
    URI.parse(source).open(&collect)
  else
    File.open(source, &collect)
  end

  feed_data
end
|
105
|
+
|
106
|
+
# set the feed url (e.g for testing)
#
# :feed: - either a url ("scheme://...") which is stored unchanged
#          or a path to a local file which is stored as absolute path
#
# feed data that was loaded from a previous source is dropped, so
# that the next lookup reads the new feed
def self.feed_url=(feed)
  location = feed.to_s
  @feed_url =
    if location =~ %r{\A[a-z][a-z0-9+.\-]*://}i
      location # real urls must not be turned into file paths
    else
      File.absolute_path(feed)
    end

  remove_instance_variable(:@feed_data) if defined?(@feed_data)
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
#
|
2
|
+
# Class that extracts information about episodes
|
3
|
+
# from the serienjunkies.org-Page
|
4
|
+
#
|
5
|
+
require 'uri'
|
6
|
+
require 'mechanize'
|
7
|
+
require 'yaml'
|
8
|
+
|
9
|
+
module Serienrenamer
|
10
|
+
module Plugin
|
11
|
+
|
12
|
+
class SerienjunkiesOrg < Serienrenamer::Pluginbase
|
13
|
+
|
14
|
+
# static plugin metadata; plugin_url is the base all page links
# of this plugin are joined against
def self.plugin_name
  "SerienjunkiesOrg"
end

def self.plugin_url
  "http://serienjunkies.org"
end

def self.usable
  true
end

def self.priority
  60
end
|
18
|
+
|
19
|
+
# Public: tries to search for an appropriate episodename
#
# The first call for a series looks up the series page and stores
# the parsed episodes in @cached_data (keyed by series name), so
# that later calls for the same series need no further requests.
#
# episode - Serienrenamer::Episode instance which holds the information
#
# Raises ArgumentError if no Serienrenamer::Episode is supplied.
#
# Returns an array of possible episodenames (empty if the series or
# the episode is unknown)
def self.generate_episode_information(episode)

  raise ArgumentError, "Serienrenamer::Episode instance needed" unless
    episode.is_a?(Serienrenamer::Episode)

  @cached_data ||= {}

  if ! @cached_data.has_key?(episode.series) && episode.series.match(/\w+/)
    # determine link to series
    seriespage_link = self.find_link_to_series_page(episode.series)

    if seriespage_link
      @cached_data[episode.series] = self.parse_seriespage(seriespage_link)
    end
  end

  # unknown series => nothing to offer
  series = @cached_data[episode.series]
  return [] unless series

  # season/episode that can not be formatted as numbers can not
  # match any cached identifier
  begin
    identifier = "%d_%d" % [ episode.season, episode.episode ]
  rescue ArgumentError, TypeError
    return []
  end

  episodename = series[identifier]
  return [] unless episodename.respond_to?(:match) && episodename.match(/\w+/)

  [ episodename ]
end
|
70
|
+
|
71
|
+
# Public: tries to find a link to the seriespage
#
# The index page for the first character of the name is fetched and
# the first sidebar link whose text contains all words of the name in
# the given order (case-insensitive, anything may sit between the
# words) wins.
#
# seriesname - the series name for which the page is searched
#
# Raises ArgumentError if the name contains no word character.
#
# Returns the link or nil
def self.find_link_to_series_page(seriesname)
  raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)

  self.build_agent unless defined? @agent

  url = URI.join(plugin_url, "?cat=0&l=%s" % seriesname[0].downcase )

  # whitespace becomes a wildcard, everything else is escaped so that
  # characters like "(" or "." are compared literally
  words = seriesname.split(/\s/, -1).map { |word| Regexp.escape(word) }
  pattern = /#{words.join(".*")}/i

  @agent.get(url).search("div#sidebar > ul > li > a").each do |series|
    if series.text.match(pattern)
      return URI.join( plugin_url, series[:href]).to_s
    end
  end

  nil
end
|
93
|
+
|
94
|
+
# Public: parses a series page and extracts the episode information
#
# Every sidebar link whose text contains "Staffel" (german) or
# "Season" (otherwise) is fetched and handed to parse_season_subpage.
# If several subpages deliver the same episode, the longer episodename
# is kept, which mirrors the rule used inside one subpage.
#
# page_url - the url to the seriespage
# german - if true it extracts only german data (Defaults to true)
# debug - if true the collected data is printed as yaml (Defaults to false)
#
# Returns a hash which contains the episode information or an empty
# hash if there aren't any episodes
def self.parse_seriespage(page_url, german=true, debug=false)

  self.build_agent unless defined? @agent

  season_marker = german ? /Staffel/i : /Season/i
  series = {}
  doc = @agent.get(page_url)

  doc.search('div#sidebar > div#scb > div.bkname > a').each do |link|
    next unless link.content.match(season_marker)

    site = @agent.get(link[:href])
    episodes = self.parse_season_subpage(site, german)

    # do not let a later subpage replace a longer name with a shorter one
    series.merge!(episodes) do |_id, known, found|
      known.size > found.size ? known : found
    end
  end

  puts series.to_yaml if debug

  series
end
|
125
|
+
|
126
|
+
# Public: extracts the episodes from one season
#
# Every first <strong> child inside the post content is treated as a
# release name. Names without episode information are skipped; with
# german=true only names that contain "German" but not "Subbed" are
# used, otherwise only names without "German". If an episode shows up
# more than once, a strictly longer known episodename is kept.
#
# page - Mechanize page object which holds the season
# german - extracts german or international episodes
#
# Returns a hash with all episodes (unique), keyed by "<season>_<episode>"
def self.parse_season_subpage(page, german=true)
  releases = page.search('div.post > div.post-content strong:nth-child(1)')

  releases.each_with_object({}) do |node, episodes|
    content = node.content
    md = Serienrenamer::Episode.extract_episode_information(content)
    next unless md

    if german
      next unless content.match(/German/i)
      next if content.match(/Subbed/i)
    elsif content.match(/German/i)
      next
    end

    episodename =
      Serienrenamer::Episode.clean_episode_data(md[:episodename], true)
    next unless episodename && episodename.match(/\w+/)

    id = "%d_%d" % [ md[:season].to_i, md[:episode].to_i ]

    known = episodes[id]
    next if known && known.size > episodename.size

    episodes[id] = episodename
  end
end
|
163
|
+
|
164
|
+
private
|
165
|
+
|
166
|
+
# Private: constructs a Mechanize instance and adds a fix that interprets
# every response without a content type as html
#
# NOTE(review): a bare `private` does not change the visibility of
# methods defined with `def self.`, so this method stays callable as
# self.build_agent - confirm whether that is intended.
#
# Returns the agent (also stored in @agent)
def self.build_agent
  html_fallback = lambda do |_agent, _uri, response, _body|
    if response.content_type.nil? || response.content_type.empty?
      response.content_type = 'text/html'
    end
  end

  @agent = Mechanize.new { |a| a.post_connect_hooks << html_fallback }
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#
|
2
|
+
# Class that searches for a file with
|
3
|
+
# episode information in the directory
|
4
|
+
# like "episode.txt"
|
5
|
+
#
|
6
|
+
require 'serienrenamer'
|
7
|
+
|
8
|
+
module Serienrenamer
|
9
|
+
module Plugin
|
10
|
+
|
11
|
+
class Textfile < Serienrenamer::Pluginbase
|
12
|
+
|
13
|
+
# static plugin metadata
def self.plugin_name
  "Textfile"
end

def self.usable
  true
end

def self.priority
  100
end
|
16
|
+
|
17
|
+
# this method will be called from the main program
# with an Serienrenamer::Episode instance or a path
# to a directory as parameter
#
# the directory (Episode#source_directory or the given path) is
# searched for small single-line files whose content carries
# episode information
#
# it returns an array with the raw content of every matching file
# (empty if there is no usable directory or no such file)
def self.generate_episode_information(episode)

  sourcedir = ""
  if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
    sourcedir = episode.source_directory
  elsif episode.is_a?(String) && File.directory?(episode)
    sourcedir = episode
  end

  matched_episodes = []
  return matched_episodes unless sourcedir != "" && File.directory?(sourcedir)

  # search for files that are smaller than 128 Bytes
  # and check if they contain episode information
  Dir.foreach(sourcedir) do |entry|
    file = File.join(sourcedir, entry)

    # ".", ".." and subdirectories can be tiny as well but are not readable
    next unless File.file?(file)
    next if File.size(file) > 128 || File.zero?(file)

    data = File.open(file, "rb") { |f| f.read }

    # only files with one line with the title are interesting
    next if data.lines.to_a.size > 1

    if data.match(/\w+/) &&
        Serienrenamer::Episode.contains_episode_information?(data)
      matched_episodes.push(data)
    end
  end

  matched_episodes
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|