serienrenamer 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: serienrenamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-04 00:00:00.000000000 Z
12
+ date: 2013-01-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: wlapi
@@ -122,16 +122,16 @@ files:
122
122
  - README.rdoc
123
123
  - Rakefile
124
124
  - bin/serienrenamer
125
- - lib/plugin.rb
126
- - lib/plugin/episode_identifier.rb
127
- - lib/plugin/serienjunkies_de.rb
128
- - lib/plugin/serienjunkies_feed.rb
129
- - lib/plugin/serienjunkies_org.rb
130
- - lib/plugin/textfile.rb
131
- - lib/plugin/wikipedia.rb
132
125
  - lib/serienrenamer.rb
133
126
  - lib/serienrenamer/episode.rb
134
127
  - lib/serienrenamer/information_store.rb
128
+ - lib/serienrenamer/plugin.rb
129
+ - lib/serienrenamer/plugin/episode_identifier.rb
130
+ - lib/serienrenamer/plugin/serienjunkies_de.rb
131
+ - lib/serienrenamer/plugin/serienjunkies_feed.rb
132
+ - lib/serienrenamer/plugin/serienjunkies_org.rb
133
+ - lib/serienrenamer/plugin/textfile.rb
134
+ - lib/serienrenamer/plugin/wikipedia.rb
135
135
  - lib/serienrenamer/version.rb
136
136
  - serienrenamer.gemspec
137
137
  - test/serienjunkies_feed_sample.xml
@@ -142,8 +142,8 @@ files:
142
142
  - test/test_plugin_episode_identifier.rb
143
143
  - test/test_plugin_serienjunkies_de.rb
144
144
  - test/test_plugin_serienjunkies_feed.rb
145
+ - test/test_plugin_serienjunkies_org.rb
145
146
  - test/test_plugin_textfile.rb
146
- - test/test_serienjunkies_org.rb
147
147
  homepage: http://github.com/pboehm/serienrenamer
148
148
  licenses: []
149
149
  post_install_message:
@@ -155,7 +155,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
155
155
  requirements:
156
156
  - - ! '>='
157
157
  - !ruby/object:Gem::Version
158
- version: '0'
158
+ version: 1.9.0
159
159
  required_rubygems_version: !ruby/object:Gem::Requirement
160
160
  none: false
161
161
  requirements:
@@ -178,6 +178,6 @@ test_files:
178
178
  - test/test_plugin_episode_identifier.rb
179
179
  - test/test_plugin_serienjunkies_de.rb
180
180
  - test/test_plugin_serienjunkies_feed.rb
181
+ - test/test_plugin_serienjunkies_org.rb
181
182
  - test/test_plugin_textfile.rb
182
- - test/test_serienjunkies_org.rb
183
183
  has_rdoc:
@@ -1,33 +0,0 @@
1
- # class that creates an episodename out of the episode identifier
2
- # for S02E04 the episodename would be "Episode 4"
3
-
4
- module Plugin
5
-
6
- class EpisodeIdentifier < Serienrenamer::Pluginbase
7
-
8
- def self.plugin_name; "EpisodeIdentifier" end
9
- def self.usable; true end
10
- def self.priority; 1 end
11
-
12
- # this method will be called from the main program
13
- # with an Serienrenamer::Episode instance or a path
14
- # to to a directory as parameter
15
- #
16
- # it returns an array of episode information
17
- def self.generate_episode_information(episode)
18
-
19
- path = episode.episodepath
20
-
21
- matched_episodes = []
22
-
23
- if Serienrenamer::Episode.contains_episode_information?(path)
24
- if md = Serienrenamer::Episode.extract_episode_information(path)
25
- episodename = "Episode %d" % [ md[:episode].to_i ]
26
- matched_episodes << episodename
27
- end
28
- end
29
-
30
- return matched_episodes
31
- end
32
- end
33
- end
@@ -1,129 +0,0 @@
1
- #
2
- # Class that extracts information about episodes
3
- # from the serienjunkies.de-Page
4
- #
5
- require 'uri'
6
- require 'mechanize'
7
-
8
- module Plugin
9
-
10
- class SerienjunkiesDe < Serienrenamer::Pluginbase
11
-
12
- def self.plugin_name; "SerienjunkiesDe" end
13
- def self.plugin_url; "http://serienjunkies.de" end
14
- def self.usable; true end
15
- def self.priority; 50 end
16
-
17
- # this method will be called from the main program
18
- # with an Serienrenamer::Episode instance as parameter
19
- #
20
- # if this is the first call to this method, it builds up
21
- # a hash with all series and existing episodes, which can
22
- # be used by all future method calls
23
- #
24
- def self.generate_episode_information(episode)
25
-
26
- raise ArgumentError, "Serienrenamer::Episode instance needed" unless
27
- episode.is_a?(Serienrenamer::Episode)
28
-
29
- unless defined? @cached_data
30
- @cached_data = Hash.new
31
- end
32
-
33
- if ! @cached_data.has_key?(episode.series)
34
-
35
- if episode.series.match(/\w+/)
36
-
37
- # determine link to series
38
- seriespage_link = self.find_link_to_series_page(episode.series)
39
-
40
- if seriespage_link
41
- seriesdata = self.parse_seriespage(seriespage_link)
42
-
43
- @cached_data[episode.series] = seriesdata
44
- end
45
- end
46
- end
47
-
48
- matched_episodes = []
49
-
50
- # tries to find an episodename in cached_data
51
- # otherwise returns empty array
52
- begin
53
- series = @cached_data[episode.series]
54
- identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
55
- episodename = series[identifier]
56
-
57
- if episodename.match(/\w+/)
58
- matched_episodes.push(episodename)
59
- end
60
- rescue
61
- end
62
-
63
- return matched_episodes
64
- end
65
-
66
- # tries to find the link to the series page because there are
67
- # plenty of different writings of some series
68
- # :seriesname: - name of the series
69
- #
70
- # TODO make this more intelligent so that it tries other forms
71
- # of the name
72
- #
73
- # returns a link to a seriejunkies.de-page or nil if no page was found
74
- def self.find_link_to_series_page(seriesname)
75
- raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
76
-
77
- self.build_agent unless defined? @agent
78
-
79
- url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )
80
-
81
- @agent.get(url).search("a.slink").each do |series|
82
- if series.text.match(/#{seriesname}/i)
83
- return URI.join( plugin_url, series[:href]).to_s
84
- end
85
- end
86
-
87
- return nil
88
- end
89
-
90
- # parses the supplied url and returns a hash with
91
- # episode information indexed by episode identifier
92
- # :page_url: - url of the serienjunkies page
93
- # :german: - extract only german titles if true
94
- def self.parse_seriespage(page_url, german=true)
95
-
96
- self.build_agent unless defined? @agent
97
-
98
- series = {}
99
-
100
- seriesdoc = @agent.get(page_url)
101
- epidoc = @agent.click(seriesdoc.link_with(:text => /^Episoden$/i))
102
-
103
- epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |episode|
104
-
105
- next unless episode.search("td.thh").empty? # skip headings
106
-
107
- firstchild = episode.search(":first-child")[0].text
108
- md = firstchild.match(/(?<season>\d+)x(?<episode>\d+)/)
109
-
110
- next unless md
111
-
112
- # extract and save these information
113
- identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]
114
-
115
- german = episode.search("a")[1]
116
- next unless german
117
-
118
- series[identifier] = german.text.strip
119
- end
120
-
121
- return series
122
- end
123
-
124
- # build up a mechanize instance
125
- def self.build_agent
126
- @agent = Mechanize.new
127
- end
128
- end
129
- end
@@ -1,110 +0,0 @@
1
- #
2
- # Class that extracts information about episodes
3
- # from the serienjunkies.org-Feed
4
- #
5
- require 'rss'
6
- require 'open-uri'
7
-
8
- module Plugin
9
-
10
- class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase
11
-
12
- def self.plugin_name; "SerienjunkiesOrgFeed" end
13
- def self.usable; true end
14
- def self.priority; 80 end
15
-
16
- @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'
17
-
18
- # this method will be called from the main program
19
- # with an Serienrenamer::Episode instance as parameter
20
- #
21
- # if this is the first call to this method, it builds up
22
- # a hash with all series and existing episodes, which can
23
- # be used by all future method calls
24
- #
25
- def self.generate_episode_information(episode, debug=false)
26
-
27
- raise ArgumentError, "Serienrenamer::Episode instance needed" unless
28
- episode.is_a?(Serienrenamer::Episode)
29
-
30
- unless defined? @feed_data
31
- @feed_data = self.build_up_series_data
32
- end
33
-
34
- episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]
35
-
36
- # search for all items that match the definition
37
- # and save them uniquely in an array
38
- matched_definitions = []
39
- for epi in @feed_data.grep(/#{episode_definition}/)
40
- serdef = epi.match(/(^.*S\d+E\d+)/)[0]
41
- exist = matched_definitions.grep(/^#{serdef}/)[0]
42
-
43
- if exist != nil && epi.length > exist.length
44
- matched_definitions.delete(exist)
45
- elsif exist != nil && epi.length < exist.length
46
- next
47
- end
48
-
49
- matched_definitions.push(epi)
50
- end
51
-
52
- # find suitable episode string in the array of
53
- # matched definitions
54
- #
55
- # start with a pattern that includes all words from
56
- # Episode#series and if this does not match, it cuts
57
- # off the first word and tries to match again
58
- #
59
- # if the pattern contains one word and if this
60
- # still not match, the last word is splitted
61
- # characterwise, so that:
62
- # crmi ==> Criminal Minds
63
- #
64
- matched_episodes = []
65
- name_words = episode.series.split(/ /)
66
- word_splitted = false
67
-
68
- while ! name_words.empty?
69
- p name_words if debug
70
-
71
- pattern = name_words.join('.*')
72
- matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
73
- break if ! matched_episodes.empty?
74
-
75
- # split characterwise if last word does not match
76
- if name_words.length == 1 && ! word_splitted
77
- name_words = pattern.split(//)
78
- word_splitted = true
79
- next
80
- end
81
-
82
- # if last word was splitted and does not match than break
83
- # and return empty resultset
84
- break if word_splitted
85
-
86
- name_words.delete_at(0)
87
- end
88
-
89
- return matched_episodes
90
- end
91
-
92
- # create a list of exisiting episodes
93
- def self.build_up_series_data
94
- feed_data = []
95
-
96
- open(@feed_url) do |rss|
97
- feed = RSS::Parser.parse(rss)
98
- feed.items.each do |item|
99
- feed_data.push(item.title.split(/ /)[1])
100
- end
101
- end
102
- return feed_data
103
- end
104
-
105
- # set the feed url (e.g for testing)
106
- def self.feed_url=(feed)
107
- @feed_url = File.absolute_path(feed)
108
- end
109
- end
110
- end
@@ -1,179 +0,0 @@
1
- #
2
- # Class that extracts information about episodes
3
- # from the serienjunkies.org-Page
4
- #
5
- require 'uri'
6
- require 'mechanize'
7
- require 'yaml'
8
-
9
- module Plugin
10
-
11
- class SerienjunkiesOrg < Serienrenamer::Pluginbase
12
-
13
- def self.plugin_name; "SerienjunkiesOrg" end
14
- def self.plugin_url; "http://serienjunkies.org" end
15
- def self.usable; true end
16
- def self.priority; 60 end
17
-
18
- # Public: tries to search for an appropriate episodename
19
- #
20
- # if this is the first call to this method, it builds up
21
- # a hash with all series and existing episodes, which can
22
- # be used by all future method calls
23
- #
24
- # episode - Serienrenamer::Episode instance which holds the information
25
- #
26
- # Returns an array of possible episodenames
27
- def self.generate_episode_information(episode)
28
-
29
- raise ArgumentError, "Serienrenamer::Episode instance needed" unless
30
- episode.is_a?(Serienrenamer::Episode)
31
-
32
- unless defined? @cached_data
33
- @cached_data = Hash.new
34
- end
35
-
36
- if ! @cached_data.has_key?(episode.series)
37
-
38
- if episode.series.match(/\w+/)
39
-
40
- # determine link to series
41
- seriespage_link = self.find_link_to_series_page(episode.series)
42
-
43
- if seriespage_link
44
- seriesdata = self.parse_seriespage(seriespage_link)
45
-
46
- @cached_data[episode.series] = seriesdata
47
- end
48
- end
49
- end
50
-
51
- matched_episodes = []
52
-
53
- # tries to find an episodename in cached_data
54
- # otherwise returns empty array
55
- begin
56
- series = @cached_data[episode.series]
57
-
58
- identifier = "%d_%d" % [ episode.season, episode.episode ]
59
- episodename = series[identifier]
60
-
61
- if episodename.match(/\w+/)
62
- matched_episodes.push(episodename)
63
- end
64
- rescue
65
- end
66
-
67
- return matched_episodes
68
- end
69
-
70
- # Public: tries to find a link to the seriespage
71
- #
72
- # seriesname - the series name for which the page is searched
73
- #
74
- # Returns the link or nil
75
- def self.find_link_to_series_page(seriesname)
76
- raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
77
-
78
- self.build_agent unless defined? @agent
79
-
80
- url = URI.join(plugin_url, "?cat=0&l=%s" % seriesname[0].downcase )
81
-
82
- pattern = seriesname.gsub(/\s/, ".*")
83
-
84
- @agent.get(url).search("div#sidebar > ul > li > a").each do |series|
85
- if series.text.match(/#{pattern}/i)
86
- return URI.join( plugin_url, series[:href]).to_s
87
- end
88
- end
89
-
90
- nil
91
- end
92
-
93
- # Public: parses a series page and extracts the episode information
94
- #
95
- # page_url - the url to the seriespage
96
- # german - if true it extracts only german data (Defaults to true)
97
- #
98
- # Returns a hash which contains the episode information or an empty
99
- # hash if there aren't any episodes
100
- def self.parse_seriespage(page_url, german=true, debug=false)
101
-
102
- self.build_agent unless defined? @agent
103
-
104
- series = {}
105
- doc = @agent.get(page_url)
106
-
107
- doc.search('div#sidebar > div#scb > div.bkname > a').each do |link|
108
- if german
109
- next unless link.content.match(/Staffel/i)
110
- else
111
- next unless link.content.match(/Season/i)
112
- end
113
-
114
- site = @agent.get(link[:href])
115
- episodes = self.parse_season_subpage(site, german)
116
-
117
- series.merge!(episodes)
118
- end
119
-
120
- puts series.to_yaml if debug
121
-
122
- return series
123
- end
124
-
125
- # Public: extracts the episodes from one season
126
- #
127
- # page - Mechanize page object which holds the season
128
- # german - extracts german or international episodes
129
- #
130
- # Returns a hash with all episodes (unique)
131
- def self.parse_season_subpage(page, german=true)
132
-
133
- episodes = {}
134
-
135
- page.search('div.post > div.post-content strong:nth-child(1)').each do |e|
136
-
137
- content = e.content
138
- md = Serienrenamer::Episode.extract_episode_information(content)
139
- next unless md
140
-
141
- if german
142
- next unless content.match(/German/i)
143
- next if content.match(/Subbed/i)
144
- else
145
- next if content.match(/German/i)
146
- end
147
-
148
- episodename =
149
- Serienrenamer::Episode.clean_episode_data(md[:episodename], true)
150
- next unless episodename && episodename.match(/\w+/)
151
-
152
- id = "%d_%d" % [ md[:season].to_i, md[:episode].to_i ]
153
-
154
- next if episodes[id] && episodes[id].size > episodename.size
155
-
156
- episodes[id] = episodename
157
-
158
- end
159
-
160
- return episodes
161
- end
162
-
163
- private
164
-
165
- # Private: constructs a Mechanize instance and adds a fix that interprets
166
- # every response as html
167
- #
168
- # Returns the agent
169
- def self.build_agent
170
- @agent = Mechanize.new do |a|
171
- a.post_connect_hooks << lambda do |_,_,response,_|
172
- if response.content_type.nil? || response.content_type.empty?
173
- response.content_type = 'text/html'
174
- end
175
- end
176
- end
177
- end
178
- end
179
- end
@@ -1,54 +0,0 @@
1
- #
2
- # Class that searches for a file with
3
- # episode information in the directory
4
- # like "episode.txt"
5
- #
6
-
7
- module Plugin
8
-
9
- class Textfile < Serienrenamer::Pluginbase
10
-
11
- def self.plugin_name; "Textfile" end
12
- def self.usable; true end
13
- def self.priority; 100 end
14
-
15
- # this method will be called from the main program
16
- # with an Serienrenamer::Episode instance or a path
17
- # to to a directory as parameter
18
- #
19
- # it returns an array of episode information
20
- def self.generate_episode_information(episode)
21
-
22
- sourcedir = ""
23
- if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
24
- sourcedir = episode.source_directory
25
- elsif episode.is_a?(String) && File.directory?(episode)
26
- sourcedir = episode
27
- end
28
-
29
- matched_episodes = []
30
-
31
- if sourcedir != "" && Dir.exists?(sourcedir)
32
-
33
- # search for files that are smaller than 128 Bytes
34
- # an check if they contain episode information
35
- Dir.new(sourcedir).each do |e|
36
- file = File.join(sourcedir, e)
37
- next if File.size(file) > 128 || File.zero?(file)
38
-
39
- data = File.open(file, "rb").read
40
-
41
- # only files with one line with the title are interesting
42
- next if data.lines.to_a.size > 1
43
-
44
- if data != nil && data.match(/\w+/) &&
45
- Serienrenamer::Episode.contains_episode_information?(data)
46
- matched_episodes.push(data)
47
- end
48
- end
49
- end
50
-
51
- return matched_episodes
52
- end
53
- end
54
- end