serienrenamer 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: serienrenamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-04 00:00:00.000000000 Z
12
+ date: 2013-01-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: wlapi
@@ -122,16 +122,16 @@ files:
122
122
  - README.rdoc
123
123
  - Rakefile
124
124
  - bin/serienrenamer
125
- - lib/plugin.rb
126
- - lib/plugin/episode_identifier.rb
127
- - lib/plugin/serienjunkies_de.rb
128
- - lib/plugin/serienjunkies_feed.rb
129
- - lib/plugin/serienjunkies_org.rb
130
- - lib/plugin/textfile.rb
131
- - lib/plugin/wikipedia.rb
132
125
  - lib/serienrenamer.rb
133
126
  - lib/serienrenamer/episode.rb
134
127
  - lib/serienrenamer/information_store.rb
128
+ - lib/serienrenamer/plugin.rb
129
+ - lib/serienrenamer/plugin/episode_identifier.rb
130
+ - lib/serienrenamer/plugin/serienjunkies_de.rb
131
+ - lib/serienrenamer/plugin/serienjunkies_feed.rb
132
+ - lib/serienrenamer/plugin/serienjunkies_org.rb
133
+ - lib/serienrenamer/plugin/textfile.rb
134
+ - lib/serienrenamer/plugin/wikipedia.rb
135
135
  - lib/serienrenamer/version.rb
136
136
  - serienrenamer.gemspec
137
137
  - test/serienjunkies_feed_sample.xml
@@ -142,8 +142,8 @@ files:
142
142
  - test/test_plugin_episode_identifier.rb
143
143
  - test/test_plugin_serienjunkies_de.rb
144
144
  - test/test_plugin_serienjunkies_feed.rb
145
+ - test/test_plugin_serienjunkies_org.rb
145
146
  - test/test_plugin_textfile.rb
146
- - test/test_serienjunkies_org.rb
147
147
  homepage: http://github.com/pboehm/serienrenamer
148
148
  licenses: []
149
149
  post_install_message:
@@ -155,7 +155,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
155
155
  requirements:
156
156
  - - ! '>='
157
157
  - !ruby/object:Gem::Version
158
- version: '0'
158
+ version: 1.9.0
159
159
  required_rubygems_version: !ruby/object:Gem::Requirement
160
160
  none: false
161
161
  requirements:
@@ -178,6 +178,6 @@ test_files:
178
178
  - test/test_plugin_episode_identifier.rb
179
179
  - test/test_plugin_serienjunkies_de.rb
180
180
  - test/test_plugin_serienjunkies_feed.rb
181
+ - test/test_plugin_serienjunkies_org.rb
181
182
  - test/test_plugin_textfile.rb
182
- - test/test_serienjunkies_org.rb
183
183
  has_rdoc:
@@ -1,33 +0,0 @@
1
- # class that creates an episodename out of the episode identifier
2
- # for S02E04 the episodename would be "Episode 4"
3
-
4
- module Plugin
5
-
6
- class EpisodeIdentifier < Serienrenamer::Pluginbase
7
-
8
- def self.plugin_name; "EpisodeIdentifier" end
9
- def self.usable; true end
10
- def self.priority; 1 end
11
-
12
- # this method will be called from the main program
13
- # with an Serienrenamer::Episode instance or a path
14
- # to to a directory as parameter
15
- #
16
- # it returns an array of episode information
17
- def self.generate_episode_information(episode)
18
-
19
- path = episode.episodepath
20
-
21
- matched_episodes = []
22
-
23
- if Serienrenamer::Episode.contains_episode_information?(path)
24
- if md = Serienrenamer::Episode.extract_episode_information(path)
25
- episodename = "Episode %d" % [ md[:episode].to_i ]
26
- matched_episodes << episodename
27
- end
28
- end
29
-
30
- return matched_episodes
31
- end
32
- end
33
- end
@@ -1,129 +0,0 @@
1
- #
2
- # Class that extracts information about episodes
3
- # from the serienjunkies.de-Page
4
- #
5
- require 'uri'
6
- require 'mechanize'
7
-
8
- module Plugin
9
-
10
- class SerienjunkiesDe < Serienrenamer::Pluginbase
11
-
12
- def self.plugin_name; "SerienjunkiesDe" end
13
- def self.plugin_url; "http://serienjunkies.de" end
14
- def self.usable; true end
15
- def self.priority; 50 end
16
-
17
- # this method will be called from the main program
18
- # with an Serienrenamer::Episode instance as parameter
19
- #
20
- # if this is the first call to this method, it builds up
21
- # a hash with all series and existing episodes, which can
22
- # be used by all future method calls
23
- #
24
- def self.generate_episode_information(episode)
25
-
26
- raise ArgumentError, "Serienrenamer::Episode instance needed" unless
27
- episode.is_a?(Serienrenamer::Episode)
28
-
29
- unless defined? @cached_data
30
- @cached_data = Hash.new
31
- end
32
-
33
- if ! @cached_data.has_key?(episode.series)
34
-
35
- if episode.series.match(/\w+/)
36
-
37
- # determine link to series
38
- seriespage_link = self.find_link_to_series_page(episode.series)
39
-
40
- if seriespage_link
41
- seriesdata = self.parse_seriespage(seriespage_link)
42
-
43
- @cached_data[episode.series] = seriesdata
44
- end
45
- end
46
- end
47
-
48
- matched_episodes = []
49
-
50
- # tries to find an episodename in cached_data
51
- # otherwise returns empty array
52
- begin
53
- series = @cached_data[episode.series]
54
- identifier = "S%.2dE%.2d" % [ episode.season, episode.episode ]
55
- episodename = series[identifier]
56
-
57
- if episodename.match(/\w+/)
58
- matched_episodes.push(episodename)
59
- end
60
- rescue
61
- end
62
-
63
- return matched_episodes
64
- end
65
-
66
- # tries to find the link to the series page because there are
67
- # plenty of different writings of some series
68
- # :seriesname: - name of the series
69
- #
70
- # TODO make this more intelligent so that it tries other forms
71
- # of the name
72
- #
73
- # returns a link to a seriejunkies.de-page or nil if no page was found
74
- def self.find_link_to_series_page(seriesname)
75
- raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
76
-
77
- self.build_agent unless defined? @agent
78
-
79
- url = URI.join(plugin_url, "serien/%s.html" % seriesname[0].downcase )
80
-
81
- @agent.get(url).search("a.slink").each do |series|
82
- if series.text.match(/#{seriesname}/i)
83
- return URI.join( plugin_url, series[:href]).to_s
84
- end
85
- end
86
-
87
- return nil
88
- end
89
-
90
- # parses the supplied url and returns a hash with
91
- # episode information indexed by episode identifier
92
- # :page_url: - url of the serienjunkies page
93
- # :german: - extract only german titles if true
94
- def self.parse_seriespage(page_url, german=true)
95
-
96
- self.build_agent unless defined? @agent
97
-
98
- series = {}
99
-
100
- seriesdoc = @agent.get(page_url)
101
- epidoc = @agent.click(seriesdoc.link_with(:text => /^Episoden$/i))
102
-
103
- epidoc.search('div#sjserie > div.topabstand > table.eplist tr').each do |episode|
104
-
105
- next unless episode.search("td.thh").empty? # skip headings
106
-
107
- firstchild = episode.search(":first-child")[0].text
108
- md = firstchild.match(/(?<season>\d+)x(?<episode>\d+)/)
109
-
110
- next unless md
111
-
112
- # extract and save these information
113
- identifier = "S%.2dE%.2d" % [ md[:season].to_i, md[:episode].to_i ]
114
-
115
- german = episode.search("a")[1]
116
- next unless german
117
-
118
- series[identifier] = german.text.strip
119
- end
120
-
121
- return series
122
- end
123
-
124
- # build up a mechanize instance
125
- def self.build_agent
126
- @agent = Mechanize.new
127
- end
128
- end
129
- end
@@ -1,110 +0,0 @@
1
- #
2
- # Class that extracts information about episodes
3
- # from the serienjunkies.org-Feed
4
- #
5
- require 'rss'
6
- require 'open-uri'
7
-
8
- module Plugin
9
-
10
- class SerienjunkiesOrgFeed < Serienrenamer::Pluginbase
11
-
12
- def self.plugin_name; "SerienjunkiesOrgFeed" end
13
- def self.usable; true end
14
- def self.priority; 80 end
15
-
16
- @feed_url = 'http://serienjunkies.org/xml/feeds/episoden.xml'
17
-
18
- # this method will be called from the main program
19
- # with an Serienrenamer::Episode instance as parameter
20
- #
21
- # if this is the first call to this method, it builds up
22
- # a hash with all series and existing episodes, which can
23
- # be used by all future method calls
24
- #
25
- def self.generate_episode_information(episode, debug=false)
26
-
27
- raise ArgumentError, "Serienrenamer::Episode instance needed" unless
28
- episode.is_a?(Serienrenamer::Episode)
29
-
30
- unless defined? @feed_data
31
- @feed_data = self.build_up_series_data
32
- end
33
-
34
- episode_definition = 'S%.2dE%.2d' % [ episode.season, episode.episode ]
35
-
36
- # search for all items that match the definition
37
- # and save them uniquely in an array
38
- matched_definitions = []
39
- for epi in @feed_data.grep(/#{episode_definition}/)
40
- serdef = epi.match(/(^.*S\d+E\d+)/)[0]
41
- exist = matched_definitions.grep(/^#{serdef}/)[0]
42
-
43
- if exist != nil && epi.length > exist.length
44
- matched_definitions.delete(exist)
45
- elsif exist != nil && epi.length < exist.length
46
- next
47
- end
48
-
49
- matched_definitions.push(epi)
50
- end
51
-
52
- # find suitable episode string in the array of
53
- # matched definitions
54
- #
55
- # start with a pattern that includes all words from
56
- # Episode#series and if this does not match, it cuts
57
- # off the first word and tries to match again
58
- #
59
- # if the pattern contains one word and if this
60
- # still not match, the last word is splitted
61
- # characterwise, so that:
62
- # crmi ==> Criminal Minds
63
- #
64
- matched_episodes = []
65
- name_words = episode.series.split(/ /)
66
- word_splitted = false
67
-
68
- while ! name_words.empty?
69
- p name_words if debug
70
-
71
- pattern = name_words.join('.*')
72
- matched_episodes = matched_definitions.grep(/#{pattern}.*S\d+E\d+/i)
73
- break if ! matched_episodes.empty?
74
-
75
- # split characterwise if last word does not match
76
- if name_words.length == 1 && ! word_splitted
77
- name_words = pattern.split(//)
78
- word_splitted = true
79
- next
80
- end
81
-
82
- # if last word was splitted and does not match than break
83
- # and return empty resultset
84
- break if word_splitted
85
-
86
- name_words.delete_at(0)
87
- end
88
-
89
- return matched_episodes
90
- end
91
-
92
- # create a list of exisiting episodes
93
- def self.build_up_series_data
94
- feed_data = []
95
-
96
- open(@feed_url) do |rss|
97
- feed = RSS::Parser.parse(rss)
98
- feed.items.each do |item|
99
- feed_data.push(item.title.split(/ /)[1])
100
- end
101
- end
102
- return feed_data
103
- end
104
-
105
- # set the feed url (e.g for testing)
106
- def self.feed_url=(feed)
107
- @feed_url = File.absolute_path(feed)
108
- end
109
- end
110
- end
@@ -1,179 +0,0 @@
1
- #
2
- # Class that extracts information about episodes
3
- # from the serienjunkies.org-Page
4
- #
5
- require 'uri'
6
- require 'mechanize'
7
- require 'yaml'
8
-
9
- module Plugin
10
-
11
- class SerienjunkiesOrg < Serienrenamer::Pluginbase
12
-
13
- def self.plugin_name; "SerienjunkiesOrg" end
14
- def self.plugin_url; "http://serienjunkies.org" end
15
- def self.usable; true end
16
- def self.priority; 60 end
17
-
18
- # Public: tries to search for an appropriate episodename
19
- #
20
- # if this is the first call to this method, it builds up
21
- # a hash with all series and existing episodes, which can
22
- # be used by all future method calls
23
- #
24
- # episode - Serienrenamer::Episode instance which holds the information
25
- #
26
- # Returns an array of possible episodenames
27
- def self.generate_episode_information(episode)
28
-
29
- raise ArgumentError, "Serienrenamer::Episode instance needed" unless
30
- episode.is_a?(Serienrenamer::Episode)
31
-
32
- unless defined? @cached_data
33
- @cached_data = Hash.new
34
- end
35
-
36
- if ! @cached_data.has_key?(episode.series)
37
-
38
- if episode.series.match(/\w+/)
39
-
40
- # determine link to series
41
- seriespage_link = self.find_link_to_series_page(episode.series)
42
-
43
- if seriespage_link
44
- seriesdata = self.parse_seriespage(seriespage_link)
45
-
46
- @cached_data[episode.series] = seriesdata
47
- end
48
- end
49
- end
50
-
51
- matched_episodes = []
52
-
53
- # tries to find an episodename in cached_data
54
- # otherwise returns empty array
55
- begin
56
- series = @cached_data[episode.series]
57
-
58
- identifier = "%d_%d" % [ episode.season, episode.episode ]
59
- episodename = series[identifier]
60
-
61
- if episodename.match(/\w+/)
62
- matched_episodes.push(episodename)
63
- end
64
- rescue
65
- end
66
-
67
- return matched_episodes
68
- end
69
-
70
- # Public: tries to find a link to the seriespage
71
- #
72
- # seriesname - the series name for which the page is searched
73
- #
74
- # Returns the link or nil
75
- def self.find_link_to_series_page(seriesname)
76
- raise ArgumentError, "seriesname expected" unless seriesname.match(/\w+/)
77
-
78
- self.build_agent unless defined? @agent
79
-
80
- url = URI.join(plugin_url, "?cat=0&l=%s" % seriesname[0].downcase )
81
-
82
- pattern = seriesname.gsub(/\s/, ".*")
83
-
84
- @agent.get(url).search("div#sidebar > ul > li > a").each do |series|
85
- if series.text.match(/#{pattern}/i)
86
- return URI.join( plugin_url, series[:href]).to_s
87
- end
88
- end
89
-
90
- nil
91
- end
92
-
93
- # Public: parses a series page and extracts the episode information
94
- #
95
- # page_url - the url to the seriespage
96
- # german - if true it extracts only german data (Defaults to true)
97
- #
98
- # Returns a hash which contains the episode information or an empty
99
- # hash if there aren't any episodes
100
- def self.parse_seriespage(page_url, german=true, debug=false)
101
-
102
- self.build_agent unless defined? @agent
103
-
104
- series = {}
105
- doc = @agent.get(page_url)
106
-
107
- doc.search('div#sidebar > div#scb > div.bkname > a').each do |link|
108
- if german
109
- next unless link.content.match(/Staffel/i)
110
- else
111
- next unless link.content.match(/Season/i)
112
- end
113
-
114
- site = @agent.get(link[:href])
115
- episodes = self.parse_season_subpage(site, german)
116
-
117
- series.merge!(episodes)
118
- end
119
-
120
- puts series.to_yaml if debug
121
-
122
- return series
123
- end
124
-
125
- # Public: extracts the episodes from one season
126
- #
127
- # page - Mechanize page object which holds the season
128
- # german - extracts german or international episodes
129
- #
130
- # Returns a hash with all episodes (unique)
131
- def self.parse_season_subpage(page, german=true)
132
-
133
- episodes = {}
134
-
135
- page.search('div.post > div.post-content strong:nth-child(1)').each do |e|
136
-
137
- content = e.content
138
- md = Serienrenamer::Episode.extract_episode_information(content)
139
- next unless md
140
-
141
- if german
142
- next unless content.match(/German/i)
143
- next if content.match(/Subbed/i)
144
- else
145
- next if content.match(/German/i)
146
- end
147
-
148
- episodename =
149
- Serienrenamer::Episode.clean_episode_data(md[:episodename], true)
150
- next unless episodename && episodename.match(/\w+/)
151
-
152
- id = "%d_%d" % [ md[:season].to_i, md[:episode].to_i ]
153
-
154
- next if episodes[id] && episodes[id].size > episodename.size
155
-
156
- episodes[id] = episodename
157
-
158
- end
159
-
160
- return episodes
161
- end
162
-
163
- private
164
-
165
- # Private: constructs a Mechanize instance and adds a fix that interprets
166
- # every response as html
167
- #
168
- # Returns the agent
169
- def self.build_agent
170
- @agent = Mechanize.new do |a|
171
- a.post_connect_hooks << lambda do |_,_,response,_|
172
- if response.content_type.nil? || response.content_type.empty?
173
- response.content_type = 'text/html'
174
- end
175
- end
176
- end
177
- end
178
- end
179
- end
@@ -1,54 +0,0 @@
1
- #
2
- # Class that searches for a file with
3
- # episode information in the directory
4
- # like "episode.txt"
5
- #
6
-
7
- module Plugin
8
-
9
- class Textfile < Serienrenamer::Pluginbase
10
-
11
- def self.plugin_name; "Textfile" end
12
- def self.usable; true end
13
- def self.priority; 100 end
14
-
15
- # this method will be called from the main program
16
- # with an Serienrenamer::Episode instance or a path
17
- # to to a directory as parameter
18
- #
19
- # it returns an array of episode information
20
- def self.generate_episode_information(episode)
21
-
22
- sourcedir = ""
23
- if episode.is_a?(Serienrenamer::Episode) && episode.source_directory
24
- sourcedir = episode.source_directory
25
- elsif episode.is_a?(String) && File.directory?(episode)
26
- sourcedir = episode
27
- end
28
-
29
- matched_episodes = []
30
-
31
- if sourcedir != "" && Dir.exists?(sourcedir)
32
-
33
- # search for files that are smaller than 128 Bytes
34
- # an check if they contain episode information
35
- Dir.new(sourcedir).each do |e|
36
- file = File.join(sourcedir, e)
37
- next if File.size(file) > 128 || File.zero?(file)
38
-
39
- data = File.open(file, "rb").read
40
-
41
- # only files with one line with the title are interesting
42
- next if data.lines.to_a.size > 1
43
-
44
- if data != nil && data.match(/\w+/) &&
45
- Serienrenamer::Episode.contains_episode_information?(data)
46
- matched_episodes.push(data)
47
- end
48
- end
49
- end
50
-
51
- return matched_episodes
52
- end
53
- end
54
- end