serienrenamer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,362 @@
1
+ # encoding: UTF-8
2
+ require 'media_wiki'
3
+
4
+ module Plugin
5
+
6
+ # This plugin tries to extract the series
7
+ # information from Wikipedia
8
+ #
9
+ # (currently only the German Wikipedia)
10
+ class Wikipedia < Serienrenamer::Pluginbase
11
+
12
+ def self.plugin_name; "Wikipedia" end
13
+ def self.usable; true end
14
+ def self.priority; 5 end
15
+
16
+ @@WIKIPEDIA_URL = 'http://de.wikipedia.org/w/api.php'
17
+
18
+ # patterns used in this class
19
+ @@EPISODE_TABLE_PATTERN = /.*(?<table>\{\{Episodenlistentabelle.*\}\})\s*$/m
20
+ @@EPISODE_ENTRY_PATTERN = /\{\{Episodenlisteneintrag|S-Episode/
21
+ @@SERIES_SITE_TEST_PATTERN = /\{\{Infobox.Fernsehsendung.*\}\}/m
22
+ @@DISAMBIGUATION_TEST_PATTERN = /\{\{Begriffsklärung\}\}/m
23
+ @@CONTAINS_LINK_TO_EPISODE_LIST = /Hauptartikel.*(?<main>Liste.*?)[\]\}]+/
24
+ @@CONTAINS_INARTICLE_EPISODE_LIST = /\<div.*\>Staffel.(\d+).*\<\/div\>.*class=\"wikitable\".*titel/m
25
+ @@INPAGE_SEASON_SEPARATOR = /\<div.style=\"clear:both\;.class=\"NavFrame\"\>/
26
+ @@WIKITABLE_EXTRACT_PATTERN = /(\{\|.class=\"wikitable\".*\|\})\n/m
27
+
28
+ # this method will be called from the main program
29
+ # with a Serienrenamer::Episode instance as parameter
30
+ #
31
+ # it returns an array of episode information
32
+ def self.generate_episode_information(episode)
33
+
34
+ raise ArgumentError, "Serienrenamer::Episode instance needed" unless
35
+ episode.is_a?(Serienrenamer::Episode)
36
+
37
+ return [] unless episode.series.match(/\w+/)
38
+
39
+ unless defined? @cached_data
40
+ @cached_data = Hash.new
41
+ end
42
+
43
+ wiki = MediaWiki::Gateway.new(@@WIKIPEDIA_URL)
44
+
45
+ if ! @cached_data.has_key?(episode.series)
46
+ # search for a series site on Wikipedia
47
+ series_site = nil
48
+ tries = 3
49
+ search_pattern = episode.series
50
+ search_pattern_modified = false
51
+
52
+ begin
53
+ wiki.search(search_pattern, nil, 50).each do |title|
54
+ pagedata = wiki.get(title)
55
+ if is_series_main_page?(pagedata)
56
+ series_site = title
57
+ break
58
+ end
59
+ end
60
+
61
+ # modify the search term pattern so that it contains
62
+ # only the last word if the search_pattern contains
63
+ # more than one word
64
+ if series_site.nil? && ! search_pattern_modified
65
+ search_pattern = search_pattern.match(/(\w+)\s*$/)[1]
66
+ search_pattern_modified = true
67
+ raise EOFError if search_pattern # break out and retry
68
+ end
69
+ rescue MediaWiki::APIError => e
70
+ tries -= 1
71
+ retry if tries > 0
72
+ rescue EOFError => e
73
+ retry
74
+ end
75
+
76
+ return [] unless series_site
77
+
78
+ # look for a link to a list of episodes
79
+ pagedata = wiki.get(series_site)
80
+
81
+ if contains_link_to_episode_list?(pagedata)
82
+ mainarticle = pagedata.match(@@CONTAINS_LINK_TO_EPISODE_LIST)[:main]
83
+ if mainarticle
84
+ episodelist_page = wiki.get(mainarticle)
85
+ series = parse_episodelist_page_data(episodelist_page)
86
+
87
+ @cached_data[episode.series] = series
88
+ end
89
+
90
+ elsif contains_inarticle_episode_list?(pagedata)
91
+ series = parse_inarticle_episodelist_page_data(pagedata)
92
+ @cached_data[episode.series] = series
93
+
94
+ else
95
+ warn "no episode list found"
96
+ return []
97
+ end
98
+ end
99
+
100
+ episode_names = []
101
+
102
+ # tries to find an episodename in cached_data
103
+ # otherwise returns an empty array
104
+ begin
105
+ series = @cached_data[episode.series]
106
+ episodename = series[episode.season][episode.episode]
107
+ if episodename.match(/\w+/)
108
+ episode_names.push(episodename)
109
+ end
110
+ rescue
111
+ end
112
+
113
+ return episode_names
114
+ end
115
+
116
+ # This method will extract season-based information
117
+ # from a string that contains a Wikipedia episode list page
118
+ #
119
+ # returns an Array of Arrays with episode information
120
+ # where episode and season numbers are the indices
121
+ def self.parse_episodelist_page_data(pagedata, debug=false)
122
+ raise ArgumentError, 'String with pagedata expected' unless
123
+ pagedata.is_a?(String)
124
+
125
+ series_data = []
126
+ is_season_table_following = false
127
+ season_number = nil
128
+
129
+ # split the wikipedia page by headings and process
130
+ # the following paragraph if the heading starts with
131
+ # 'Staffel'
132
+ pagedata.split(/(==.*)==/).each do |paragraph|
133
+ if paragraph.match(/^==.*Staffel/)
134
+ match = paragraph.match(/^==.*Staffel.(?<seasonnr>\d+)/)
135
+ if match
136
+ season_number = match[:seasonnr].to_i
137
+ is_season_table_following = true
138
+ end
139
+ elsif is_season_table_following
140
+ #
141
+ # extract season table from this paragraph
142
+ season = parse_season_table(paragraph)
143
+
144
+ series_data[season_number] = season
145
+ is_season_table_following = false
146
+ end
147
+ end
148
+
149
+ return series_data
150
+ end
151
+
152
+ # this method will be called with a Wikipedia season table
153
+ # as parameter and will extract all episodes from it
154
+ # and return them as an array where the episode number is
155
+ # the index
156
+ def self.parse_season_table(table)
157
+ raise ArgumentError, 'String with seasontable expected' unless
158
+ table.is_a?(String)
159
+
160
+ season_data = []
161
+
162
+ matched_table = table.match(@@EPISODE_TABLE_PATTERN)
163
+ if matched_table
164
+
165
+ # extract all episode entries that
166
+ # look like the following
167
+ #
168
+ # {{Episodenlisteneintrag
169
+ # | NR_GES = 107
170
+ # | NR_ST = 1
171
+ # | OT = The Mastodon in the Room
172
+ # | DT = Die Rückkehr der Scheuklappen
173
+ # | ZF =
174
+ # | EA = {{dts|23|09|2010}}
175
+ # | EAD = {{dts|08|09|2011}}
176
+ # }}
177
+
178
+ episodes = matched_table[:table].split(@@EPISODE_ENTRY_PATTERN)
179
+ if episodes
180
+ episodes.each do |epi|
181
+
182
+ # build up a hash from the entry
183
+ infos = {}
184
+ epi.lines.each do |part|
185
+ parts = part.strip.match(/(?<key>\w+).=.(?<value>.*)$/)
186
+ if parts
187
+ infos[parts[:key].strip] = parts[:value].strip
188
+ end
189
+ end
190
+
191
+ next unless infos.has_key?('NR_ST')
192
+
193
+ # extract useful information and
194
+ # add it to the array
195
+ epi_nr = infos['NR_ST'].to_i
196
+ next unless epi_nr
197
+
198
+ # TODO make the following configurable
199
+ epi_name = infos['DT'].strip
200
+
201
+ # remove all HTML tags (and any text following
202
+ # them) from the episode name, as well as the
203
+ # MediaWiki link syntax [[text]]
204
+ epi_name.gsub!(/<\/?[^>]*>.*/, "")
205
+ epi_name.gsub!(/[\[\[\]\]]/, "")
206
+ next unless epi_name.match(/\w+/)
207
+
208
+ season_data[epi_nr] = epi_name
209
+ end
210
+ end
211
+ end
212
+ return season_data
213
+ end
214
+
215
+ # This method will extract season-based information
216
+ # from a string that contains a series page with an
217
+ # episode list included
218
+ #
219
+ # returns an Array of Arrays with episode information
220
+ # where episode and season numbers are the indices
221
+ def self.parse_inarticle_episodelist_page_data(pagedata, debug=false)
222
+ raise ArgumentError, 'String with pagedata expected' unless
223
+ pagedata.is_a?(String)
224
+
225
+ series_data = []
226
+
227
+ # look for a paragraph with an episodelist
228
+ episodelist_paragraph = pagedata.split(/==.*==/).select { |p|
229
+ contains_inarticle_episode_list?(p) }[0]
230
+
231
+ raise ArgumentError, 'no episodelist found' unless episodelist_paragraph
232
+
233
+ # iterate through all seasons in this episode table
234
+ episodelist_paragraph.split(@@INPAGE_SEASON_SEPARATOR).each do |season|
235
+ next unless contains_inarticle_episode_list?(season)
236
+
237
+ season_nr = season.match(@@CONTAINS_INARTICLE_EPISODE_LIST)[1].to_i
238
+
239
+ wikitable = season.match(@@WIKITABLE_EXTRACT_PATTERN)[1]
240
+ episodes = parse_inarticle_season_table(wikitable)
241
+
242
+ # HACK if a season is split into different parts,
243
+ # e.g. Flashpoint (2.1 and 2.2), then merge them if possible
244
+ if series_data[season_nr] != nil
245
+ series_data[season_nr].each_with_index do |item, index|
246
+ episodes[index] = item unless episodes[index]
247
+ end
248
+ end
249
+
250
+ series_data[season_nr] = episodes
251
+ end
252
+
253
+ return series_data
254
+ end
255
+
256
+ # this method will be called with a wikitable for a season
257
+ # as parameter and will extract all episodes from it
258
+ # and return them as an array where the episode number is
259
+ # the index
260
+ #
261
+ # Example of a wikitable for episodes:
262
+ #
263
+ # {| class="wikitable" width="100%"
264
+ # |- vertical-align: top; text-align:center; "
265
+ # | width="15" | '''Nummer''' <br /><small>(Gesamt)<small>
266
+ # | width="15" | '''Nummer''' <br /><small>(Staffel)<small>
267
+ # ! width="250" | Originaltitel
268
+ # ! width="250" | Deutscher Titel
269
+ # ! width="180" | Erstausstrahlung<br /><small>(USA Network)</small>
270
+ # ! width="180" | Erstausstrahlung<br /><small>(RTL)</small>
271
+ # ! width="180" | Erstausstrahlung<br /><small>(SF zwei)</small>
272
+ # |-
273
+ # | bgcolor="#DFEEEF"| 01
274
+ # | 01
275
+ # | ''Pilot''
276
+ # | ''Auch Reiche sind nur Menschen''
277
+ # | 4. Mai 2009
278
+ # | 17. Mai 2011
279
+ # | 6. Juni 2011 (Teil 1)<br />13. Juni 2011 (Teil 2)
280
+ # |-
281
+ # |}
282
+ #
283
+ def self.parse_inarticle_season_table(table)
284
+ raise ArgumentError, 'String with seasontable expected' unless
285
+ table.is_a?(String)
286
+
287
+ season_data = []
288
+ episode_nr_line_nr = nil
289
+ episode_name_line_nr = nil
290
+
291
+ table.split(/^\|\-.*$/).each do |tablerow|
292
+ tablerow.strip!
293
+
294
+ # skip invalid rows
295
+ lines = tablerow.lines.to_a
296
+ next unless lines.length >= 4
297
+
298
+ if tablerow.match(/width=\"\d+\"/)
299
+ # extract line numbers for the needed data
300
+ # from the table header
301
+ lines.each_with_index do |item, index|
302
+ if item.match(/Nummer.*Staffel/i)
303
+ episode_nr_line_nr = index
304
+
305
+ # TODO make the following more configurable
306
+ elsif item.match(/Deutscher.*Titel/i)
307
+ episode_name_line_nr = index
308
+ end
309
+ end
310
+ else
311
+ # extract episode information
312
+ if episode_nr_line_nr && episode_name_line_nr
313
+
314
+ md_nr = lines[episode_nr_line_nr].strip.match(/(\d+)/)
315
+ if md_nr
316
+ episode_nr = md_nr[1].to_i
317
+
318
+ md_name = lines[episode_name_line_nr].strip.match(/^\|.(.*)$/)
319
+ if md_name
320
+ episode_name = md_name[1]
321
+ episode_name.gsub!(/[\'\"\[\]]/, "")
322
+ next unless episode_name.match(/\w+/)
323
+
324
+ season_data[episode_nr] = episode_name.strip
325
+ end
326
+ end
327
+ end
328
+ end
329
+ end
330
+
331
+ return season_data
332
+ end
333
+
334
+ # this method checks if the page is the main page
335
+ # for a series
336
+ #
337
+ # returns true if the page contains the infobox that
338
+ # is typical for series pages on Wikipedia
339
+ def self.is_series_main_page?(page)
340
+ page.match(@@SERIES_SITE_TEST_PATTERN) != nil
341
+ end
342
+
343
+ # checks if the site is a disambiguation page
344
+ #
345
+ # returns true if this page links to articles about
346
+ # topics that share the same name
347
+ def self.is_disambiguation_site?(page)
348
+ page.match(@@DISAMBIGUATION_TEST_PATTERN) != nil
349
+ end
350
+
351
+ # test if the page contains a link to an article
352
+ # with an episode list
353
+ def self.contains_link_to_episode_list?(page)
354
+ page.match(@@CONTAINS_LINK_TO_EPISODE_LIST) != nil
355
+ end
356
+
357
+ # test if the page contains an episode list
358
+ def self.contains_inarticle_episode_list?(page)
359
+ page.match(@@CONTAINS_INARTICLE_EPISODE_LIST) != nil
360
+ end
361
+ end
362
+ end
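For context, a minimal usage sketch of the plugin above. It is not part of the gem: the filename is made up, the file has to exist on disk, `require 'serienrenamer'` assumes the gem's usual top-level file (not shown in this diff), and the Episode class is defined further below.

require 'serienrenamer'
require 'plugin'

# hypothetical release file; any name matching one of the
# supported patterns (S01E01, 101, 1x1) would do
episode = Serienrenamer::Episode.new('Der.Tatortreiniger.S01E01.GERMAN.HDTVRip.XviD.avi')

# queries the German Wikipedia for the episode list of the
# series and returns an array of candidate episode names
# (empty if nothing was found)
names = Plugin::Wikipedia.generate_episode_information(episode)
episode.add_episode_information(names.first, false) if names.first

puts episode.to_s   # => e.g. "S01E01 - <episode name>.avi"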
data/lib/plugin.rb ADDED
@@ -0,0 +1,8 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless
2
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
+
4
+
5
+ module Plugin
6
+
7
+ Dir[File.dirname(__FILE__) + '/plugin/*.rb'].each {|file| require file }
8
+ end
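plugin.rb simply requires every Ruby file under plugin/, so a new source can be added by dropping another class into that directory. A minimal, hypothetical skeleton of such a plugin, modelled on the interface the Wikipedia plugin above implements (class name, file name and priority are made up; Serienrenamer::Pluginbase is the superclass used by the Wikipedia plugin and is not shown in this diff):

# data/lib/plugin/dummy.rb (hypothetical)
module Plugin
  class Dummy < Serienrenamer::Pluginbase
    def self.plugin_name; "Dummy" end
    def self.usable; true end
    def self.priority; 1 end

    # has to accept a Serienrenamer::Episode and return an
    # array of candidate episode names (empty if unknown)
    def self.generate_episode_information(episode)
      []
    end
  end
end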
@@ -0,0 +1,313 @@
1
+ # coding: UTF-8
2
+ require 'find'
3
+ require 'fileutils'
4
+ require 'wlapi'
5
+
6
+ module Serienrenamer
7
+
8
+ class Episode
9
+
10
+ attr_reader :season, :episode, :episodename,
11
+ :extension, :episodepath, :success, :source_directory
12
+ attr_accessor :episodename_needed, :series
13
+
14
+ # patterns for suitable episodes
15
+ @@PATTERNS = [
16
+ # S01E01
17
+ /^(?<series>.*)S(?<season>\d+)E(?<episode>\d+)(?<episodename>.*)$/i,
18
+ # 101; 1212
19
+ /^(?<series>.*\D)(?<season>\d+)(?<episode>\d{2})(?<episodename>\W*.*)$/,
20
+ # 1x1; 12x12
21
+ /^(?<series>.*)(?<season>\d+)x(?<episode>\d+)(?<episodename>.*)$/,
22
+ ]
23
+
24
+ # allowed endings for episode files
25
+ @@ENDINGS = %w( mpg mpeg avi mkv wmv mp4 mov flv 3gp )
26
+
27
+ # trash words that are removed from the episodename
28
+ @@TRASH_WORDS = %w(
29
+ German Dubbed DVDRip HDTVRip XviD ITG TVR inspired HDRip
30
+ AMBiTiOUS RSG SiGHT SATRip WS TVS RiP READ GERMAN dTV aTV
31
+ iNTERNAL CRoW MSE c0nFuSed UTOPiA scum EXPiRED BDRiP HDTV
32
+ iTunesHD 720p x264 h264 CRiSP euHD WEBRiP ZZGtv ARCHiV DD20
33
+ Prim3time Nfo Repack SiMPTY BLURAYRiP BluRay DELiCiOUS Synced
34
+ UNDELiCiOUS fBi CiD iTunesHDRip RedSeven OiNK idTV DL DD51
35
+ )
36
+
37
+ # Constructor for the Episode class, which takes a path to an
38
+ # episode as argument and extracts as much information from
39
+ # the file as it can.
40
+ def initialize(episodepath, episodename_needed=true)
41
+
42
+ raise ArgumentError, 'no episodepath provided' unless episodepath
43
+
44
+ # make some checks on the given episode path
45
+ unless File.exists?(episodepath) || Dir.exists?(episodepath)
46
+ raise ArgumentError, "episodepath not existing"
47
+ end
48
+
49
+ unless Episode.determine_video_file(episodepath)
50
+ raise ArgumentError, 'no videofile found'
51
+ end
52
+
53
+ @source_directory = nil
54
+
55
+ # normalize information for dirs/files
56
+ basepath = File.basename(episodepath)
57
+
58
+ if File.file?(episodepath)
59
+ basepath = basepath.chomp(File.extname(basepath))
60
+ elsif File.directory?(episodepath)
61
+ @source_directory = episodepath
62
+
63
+ # if directory does not contain episode information
64
+ # check for a text file with suitable information
65
+ unless Episode.contains_episode_information?(basepath)
66
+ info = Plugin::Textfile.generate_episode_information(episodepath)[0]
67
+ basepath = info if info
68
+ end
69
+ end
70
+
71
+ unless Episode.contains_episode_information?(basepath)
72
+ raise ArgumentError, 'no episode information existing'
73
+ end
74
+
75
+ @episodepath = Episode.determine_video_file(episodepath)
76
+
77
+ infos = Episode.extract_episode_information(basepath)
78
+ raise ArgumentError, 'no suitable regex pattern matches' unless infos
79
+
80
+ @series = Episode.clean_episode_data(infos[:series]).strip
81
+ @episodename = Episode.clean_episode_data(
82
+ infos[:episodename], true, true).strip
83
+ @season = infos[:season].to_i
84
+ @episode = infos[:episode].to_i
85
+
86
+ # setting up special behaviour
87
+ @episodename_needed=episodename_needed
88
+ @extension=File.extname(@episodepath).gsub('.','')
89
+ @success=false
90
+ end
91
+
92
+ # Returns the episode information into a format like
93
+ # S0xE0x, depending on @episodename_needed it includes
94
+ # the episodename
95
+ def to_s
96
+ if @episodename_needed
97
+ return "S%.2dE%.2d - %s.%s" % [ @season, @episode, @episodename, @extension ]
98
+ else
99
+ return "S%.2dE%.2d.%s" % [ @season, @episode, @extension ]
100
+ end
101
+ end
102
+
103
+ # this method makes it possible to set the episodename
104
+ # afterwards
105
+ #
106
+ # options:
107
+ # :data
108
+ # string that contains epissodename information
109
+ # :need_cleanup
110
+ # if true than it will apply the standard regex
111
+ # to clean the string and extracts that with
112
+ # the standard patterns
113
+ # if false the string will applied without any
114
+ # checks or cleanup
115
+ # :extract_seriesname
116
+ # tries to extract the seriesname from data
117
+ def add_episode_information(data, need_cleanup=true, extract_seriesname=false)
118
+ return unless data
119
+
120
+ if need_cleanup
121
+ if Episode.contains_episode_information?(data)
122
+ infos = Episode.extract_episode_information(data)
123
+ if infos
124
+ data = infos[:episodename]
125
+
126
+ # try to extract seriesname if needed
127
+ if extract_seriesname and infos[:series].match(/\w+/)
128
+ seriesname = Episode.clean_episode_data(infos[:series])
129
+ @series = seriesname.strip
130
+ end
131
+ end
132
+ end
133
+ data = Episode.clean_episode_data(data, true, true).strip
134
+ end
135
+ @episodename = data
136
+ end
137
+
138
+ # renames the given episodefile into the new
139
+ # clean format and sets the status on success
140
+ #
141
+ def rename(destination_dir=".")
142
+ raise IOError, 'episode file not existing' unless File.file?(@episodepath)
143
+ destination_file = File.join(destination_dir, self.to_s)
144
+
145
+ begin
146
+ File.rename(@episodepath, destination_file)
147
+
148
+ if @source_directory
149
+ FileUtils.remove_dir(@source_directory)
150
+ end
151
+
152
+ @success = true
153
+ rescue SystemCallError => e
154
+ puts "Rename failed: #{e}"
155
+ end
156
+ end
157
+
158
+ ##################
159
+ # static methods #
160
+ ##################
161
+
162
+ # cleans strings from things that can occur in
163
+ # episode files like dots (.) and trash words
164
+ #
165
+ # parameter:
166
+ # :data
167
+ # string that will be cleaned
168
+ # :include_trashwords
169
+ # remove Words like German or Dubbed from
170
+ # the string (Trashwords)
171
+ # :repair_umlauts
172
+ # try to repair broken umlauts if they occur
173
+ #
174
+ def self.clean_episode_data(data, include_trashwords=false, repair_umlauts=false)
175
+ data.gsub!(/\./, " ")
176
+ data.gsub!(/\_/, " ")
177
+ data.gsub!(/\-/, " ")
178
+
179
+ # if this feature is enabled than all trash words
180
+ # are removed from the string. If two trashwords
181
+ # occur than all trailing words will be removed.
182
+ # if a word is removed and the next is not a trash
183
+ # word than the removed word will be included
184
+ if include_trashwords
185
+ purge_count= 0
186
+ last_purge = nil
187
+ cleanwords = []
188
+
189
+ for word in data.split(/ /) do
190
+ next unless word.match(/\w+/)
191
+
192
+ word = repair_umlauts(word) if repair_umlauts
193
+
194
+ # if word is in TRASH_WORDS
195
+ if ! @@TRASH_WORDS.grep(/^#{word}$/i).empty?
196
+ purge_count += 1
197
+ last_purge = word
198
+
199
+ break if purge_count == 2;
200
+ else
201
+ if purge_count == 1 && last_purge != nil
202
+ cleanwords.push(last_purge)
203
+ purge_count = 0
204
+ end
205
+ cleanwords.push(word)
206
+ end
207
+ end
208
+ data = cleanwords.join(" ")
209
+ end
210
+
211
+ return data
212
+ end
213
+
214
+ # This method tries to repair some german umlauts so that
215
+ # the following occurs
216
+ #
217
+ # ae => ä ; ue => ü ; oe => ö ; Ae => Ä ; Ue => Ü ; Oe => Ö
218
+ #
219
+ # This method uses a webservice at:
220
+ # http://wortschatz.uni-leipzig.de/
221
+ # which produces statistics about the german language and
222
+ # e.g. frequency of words occuring in the german language
223
+ #
224
+ # this method convert all broken umlauts in the word and compares
225
+ # the frequency of both version and uses the version which is more
226
+ # common
227
+ #
228
+ # returns an repaired version of the word if necessary
229
+ def self.repair_umlauts(word)
230
+
231
+ if contains_eventual_broken_umlauts?(word)
232
+
233
+ repaired = word.gsub(/ae/, 'ä').gsub(/ue/, 'ü').gsub(/oe/, 'ö')
234
+ repaired.gsub!(/^Ae/, 'Ä')
235
+ repaired.gsub!(/^Ue/, 'Ü')
236
+ repaired.gsub!(/^Oe/, 'Ö')
237
+
238
+ ws = WLAPI::API.new
239
+
240
+ res_broken = ws.frequencies(word)
241
+ freq_broken = res_broken.nil? ? -1 : res_broken[0].to_i
242
+
243
+ res_repaired = ws.frequencies(repaired)
244
+ freq_repaired = res_repaired.nil? ? -1 : res_repaired[0].to_i
245
+
246
+ if freq_repaired > freq_broken
247
+ return repaired
248
+ end
249
+ end
250
+ return word
251
+ end
252
+
253
+ # checks for eventual broken umlauts
254
+ #
255
+ # returns true if broken umlaut if included
256
+ def self.contains_eventual_broken_umlauts?(string)
257
+ ! string.match(/ae|ue|oe|Ae|Ue|Oe/).nil?
258
+ end
259
+
260
+ # tries to match the given string against
261
+ # all supported regex-patterns and returns true if a
262
+ # suitable regex is found
263
+ def self.contains_episode_information?(info)
264
+ @@PATTERNS.each do |p|
265
+ if info.match(p)
266
+ return true
267
+ end
268
+ end
269
+ return false
270
+ end
271
+
272
+ # tries to find a suitable pattern and returns
273
+ # the matched data or nil if nothing matched
274
+ def self.extract_episode_information(info)
275
+ pattern = @@PATTERNS.select { |p| ! info.match(p).nil? }[0]
276
+ if pattern
277
+ return pattern.match(info)
278
+ end
279
+
280
+ return nil
281
+ end
282
+
283
+ # tries to find a valid video file in a given path.
284
+ #
285
+ # If path is a file it returns the path unchanged if the file
286
+ # is a valid video file, or nil otherwise
287
+ #
288
+ # If path is a dir it searches for the biggest valid
289
+ # video file in it and returns its path, or nil if nothing
290
+ # is found
291
+ def self.determine_video_file(path)
292
+ if File.file?(path)
293
+ matched_endings = @@ENDINGS.select { |e| ! path.match(/#{e}$/).nil? }
294
+ return path if ! matched_endings.empty?
295
+
296
+ elsif File.directory?(path)
297
+ videofile = nil
298
+ for file in Find.find(path) do
299
+ matched_endings = @@ENDINGS.select { |e| ! file.match(/#{e}$/).nil? }
300
+ if ! matched_endings.empty?
301
+ if videofile == nil || File.size(file) > File.size(videofile)
302
+ videofile = file
303
+ end
304
+ end
305
+ end
306
+
307
+ return videofile if videofile
308
+ end
309
+
310
+ return nil
311
+ end
312
+ end
313
+ end
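To illustrate the pattern matching and cleanup helpers above, a short sketch of what they do with a typical release name. The filename is made up, `require 'serienrenamer'` assumes the gem's usual top-level file, and repair_umlauts is skipped because it needs the Wortschatz webservice:

require 'serienrenamer'

name = 'shameless.us.s02e05.German.Dubbed.HDTVRip.XviD.avi'

Serienrenamer::Episode.contains_episode_information?(name)
# => true, the S01E01-style pattern matches (case-insensitively)

infos = Serienrenamer::Episode.extract_episode_information(name)
infos[:series]    # => "shameless.us."
infos[:season]    # => "02"
infos[:episode]   # => "05"

# dots, underscores and dashes become spaces
Serienrenamer::Episode.clean_episode_data(infos[:series]).strip
# => "shameless us"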