serienrenamer 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/plugin/wikipedia.rb +85 -1
- data/lib/serienrenamer.rb +1 -1
- data/test/test_plugin_wikipedia.rb +17 -1
- metadata +16 -16
data/lib/plugin/wikipedia.rb
CHANGED
@@ -24,6 +24,8 @@ module Plugin
|
|
24
24
|
@@CONTAINS_INARTICLE_EPISODE_LIST = /\<div.*\>Staffel.(\d+).*\<\/div\>.*class=\"wikitable\".*titel/m
|
25
25
|
@@INPAGE_SEASON_SEPARATOR = /\<div.style=\"clear:both\;.class=\"NavFrame\"\>/
|
26
26
|
@@WIKITABLE_EXTRACT_PATTERN = /(\{\|.class=\"wikitable\".*\|\})\n/m
|
27
|
+
@@IS_ONE_LINE_EPISODE_LIST = /\|.*\|\|.*\|\|.*\|\|/m
|
28
|
+
|
27
29
|
|
28
30
|
# this method will be called from the main program
|
29
31
|
# with a Serienrenamer::Episode instance as parameter
|
@@ -113,6 +115,7 @@ module Plugin
|
|
113
115
|
return episode_names
|
114
116
|
end
|
115
117
|
|
118
|
+
|
116
119
|
# This method will extract season based information
|
117
120
|
# from a string that contains a wikipedia episodelist page
|
118
121
|
#
|
@@ -149,6 +152,7 @@ module Plugin
|
|
149
152
|
return series_data
|
150
153
|
end
|
151
154
|
|
155
|
+
|
152
156
|
# this method will be called with a wikipedia seasontable
|
153
157
|
# as parameter and will extract all episodes from this
|
154
158
|
# and returns that as an array where the episode number is
|
@@ -212,6 +216,7 @@ module Plugin
|
|
212
216
|
return season_data
|
213
217
|
end
|
214
218
|
|
219
|
+
|
215
220
|
# This method will extract season based information
|
216
221
|
# from a string that contains a series page with an
|
217
222
|
# episodelist included
|
@@ -237,7 +242,15 @@ module Plugin
|
|
237
242
|
season_nr = season.match(@@CONTAINS_INARTICLE_EPISODE_LIST)[1].to_i
|
238
243
|
|
239
244
|
wikitable = season.match(@@WIKITABLE_EXTRACT_PATTERN)[1]
|
240
|
-
|
245
|
+
|
246
|
+
# we have to detect the type of the inarticle season page
|
247
|
+
# because there are two different kinds of table structures
|
248
|
+
# used in the german wikipedia
|
249
|
+
if self.is_episode_list_with_one_episode_per_line?(wikitable)
|
250
|
+
episodes = parse_inarticle_season_table_with_one_line(wikitable)
|
251
|
+
else
|
252
|
+
episodes = parse_inarticle_season_table(wikitable)
|
253
|
+
end
|
241
254
|
|
242
255
|
# HACK if a season is split into different parts
|
243
256
|
# e.g. Flashpoint (2.1 and 2.2), then merge them if possible
|
@@ -253,6 +266,7 @@ module Plugin
|
|
253
266
|
return series_data
|
254
267
|
end
|
255
268
|
|
269
|
+
|
256
270
|
# this method will be called with a wikitable for a season
|
257
271
|
# as parameter and will extract all episodes from this
|
258
272
|
# and returns that as an array where the episode number is
|
@@ -331,6 +345,71 @@ module Plugin
|
|
331
345
|
return season_data
|
332
346
|
end
|
333
347
|
|
348
|
+
|
349
|
+
# this method will be called with a wikitable for a season
|
350
|
+
# as parameter and will extract all episodes from this
|
351
|
+
# and returns that as an array where the episode number is
|
352
|
+
# the index
|
353
|
+
#
|
354
|
+
# this method implements a special format that takes place in
|
355
|
+
# e.g. 'Prison Break' where an episode is not spread along several
|
356
|
+
# lines like in the method above
|
357
|
+
#
|
358
|
+
# Example of a wikitable for episodes:
|
359
|
+
#
|
360
|
+
#{| class="wikitable"
|
361
|
+
# |- style="color:#black; background-color:#006699"
|
362
|
+
# ! '''Episode''' !! '''Deutscher Titel''' !! '''Originaltitel''' !! '''Erstausstrahlung (DE)''' !! '''Erstausstrahlung (USA)'''
|
363
|
+
# |-
|
364
|
+
# |'''1''' (1-01) || Der große Plan || Pilot || 21. Juni 2007 || 29. August 2005
|
365
|
+
# |-
|
366
|
+
# |'''2''' (1-02) || Lügt Lincoln? || Allen || 21. Juni 2007 || 29. August 2005
|
367
|
+
# |-
|
368
|
+
# |'''3''' (1-03) || Vertrauenstest || Cell Test || 28. Juni 2007 || 5. September 2005
|
369
|
+
# |-
|
370
|
+
# |'''4''' (1-04) || Veronica steigt ein || Cute Poison || 28. Juni 2007 || 12. September 2005
|
371
|
+
#
|
372
|
+
def self.parse_inarticle_season_table_with_one_line(table)
|
373
|
+
raise ArgumentError, 'String with seasontable expected' unless
|
374
|
+
table.is_a?(String)
|
375
|
+
|
376
|
+
season_data = []
|
377
|
+
episode_nr_col = nil
|
378
|
+
episode_name_col = nil
|
379
|
+
|
380
|
+
table.split(/^\|\-.*$/).each do |tablerow|
|
381
|
+
|
382
|
+
if tablerow.match(/!!.*!!.*!!/)
|
383
|
+
# extract column numbers from table header
|
384
|
+
tablerow.split(/!!/).each_with_index do |col,index|
|
385
|
+
episode_nr_col = index if col.match(/Episode/i)
|
386
|
+
episode_name_col = index if col.match(/Deutsch.*Titel/i)
|
387
|
+
end
|
388
|
+
|
389
|
+
elsif tablerow.match(/\|\|.*\w+.*\|\|/)
|
390
|
+
tablerow.strip!
|
391
|
+
columns = tablerow.split(/\|\|/)
|
392
|
+
|
393
|
+
# the following cleanes up the column so that the following occurs
|
394
|
+
# " '''7''' (1-07) " => "7 1 07"
|
395
|
+
#
|
396
|
+
# we can now extract the last bunch of digits and this algorithm is
|
397
|
+
# some kind of format independent
|
398
|
+
dirty_episode_nr = columns[episode_nr_col].gsub(/\D/, " ").strip
|
399
|
+
episode_nr = dirty_episode_nr.match(/(\d+)$/)[1]
|
400
|
+
next unless episode_nr
|
401
|
+
|
402
|
+
episode_name = columns[episode_name_col].strip
|
403
|
+
next unless episode_nr.match(/\w+/)
|
404
|
+
|
405
|
+
season_data[episode_nr.to_i] = episode_name
|
406
|
+
end
|
407
|
+
end
|
408
|
+
|
409
|
+
return season_data
|
410
|
+
end
|
411
|
+
|
412
|
+
|
334
413
|
# this method checks if the page is the main page
|
335
414
|
# for a series
|
336
415
|
#
|
@@ -358,5 +437,10 @@ module Plugin
|
|
358
437
|
def self.contains_inarticle_episode_list?(page)
|
359
438
|
page.match(@@CONTAINS_INARTICLE_EPISODE_LIST) != nil
|
360
439
|
end
|
440
|
+
|
441
|
+
# tests for the type of in article episode list
|
442
|
+
def self.is_episode_list_with_one_episode_per_line?(page)
|
443
|
+
page.match(@@IS_ONE_LINE_EPISODE_LIST) != nil
|
444
|
+
end
|
361
445
|
end
|
362
446
|
end
|
data/lib/serienrenamer.rb
CHANGED
@@ -88,7 +88,7 @@ class TestPluginWikipedia < Test::Unit::TestCase
|
|
88
88
|
assert_equal("Mich trifft der Blitz", seasons[2][16])
|
89
89
|
|
90
90
|
data = wiki.get("Flashpoint – Das Spezialkommando")
|
91
|
-
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data
|
91
|
+
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
|
92
92
|
|
93
93
|
assert_equal("Skorpion", seasons[1][1])
|
94
94
|
assert_equal("Die Festung", seasons[2][2])
|
@@ -99,6 +99,22 @@ class TestPluginWikipedia < Test::Unit::TestCase
|
|
99
99
|
|
100
100
|
assert_equal("Touchdown", seasons[1][1])
|
101
101
|
assert_equal("Zickenkrieg", seasons[1][7])
|
102
|
+
|
103
|
+
# the following series have an old inarticle episodelist
|
104
|
+
data = wiki.get("Prison Break")
|
105
|
+
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
|
106
|
+
|
107
|
+
assert_equal("Der große Plan", seasons[1][1])
|
108
|
+
assert_equal("Seite 1213", seasons[2][5])
|
109
|
+
|
110
|
+
data = wiki.get("Numbers – Die Logik des Verbrechens")
|
111
|
+
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
|
112
|
+
|
113
|
+
assert_equal("Brandzeichen", seasons[1][1])
|
114
|
+
assert_equal("Das Attentat", seasons[2][5])
|
115
|
+
assert_equal("Gequälte Kreatur", seasons[5][19])
|
116
|
+
assert_equal("Hauptgewinn", seasons[6][11])
|
117
|
+
|
102
118
|
end
|
103
119
|
|
104
120
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: serienrenamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: wlapi
|
16
|
-
requirement: &
|
16
|
+
requirement: &21895860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.8.4
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *21895860
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mediawiki-gateway
|
27
|
-
requirement: &
|
27
|
+
requirement: &21891740 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.4.4
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *21891740
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: mechanize
|
38
|
-
requirement: &
|
38
|
+
requirement: &21905740 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '2.3'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *21905740
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: highline
|
49
|
-
requirement: &
|
49
|
+
requirement: &21903520 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.6.11
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *21903520
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rdoc
|
60
|
-
requirement: &
|
60
|
+
requirement: &21900960 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '3.10'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *21900960
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: newgem
|
71
|
-
requirement: &
|
71
|
+
requirement: &21899300 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.5.3
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *21899300
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: hoe
|
82
|
-
requirement: &
|
82
|
+
requirement: &21913580 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ~>
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '2.15'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *21913580
|
91
91
|
description: ! 'Ruby Script that brings your series into an appropriate format
|
92
92
|
|
93
93
|
like "S01E01 - Episodename.avi"'
|