serienrenamer 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/plugin/wikipedia.rb +85 -1
- data/lib/serienrenamer.rb +1 -1
- data/test/test_plugin_wikipedia.rb +17 -1
- metadata +16 -16
data/lib/plugin/wikipedia.rb
CHANGED
@@ -24,6 +24,8 @@ module Plugin
|
|
24
24
|
@@CONTAINS_INARTICLE_EPISODE_LIST = /\<div.*\>Staffel.(\d+).*\<\/div\>.*class=\"wikitable\".*titel/m
|
25
25
|
@@INPAGE_SEASON_SEPARATOR = /\<div.style=\"clear:both\;.class=\"NavFrame\"\>/
|
26
26
|
@@WIKITABLE_EXTRACT_PATTERN = /(\{\|.class=\"wikitable\".*\|\})\n/m
|
27
|
+
@@IS_ONE_LINE_EPISODE_LIST = /\|.*\|\|.*\|\|.*\|\|/m
|
28
|
+
|
27
29
|
|
28
30
|
# this method will be called from the main program
|
29
31
|
# with a Serienrenamer::Episode instance as parameter
|
@@ -113,6 +115,7 @@ module Plugin
|
|
113
115
|
return episode_names
|
114
116
|
end
|
115
117
|
|
118
|
+
|
116
119
|
# This method will extract season based information
|
117
120
|
# from a string that contains a wikipedia episodelist page
|
118
121
|
#
|
@@ -149,6 +152,7 @@ module Plugin
|
|
149
152
|
return series_data
|
150
153
|
end
|
151
154
|
|
155
|
+
|
152
156
|
# this method will be called with a wikipedia seasontable
|
153
157
|
# as parameter and will extract all episodes from this
|
154
158
|
# and returns that as an array where the episode number is
|
@@ -212,6 +216,7 @@ module Plugin
|
|
212
216
|
return season_data
|
213
217
|
end
|
214
218
|
|
219
|
+
|
215
220
|
# This method will extract season based information
|
216
221
|
# from a string that contains a series page with an
|
217
222
|
# episodelist included
|
@@ -237,7 +242,15 @@ module Plugin
|
|
237
242
|
season_nr = season.match(@@CONTAINS_INARTICLE_EPISODE_LIST)[1].to_i
|
238
243
|
|
239
244
|
wikitable = season.match(@@WIKITABLE_EXTRACT_PATTERN)[1]
|
240
|
-
|
245
|
+
|
246
|
+
# we have to detect the type of the inarticle season page
|
247
|
+
# because there are two different kinds of table structures
|
248
|
+
# used in the German Wikipedia
|
249
|
+
if self.is_episode_list_with_one_episode_per_line?(wikitable)
|
250
|
+
episodes = parse_inarticle_season_table_with_one_line(wikitable)
|
251
|
+
else
|
252
|
+
episodes = parse_inarticle_season_table(wikitable)
|
253
|
+
end
|
241
254
|
|
242
255
|
# HACK if a season is split into different parts
|
243
256
|
# e.g. Flashpoint (2.1 and 2.2), then merge them if possible
|
@@ -253,6 +266,7 @@ module Plugin
|
|
253
266
|
return series_data
|
254
267
|
end
|
255
268
|
|
269
|
+
|
256
270
|
# this method will be called with a wikitable for a season
|
257
271
|
# as parameter and will extract all episodes from this
|
258
272
|
# and returns that as an array where the episode number is
|
@@ -331,6 +345,71 @@ module Plugin
|
|
331
345
|
return season_data
|
332
346
|
end
|
333
347
|
|
348
|
+
|
349
|
+
# this method will be called with a wikitable for a season
|
350
|
+
# as parameter and will extract all episodes from this
|
351
|
+
# and returns that as an array where the episode number is
|
352
|
+
# the index
|
353
|
+
#
|
354
|
+
# this method handles a special table format that is used in
|
355
|
+
# e.g. 'Prison Break', where an episode is not spread across several
|
356
|
+
# lines as in the method above
|
357
|
+
#
|
358
|
+
# Example of a wikitable for episodes:
|
359
|
+
#
|
360
|
+
#{| class="wikitable"
|
361
|
+
# |- style="color:#black; background-color:#006699"
|
362
|
+
# ! '''Episode''' !! '''Deutscher Titel''' !! '''Originaltitel''' !! '''Erstausstrahlung (DE)''' !! '''Erstausstrahlung (USA)'''
|
363
|
+
# |-
|
364
|
+
# |'''1''' (1-01) || Der große Plan || Pilot || 21. Juni 2007 || 29. August 2005
|
365
|
+
# |-
|
366
|
+
# |'''2''' (1-02) || Lügt Lincoln? || Allen || 21. Juni 2007 || 29. August 2005
|
367
|
+
# |-
|
368
|
+
# |'''3''' (1-03) || Vertrauenstest || Cell Test || 28. Juni 2007 || 5. September 2005
|
369
|
+
# |-
|
370
|
+
# |'''4''' (1-04) || Veronica steigt ein || Cute Poison || 28. Juni 2007 || 12. September 2005
|
371
|
+
#
|
372
|
+
def self.parse_inarticle_season_table_with_one_line(table)
|
373
|
+
raise ArgumentError, 'String with seasontable expected' unless
|
374
|
+
table.is_a?(String)
|
375
|
+
|
376
|
+
season_data = []
|
377
|
+
episode_nr_col = nil
|
378
|
+
episode_name_col = nil
|
379
|
+
|
380
|
+
table.split(/^\|\-.*$/).each do |tablerow|
|
381
|
+
|
382
|
+
if tablerow.match(/!!.*!!.*!!/)
|
383
|
+
# extract column numbers from table header
|
384
|
+
tablerow.split(/!!/).each_with_index do |col,index|
|
385
|
+
episode_nr_col = index if col.match(/Episode/i)
|
386
|
+
episode_name_col = index if col.match(/Deutsch.*Titel/i)
|
387
|
+
end
|
388
|
+
|
389
|
+
elsif tablerow.match(/\|\|.*\w+.*\|\|/)
|
390
|
+
tablerow.strip!
|
391
|
+
columns = tablerow.split(/\|\|/)
|
392
|
+
|
393
|
+
# the following cleans up the column so that this transformation occurs:
|
394
|
+
# " '''7''' (1-07) " => "7 1 07"
|
395
|
+
#
|
396
|
+
# we can now extract the last run of digits, which makes this algorithm
|
397
|
+
# largely independent of the exact episode number format
|
398
|
+
dirty_episode_nr = columns[episode_nr_col].gsub(/\D/, " ").strip
|
399
|
+
episode_nr = dirty_episode_nr.match(/(\d+)$/)[1]
|
400
|
+
next unless episode_nr
|
401
|
+
|
402
|
+
episode_name = columns[episode_name_col].strip
|
403
|
+
next unless episode_nr.match(/\w+/)
|
404
|
+
|
405
|
+
season_data[episode_nr.to_i] = episode_name
|
406
|
+
end
|
407
|
+
end
|
408
|
+
|
409
|
+
return season_data
|
410
|
+
end
|
411
|
+
|
412
|
+
|
334
413
|
# this method checks if the page is the main page
|
335
414
|
# for a series
|
336
415
|
#
|
@@ -358,5 +437,10 @@ module Plugin
|
|
358
437
|
def self.contains_inarticle_episode_list?(page)
|
359
438
|
page.match(@@CONTAINS_INARTICLE_EPISODE_LIST) != nil
|
360
439
|
end
|
440
|
+
|
441
|
+
# tests whether the in-article episode list has one episode per line
|
442
|
+
def self.is_episode_list_with_one_episode_per_line?(page)
|
443
|
+
page.match(@@IS_ONE_LINE_EPISODE_LIST) != nil
|
444
|
+
end
|
361
445
|
end
|
362
446
|
end
|
data/lib/serienrenamer.rb
CHANGED
[the hunk for this file is missing from this listing]
data/test/test_plugin_wikipedia.rb
CHANGED
@@ -88,7 +88,7 @@ class TestPluginWikipedia < Test::Unit::TestCase
|
|
88
88
|
assert_equal("Mich trifft der Blitz", seasons[2][16])
|
89
89
|
|
90
90
|
data = wiki.get("Flashpoint – Das Spezialkommando")
|
91
|
-
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data
|
91
|
+
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
|
92
92
|
|
93
93
|
assert_equal("Skorpion", seasons[1][1])
|
94
94
|
assert_equal("Die Festung", seasons[2][2])
|
@@ -99,6 +99,22 @@ class TestPluginWikipedia < Test::Unit::TestCase
|
|
99
99
|
|
100
100
|
assert_equal("Touchdown", seasons[1][1])
|
101
101
|
assert_equal("Zickenkrieg", seasons[1][7])
|
102
|
+
|
103
|
+
# the following series use the old in-article episode list format
|
104
|
+
data = wiki.get("Prison Break")
|
105
|
+
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
|
106
|
+
|
107
|
+
assert_equal("Der große Plan", seasons[1][1])
|
108
|
+
assert_equal("Seite 1213", seasons[2][5])
|
109
|
+
|
110
|
+
data = wiki.get("Numbers – Die Logik des Verbrechens")
|
111
|
+
seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
|
112
|
+
|
113
|
+
assert_equal("Brandzeichen", seasons[1][1])
|
114
|
+
assert_equal("Das Attentat", seasons[2][5])
|
115
|
+
assert_equal("Gequälte Kreatur", seasons[5][19])
|
116
|
+
assert_equal("Hauptgewinn", seasons[6][11])
|
117
|
+
|
102
118
|
end
|
103
119
|
|
104
120
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: serienrenamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: wlapi
|
16
|
-
requirement: &
|
16
|
+
requirement: &21895860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.8.4
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *21895860
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mediawiki-gateway
|
27
|
-
requirement: &
|
27
|
+
requirement: &21891740 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.4.4
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *21891740
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: mechanize
|
38
|
-
requirement: &
|
38
|
+
requirement: &21905740 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '2.3'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *21905740
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: highline
|
49
|
-
requirement: &
|
49
|
+
requirement: &21903520 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.6.11
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *21903520
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rdoc
|
60
|
-
requirement: &
|
60
|
+
requirement: &21900960 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '3.10'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *21900960
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: newgem
|
71
|
-
requirement: &
|
71
|
+
requirement: &21899300 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.5.3
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *21899300
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: hoe
|
82
|
-
requirement: &
|
82
|
+
requirement: &21913580 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ~>
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '2.15'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *21913580
|
91
91
|
description: ! 'Ruby Script that brings your series into an appropriate format
|
92
92
|
|
93
93
|
like "S01E01 - Episodename.avi"'
|