serienrenamer 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,8 @@ module Plugin
24
24
  @@CONTAINS_INARTICLE_EPISODE_LIST = /\<div.*\>Staffel.(\d+).*\<\/div\>.*class=\"wikitable\".*titel/m
25
25
  @@INPAGE_SEASON_SEPARATOR = /\<div.style=\"clear:both\;.class=\"NavFrame\"\>/
26
26
  @@WIKITABLE_EXTRACT_PATTERN = /(\{\|.class=\"wikitable\".*\|\})\n/m
27
+ @@IS_ONE_LINE_EPISODE_LIST = /\|.*\|\|.*\|\|.*\|\|/m
28
+
27
29
 
28
30
  # this method will be called from the main program
29
31
  # with a Serienrenamer::Episode instance as parameter
@@ -113,6 +115,7 @@ module Plugin
113
115
  return episode_names
114
116
  end
115
117
 
118
+
116
119
  # This method will extract season based information
117
120
  # from a string that contains a wikipedia episodelist page
118
121
  #
@@ -149,6 +152,7 @@ module Plugin
149
152
  return series_data
150
153
  end
151
154
 
155
+
152
156
  # this method will be called with a wikipedia seasontable
153
157
  # as parameter and will extract all episodes from this
154
158
  # and returns that as an array where the episode number is
@@ -212,6 +216,7 @@ module Plugin
212
216
  return season_data
213
217
  end
214
218
 
219
+
215
220
  # This method will extract season based information
216
221
  # from a string that contains a series page with an
217
222
  # episodelist included
@@ -237,7 +242,15 @@ module Plugin
237
242
  season_nr = season.match(@@CONTAINS_INARTICLE_EPISODE_LIST)[1].to_i
238
243
 
239
244
  wikitable = season.match(@@WIKITABLE_EXTRACT_PATTERN)[1]
240
- episodes = parse_inarticle_season_table(wikitable)
245
+
246
+ # we have to detect the type of the inarticle season page
247
+ # because there are two different kinds of table structures
248
+ # used in the german wikipedia
249
+ if self.is_episode_list_with_one_episode_per_line?(wikitable)
250
+ episodes = parse_inarticle_season_table_with_one_line(wikitable)
251
+ else
252
+ episodes = parse_inarticle_season_table(wikitable)
253
+ end
241
254
 
242
255
  # HACK: if a season is split into different parts
243
256
  # e.g. Flashpoint (2.1 and 2.2), then merge them if possible
@@ -253,6 +266,7 @@ module Plugin
253
266
  return series_data
254
267
  end
255
268
 
269
+
256
270
  # this method will be called with a wikitable for a season
257
271
  # as parameter and will extract all episodes from this
258
272
  # and returns that as an array where the episode number is
@@ -331,6 +345,71 @@ module Plugin
331
345
  return season_data
332
346
  end
333
347
 
348
+
349
+ # this method will be called with a wikitable for a season
350
+ # as parameter and will extract all episodes from this
351
+ # and returns that as an array where the episode number is
352
+ # the index
353
+ #
354
+ # this method implements a special format that takes place in
355
+ # e.g. 'Prison Break' where an episode is not spread along several
356
+ # lines like in the method above
357
+ #
358
+ # Example of a wikitable for episodes:
359
+ #
360
+ #{| class="wikitable"
361
+ # |- style="color:#black; background-color:#006699"
362
+ # ! '''Episode''' !! '''Deutscher Titel''' !! '''Originaltitel''' !! '''Erstausstrahlung (DE)''' !! '''Erstausstrahlung (USA)'''
363
+ # |-
364
+ # |'''1''' (1-01) || Der große Plan || Pilot || 21. Juni 2007 || 29. August 2005
365
+ # |-
366
+ # |'''2''' (1-02) || Lügt Lincoln? || Allen || 21. Juni 2007 || 29. August 2005
367
+ # |-
368
+ # |'''3''' (1-03) || Vertrauenstest || Cell Test || 28. Juni 2007 || 5. September 2005
369
+ # |-
370
+ # |'''4''' (1-04) || Veronica steigt ein || Cute Poison || 28. Juni 2007 || 12. September 2005
371
+ #
372
+ def self.parse_inarticle_season_table_with_one_line(table)
373
+ raise ArgumentError, 'String with seasontable expected' unless
374
+ table.is_a?(String)
375
+
376
+ season_data = []
377
+ episode_nr_col = nil
378
+ episode_name_col = nil
379
+
380
+ table.split(/^\|\-.*$/).each do |tablerow|
381
+
382
+ if tablerow.match(/!!.*!!.*!!/)
383
+ # extract column numbers from table header
384
+ tablerow.split(/!!/).each_with_index do |col,index|
385
+ episode_nr_col = index if col.match(/Episode/i)
386
+ episode_name_col = index if col.match(/Deutsch.*Titel/i)
387
+ end
388
+
389
+ elsif tablerow.match(/\|\|.*\w+.*\|\|/)
390
+ tablerow.strip!
391
+ columns = tablerow.split(/\|\|/)
392
+
393
+ # the following cleanes up the column so that the following occurs
394
+ # " '''7''' (1-07) " => "7 1 07"
395
+ #
396
+ # we can now extract the last bunch of digits and this algorithm is
397
+ # some kind of format independent
398
+ dirty_episode_nr = columns[episode_nr_col].gsub(/\D/, " ").strip
399
+ episode_nr = dirty_episode_nr.match(/(\d+)$/)[1]
400
+ next unless episode_nr
401
+
402
+ episode_name = columns[episode_name_col].strip
403
+ next unless episode_nr.match(/\w+/)
404
+
405
+ season_data[episode_nr.to_i] = episode_name
406
+ end
407
+ end
408
+
409
+ return season_data
410
+ end
411
+
412
+
334
413
  # this method checks if the page is the main page
335
414
  # for a series
336
415
  #
@@ -358,5 +437,10 @@ module Plugin
358
437
  def self.contains_inarticle_episode_list?(page)
359
438
  page.match(@@CONTAINS_INARTICLE_EPISODE_LIST) != nil
360
439
  end
440
+
441
+ # tests for the type of in article episode list
442
+ def self.is_episode_list_with_one_episode_per_line?(page)
443
+ page.match(@@IS_ONE_LINE_EPISODE_LIST) != nil
444
+ end
361
445
  end
362
446
  end
data/lib/serienrenamer.rb CHANGED
@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)) unless
3
3
 
4
4
 
5
5
  module Serienrenamer
6
- VERSION = '0.0.2'
6
+ VERSION = '0.0.3'
7
7
 
8
8
  require 'serienrenamer/episode.rb'
9
9
 
@@ -88,7 +88,7 @@ class TestPluginWikipedia < Test::Unit::TestCase
88
88
  assert_equal("Mich trifft der Blitz", seasons[2][16])
89
89
 
90
90
  data = wiki.get("Flashpoint – Das Spezialkommando")
91
- seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data, true)
91
+ seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
92
92
 
93
93
  assert_equal("Skorpion", seasons[1][1])
94
94
  assert_equal("Die Festung", seasons[2][2])
@@ -99,6 +99,22 @@ class TestPluginWikipedia < Test::Unit::TestCase
99
99
 
100
100
  assert_equal("Touchdown", seasons[1][1])
101
101
  assert_equal("Zickenkrieg", seasons[1][7])
102
+
103
+ # the following series have an old inarticle episodelist
104
+ data = wiki.get("Prison Break")
105
+ seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
106
+
107
+ assert_equal("Der große Plan", seasons[1][1])
108
+ assert_equal("Seite 1213", seasons[2][5])
109
+
110
+ data = wiki.get("Numbers – Die Logik des Verbrechens")
111
+ seasons = Plugin::Wikipedia.parse_inarticle_episodelist_page_data(data)
112
+
113
+ assert_equal("Brandzeichen", seasons[1][1])
114
+ assert_equal("Das Attentat", seasons[2][5])
115
+ assert_equal("Gequälte Kreatur", seasons[5][19])
116
+ assert_equal("Hauptgewinn", seasons[6][11])
117
+
102
118
  end
103
119
 
104
120
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: serienrenamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-04 00:00:00.000000000 Z
12
+ date: 2012-03-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: wlapi
16
- requirement: &11633260 !ruby/object:Gem::Requirement
16
+ requirement: &21895860 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 0.8.4
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *11633260
24
+ version_requirements: *21895860
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: mediawiki-gateway
27
- requirement: &11631640 !ruby/object:Gem::Requirement
27
+ requirement: &21891740 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.4.4
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *11631640
35
+ version_requirements: *21891740
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: mechanize
38
- requirement: &11629400 !ruby/object:Gem::Requirement
38
+ requirement: &21905740 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '2.3'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *11629400
46
+ version_requirements: *21905740
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: highline
49
- requirement: &11627260 !ruby/object:Gem::Requirement
49
+ requirement: &21903520 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.6.11
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *11627260
57
+ version_requirements: *21903520
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rdoc
60
- requirement: &11641960 !ruby/object:Gem::Requirement
60
+ requirement: &21900960 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '3.10'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *11641960
68
+ version_requirements: *21900960
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: newgem
71
- requirement: &11639680 !ruby/object:Gem::Requirement
71
+ requirement: &21899300 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.5.3
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *11639680
79
+ version_requirements: *21899300
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: hoe
82
- requirement: &11637160 !ruby/object:Gem::Requirement
82
+ requirement: &21913580 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '2.15'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *11637160
90
+ version_requirements: *21913580
91
91
  description: ! 'Ruby Script that brings your series into an appropriate format
92
92
 
93
93
  like "S01E01 - Episodename.avi"'