sportdb-formats 1.0.6 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -33
  3. data/Rakefile +2 -5
  4. data/lib/sportdb/formats.rb +54 -70
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +141 -0
  7. data/lib/sportdb/formats/event/event_reader.rb +183 -0
  8. data/lib/sportdb/formats/league/league_index.rb +22 -18
  9. data/lib/sportdb/formats/league/league_outline_reader.rb +45 -13
  10. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  11. data/lib/sportdb/formats/match/match_parser.rb +101 -111
  12. data/lib/sportdb/formats/package.rb +59 -11
  13. data/lib/sportdb/formats/parser_helper.rb +11 -2
  14. data/lib/sportdb/formats/team/club_index.rb +13 -11
  15. data/lib/sportdb/formats/team/club_index_history.rb +134 -0
  16. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  17. data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
  18. data/lib/sportdb/formats/version.rb +2 -2
  19. data/test/helper.rb +51 -81
  20. data/test/test_club_index_history.rb +107 -0
  21. data/test/test_club_reader_history.rb +212 -0
  22. data/test/test_datafile_package.rb +1 -1
  23. data/test/test_regex.rb +25 -7
  24. metadata +9 -78
  25. data/lib/sportdb/formats/config.rb +0 -40
  26. data/lib/sportdb/formats/match/match_parser_csv.rb +0 -314
  27. data/lib/sportdb/formats/name_helper.rb +0 -84
  28. data/lib/sportdb/formats/score/score_formats.rb +0 -220
  29. data/lib/sportdb/formats/score/score_parser.rb +0 -202
  30. data/lib/sportdb/formats/season_utils.rb +0 -27
  31. data/lib/sportdb/formats/structs/country.rb +0 -31
  32. data/lib/sportdb/formats/structs/group.rb +0 -18
  33. data/lib/sportdb/formats/structs/league.rb +0 -37
  34. data/lib/sportdb/formats/structs/match.rb +0 -151
  35. data/lib/sportdb/formats/structs/matchlist.rb +0 -220
  36. data/lib/sportdb/formats/structs/round.rb +0 -25
  37. data/lib/sportdb/formats/structs/season.rb +0 -123
  38. data/lib/sportdb/formats/structs/standings.rb +0 -247
  39. data/lib/sportdb/formats/structs/team.rb +0 -150
  40. data/lib/sportdb/formats/structs/team_usage.rb +0 -88
  41. data/test/test_clubs.rb +0 -40
  42. data/test/test_conf.rb +0 -65
  43. data/test/test_csv_match_parser.rb +0 -114
  44. data/test/test_csv_match_parser_utils.rb +0 -20
  45. data/test/test_csv_reader.rb +0 -31
  46. data/test/test_match.rb +0 -30
  47. data/test/test_match_auto.rb +0 -72
  48. data/test/test_match_auto_champs.rb +0 -45
  49. data/test/test_match_auto_euro.rb +0 -37
  50. data/test/test_match_auto_worldcup.rb +0 -61
  51. data/test/test_match_champs.rb +0 -27
  52. data/test/test_match_eng.rb +0 -26
  53. data/test/test_match_euro.rb +0 -27
  54. data/test/test_match_worldcup.rb +0 -27
  55. data/test/test_name_helper.rb +0 -67
  56. data/test/test_scores.rb +0 -122
  57. data/test/test_season.rb +0 -62
@@ -13,12 +13,22 @@ module SportDb
13
13
  ## leagues.txt or leagues_en.txt
14
14
  ## remove support for en.leagues.txt - why? why not?
15
15
  LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.wiki.txt
16
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
17
  leagues
18
18
  (?:_[a-z0-9_-]+)?
19
19
  \.txt$
20
20
  }x
21
21
 
22
+ ## seasons.txt or seasons_en.txt
23
+ ## remove support for br.seasons.txt - why? why not?
24
+ SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
+ seasons
27
+ (?:_[a-z0-9_-]+)?
28
+ \.txt$
29
+ }x
30
+
31
+
22
32
  ## clubs.txt or clubs_en.txt
23
33
  ## remove support for en.clubs.txt - why? why not?
24
34
  CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
@@ -35,12 +45,22 @@ module SportDb
35
45
  \.wiki\.txt$
36
46
  }x
37
47
 
38
- CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
48
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
39
50
  (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
40
51
  clubs
41
52
  (?:_[a-z0-9_-]+)?
42
53
  \.props\.txt$
43
54
  }x
55
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
+
57
+
58
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
+ clubs
61
+ (?:_[a-z0-9_-]+)?
62
+ \.history\.txt$
63
+ }x
44
64
 
45
65
  ## teams.txt or teams_history.txt
46
66
  TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
@@ -49,6 +69,8 @@ module SportDb
49
69
  \.txt$
50
70
  }x
51
71
 
72
+
73
+ ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
52
74
  ### season folder:
53
75
  ## e.g. /2019-20 or
54
76
  ## year-only e.g. /2019 or
@@ -73,6 +95,10 @@ module SportDb
73
95
  /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
74
96
  }x
75
97
 
98
+ ### add "generic" pattern to find all csv datafiles
99
+ CSV_RE = %r{ (?: ^|/ )
100
+ [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
101
+ }x
76
102
 
77
103
 
78
104
  ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
@@ -96,16 +122,22 @@ module SportDb
96
122
  def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
97
123
  def self.match_teams( path ) TEAMS_RE.match( path ); end
98
124
 
99
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
100
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
125
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
101
128
 
102
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
103
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
104
- def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
129
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
105
133
 
106
134
  def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
107
135
  def self.match_leagues( path ) LEAGUES_RE.match( path ); end
108
136
 
137
+ def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
138
+ def self.match_seasons( path ) SEASONS_RE.match( path ); end
139
+
140
+
109
141
  def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
110
142
  def self.match_conf( path ) CONF_RE.match( path ); end
111
143
 
@@ -118,6 +150,7 @@ module SportDb
118
150
  end
119
151
  ## add match_match and match_match_csv - why? why not?
120
152
 
153
+
121
154
  class << self
122
155
  alias_method :match_teams?, :match_teams
123
156
  alias_method :teams?, :match_teams
@@ -128,12 +161,21 @@ module SportDb
128
161
  alias_method :match_clubs_wiki?, :match_clubs_wiki
129
162
  alias_method :clubs_wiki?, :match_clubs_wiki
130
163
 
131
- alias_method :match_club_props?, :match_club_props
132
- alias_method :club_props?, :match_club_props
164
+ alias_method :match_clubs_history?, :match_clubs_history
165
+ alias_method :clubs_history?, :match_clubs_history
166
+
167
+ alias_method :match_club_props, :match_clubs_props
168
+ alias_method :match_club_props?, :match_clubs_props
169
+ alias_method :club_props?, :match_clubs_props
170
+ alias_method :match_clubs_props?, :match_clubs_props
171
+ alias_method :clubs_props?, :match_clubs_props
133
172
 
134
173
  alias_method :match_leagues?, :match_leagues
135
174
  alias_method :leagues?, :match_leagues
136
175
 
176
+ alias_method :match_seasons?, :match_seasons
177
+ alias_method :seasons?, :match_seasons
178
+
137
179
  alias_method :match_conf?, :match_conf
138
180
  alias_method :conf?, :match_conf
139
181
  end
@@ -212,11 +254,17 @@ module SportDb
212
254
  end
213
255
  end
214
256
  def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
257
+ def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
258
+
215
259
  def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
216
260
 
217
261
  def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
218
262
  def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
219
263
  def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
264
+ def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
265
+
266
+ def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
267
+
220
268
 
221
269
  ## return all match datafile entries
222
270
  def match( format: 'txt' )
@@ -287,13 +335,13 @@ module SportDb
287
335
  ## filter.skip? filter.include? ( season_key )?
288
336
  ## filter.before?( season_key ) etc.
289
337
  ## find some good method names!!!!
290
- season_start = start ? Import::Season.new( start ) : nil
338
+ season_start = start ? Season( start ) : nil
291
339
 
292
340
  h = {}
293
341
  match( format: format ).each do |entry|
294
342
  ## note: assume last directory in datafile path is the season part/key
295
343
  season_q = File.basename( File.dirname( entry.name ))
296
- season = Import::Season.new( season_q ) ## normalize season
344
+ season = Season.parse( season_q ) ## normalize season
297
345
 
298
346
  ## skip if start season before this season
299
347
  next if season_start && season_start.start_year > season.start_year
@@ -18,10 +18,19 @@ module SportDb
18
18
 
19
19
 
20
20
  def is_round?( line )
21
- ## note: =~ return nil if not match found, and 0,1, etc for match
22
- (line =~ SportDb.lang.regex_round) != nil
21
+ ## note: =~ returns nil if no match is found, and the match position (0, 1, etc.) otherwise
22
+
23
+ ## note: allow "free standing" leg 1 and leg 2 too
24
+ ## (e.g. Hinspiel, Rückspiel etc. used for now in Relegation, for example)
25
+ ## note ONLY allowed if "free standing", that is, full line with nothing else
26
+ ## use "custom" regex for special case for now
27
+ ## avoids match HIN in PascHINg, for example (hin in german for leg 1)
28
+ line =~ SportDb.lang.regex_round ||
29
+ line =~ /^(#{SportDb.lang.leg1})$/i ||
30
+ line =~ /^(#{SportDb.lang.leg2})$/i
23
31
  end
24
32
 
33
+
25
34
  def is_knockout_round?( line )
26
35
 
27
36
  ## todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg)
@@ -54,6 +54,7 @@ class ClubIndex
54
54
  ## normalize( name )
55
55
 
56
56
  def strip_wiki( name ) # todo/check: rename to strip_wikipedia_en - why? why not?
57
+ ## change/rename to strip_wiki_qualifier or such - why? why not?
57
58
  ## note: strip disambiguation qualifier from wikipedia page name if present
58
59
  ## note: only remove year and foot... for now
59
60
  ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
@@ -178,22 +179,24 @@ class ClubIndex
178
179
  ## todo/fix/check: use rename to find_canon or find_canonical() or something??
179
180
  ## remove (getting used?) - why? why not?
180
181
  def []( name ) ## lookup by canoncial name only; todo/fix: add find alias why? why not?
182
+ puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
181
183
  @clubs[ name ]
182
184
  end
183
185
 
184
186
 
185
- ## todo/fix/check: return empty array if no match!!!
186
- ## and NOT nil (add || []) - why? why not?
187
187
  def match( name )
188
+ # note: returns empty array (e.g. []) if no match and NOT nil
188
189
  name = normalize( name )
189
- m = @clubs_by_name[ name ]
190
+ m = @clubs_by_name[ name ] || []
190
191
 
191
192
  ## no match - retry with unaccented variant if different
192
193
  ## e.g. example is Preussen Münster (with mixed accent and unaccented letters) that would go unmatched for now
193
194
  ## Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
194
- if m.nil?
195
+ if m.empty?
195
196
  name2 = unaccent( name )
196
- m = @clubs_by_name[ name2 ] if name2 != name
197
+ if name2 != name
198
+ m = @clubs_by_name[ name2 ] || []
199
+ end
197
200
  end
198
201
  m
199
202
  end
@@ -227,10 +230,8 @@ class ClubIndex
227
230
  country = country( country )
228
231
 
229
232
  ## note: match must for now always include name
230
- if m ## filter by country
231
- m = m.select { |club| club.country.key == country.key }
232
- m = nil if m.empty? ## note: reset to nil if no more matches
233
- end
233
+ ## filter by country
234
+ m = m.select { |club| club.country.key == country.key }
234
235
  end
235
236
  m
236
237
  end
@@ -263,7 +264,7 @@ class ClubIndex
263
264
 
264
265
  m = match_by( name: name, country: country )
265
266
 
266
- if m.nil?
267
+ if m.empty?
267
268
  ## (re)try with second country - quick hacks for known leagues
268
269
  ## todo/fix: add league flag to activate!!! - why? why not
269
270
  m = match_by( name: name, country: 'wal' ) if country.key == 'eng'
@@ -272,6 +273,7 @@ class ClubIndex
272
273
  m = match_by( name: name, country: 'mc' ) if country.key == 'fr'
273
274
  m = match_by( name: name, country: 'li' ) if country.key == 'ch'
274
275
  m = match_by( name: name, country: 'ca' ) if country.key == 'us'
276
+ m = match_by( name: name, country: 'nz' ) if country.key == 'au'
275
277
  end
276
278
  else ## try "global" search - no country passed in
277
279
  m = match( name )
@@ -279,7 +281,7 @@ class ClubIndex
279
281
 
280
282
 
281
283
  club = nil
282
- if m.nil?
284
+ if m.empty?
283
285
  ## puts "** !!! WARN !!! no match for club >#{name}<"
284
286
  elsif m.size > 1
285
287
  puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
@@ -0,0 +1,134 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
+ class ClubHistoryIndex
8
+
9
+ def self.build( path )
10
+ pack = Package.new( path ) ## lets us use direcotry or zip archive
11
+
12
+ recs = []
13
+ pack.each_clubs_history do |entry|
14
+ recs += ClubHistoryReader.parse( entry.read )
15
+ end
16
+ recs
17
+
18
+ index = new
19
+ index.add( recs )
20
+ index
21
+ end
22
+
23
+
24
+
25
+ def catalog() Import.catalog; end
26
+
27
+ ## note: keep name history for now separate from
28
+ ## club struct - why? why not?
29
+ ## later yes, yes, yes, merge name history into club struct!!!!!
30
+ ##
31
+ ## for now the name history is experimental
32
+
33
+
34
+ def initialize
35
+ @clubs = {} ## clubs (indexed) by canonical name
36
+ @errors = []
37
+ end
38
+
39
+ attr_reader :errors
40
+ def errors?() @errors.empty? == false; end
41
+
42
+ def mappings() @clubs; end ## todo/check: rename to records or histories or something - why? why not?
43
+
44
+
45
+ def add_history( club_rec, keyword, season, args )
46
+ ## note use season obj for now (and NOT key) - why? why not?
47
+ rec = @clubs[ club_rec.name ] ||= []
48
+
49
+ rec << [season, [keyword, args]]
50
+
51
+ ## note: always keep records sorted by season, newest first (descending), for now
52
+ ## check if 2010 and 2010/11 is in order using alpha sort?? (see argentina)
53
+ rec.sort! { |l,r| r[0] <=> l[0] }
54
+ end
55
+
56
+
57
+ def add( rec_or_recs ) ## add club record / alt_names
58
+ recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
59
+
60
+ recs.each do |rec|
61
+
62
+ keyword = rec[0]
63
+ season_key = rec[1]
64
+ args = rec[2..-1] ## get rest of args e.g. one, two or more
65
+
66
+ ## note: for now only add (re)name history season records,
67
+ ## that is, skip MERGE and BANKRUPT for now
68
+ ## and incl. only RENAME, REFORM, MOVE for now
69
+ next if ['MERGE', 'BANKRUPT'].include?( keyword )
70
+
71
+
72
+ name_old = strip_geo( args[0][0] ) ## note: strip optional geo part from name
73
+ name_new = strip_geo( args[1][0] )
74
+
75
+ country_old = args[0][1]
76
+ country_new = args[1][1]
77
+
78
+ club_old = catalog.clubs.find_by!( name: name_old, country: country_old )
79
+ club_new = catalog.clubs.find_by!( name: name_new, country: country_new )
80
+
81
+ ## note use season obj for now (and NOT key) - why? why not?
82
+ season = Season.parse( season_key )
83
+
84
+ ## todo/check:
85
+ ## check if club_old and club_new reference different club record!!
86
+ ## examples - RB II -> Liefering ?? or
87
+ ## FC Pasching -> OOE Juniors ??
88
+ ## Austria Salzburg -> RB Salzburg ??
89
+ ## for now always add name history to both - why? why not?
90
+
91
+ add_history( club_old, keyword, season, args )
92
+ ## note: allow for now different club references
93
+ ## but maybe warn later - why? why not?
94
+ ## add history to both for now
95
+ add_history( club_new, keyword, season, args ) if club_old != club_new
96
+ end # each rec
97
+ end # method add
98
+
99
+
100
+ #### todo/check: move as method to club struct later - to always use club reference
101
+ ## returns (simply) name as string for now or nil - why? why not?
102
+ #
103
+ # history entry example
104
+ # "Arsenal FC"=>
105
+ # [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
106
+ # [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
107
+ # [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
108
+ def find_name_by( name:, season: )
109
+ recs = @clubs[ name ]
110
+ if recs
111
+ season = Season( season ) ## make sure season is a season obj (and NOT a string)
112
+ ## check season records for name; use linear search (assume only few records)
113
+ recs.each do |rec|
114
+ if season >= rec[0]
115
+ return strip_geo( rec[1][1][1][0] ) # use second arg
116
+ end
117
+ end
118
+ ## if we get here use last name
119
+ strip_geo( recs[-1][1][1][0][0] ) # use first arg
120
+ else
121
+ nil
122
+ end
123
+ end
124
+
125
+ ##################
126
+ ## helpers
127
+ def strip_geo( name )
128
+ ## e.g. Arsenal, London => Arsenal
129
+ name.split(',')[0].strip
130
+ end
131
+ end # class ClubHistoryIndex
132
+
133
+ end # module Import
134
+ end # module SportDb
@@ -0,0 +1,203 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class ClubHistoryReader
9
+
10
+ def catalog() Import.catalog; end
11
+
12
+
13
+
14
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
15
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
16
+ parse( txt )
17
+ end
18
+
19
+ def self.parse( txt )
20
+ new( txt ).parse
21
+ end
22
+
23
+ def initialize( txt )
24
+ @txt = txt
25
+ end
26
+
27
+
28
+ ###
29
+ ## RENAME/RENAMED
30
+ ## MOVE/MOVED
31
+ ## BANKRUPT/BANKRUPTED
32
+ ## REFORM/REFORMED
33
+ ## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
34
+
35
+
36
+ KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
37
+ MOVED?|
38
+ BANKRUPT(?:ED)?|
39
+ REFORM(?:ED)?|
40
+ MERGED?
41
+ )
42
+ [ ]+
43
+ (?<text>.*) # rest of text
44
+ $
45
+ }x
46
+
47
+
48
+ def parse
49
+ recs = []
50
+ last_rec = nil
51
+
52
+ last_country = nil
53
+ last_season = nil
54
+ last_keyword = nil
55
+ last_teams = []
56
+
57
+ OutlineReader.parse( @txt ).each do |node|
58
+ if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
59
+ heading_level = node[0][1].to_i
60
+ heading = node[1]
61
+
62
+ puts "heading #{heading_level} >#{heading}<"
63
+
64
+
65
+ if heading_level == 1
66
+ ## assume country in heading; allow all "formats" supported by parse e.g.
67
+ ## Österreich • Austria (at)
68
+ ## Österreich • Austria
69
+ ## Austria
70
+ ## Deutschland (de) • Germany
71
+ country = catalog.countries.parse( heading )
72
+ ## check country code - MUST exist for now!!!!
73
+ if country.nil?
74
+ puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
75
+ exit 1
76
+ end
77
+ puts " country >#{heading}< => #{country.name}, #{country.key}"
78
+ last_country = country
79
+ last_season = nil ## reset "lower levels" - season & keyword
80
+ last_keyword = nil
81
+ elsif heading_level == 2
82
+ ## assume season
83
+ season = Season.parse( heading )
84
+ puts " season >#{heading}< => #{season.key}"
85
+ last_season = season ## reset "lowwer levels" - keyword
86
+ last_keyword = nil
87
+ else
88
+ puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
89
+ exit 1
90
+ end
91
+
92
+ elsif node[0] == :p ## paragraph with (text) lines
93
+ if last_country.nil?
94
+ puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
95
+ exit 1
96
+ end
97
+ if last_season.nil?
98
+ puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
99
+ exit 1
100
+ end
101
+
102
+ lines = node[1]
103
+ lines.each do |line|
104
+ if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
105
+ keyword = m[:keyword]
106
+ line = m[:text].strip
107
+
108
+ puts " keyword #{keyword}"
109
+ last_keyword = case keyword ## "normalize" keywords
110
+ when 'BANKRUPT', 'BANKRUPTED'
111
+ 'BANKRUPT'
112
+ when 'RENAME', 'RENAMED'
113
+ 'RENAME'
114
+ when 'REFORM', 'REFORMED'
115
+ 'REFORM'
116
+ when 'MOVE', 'MOVED'
117
+ 'MOVE'
118
+ when 'MERGE', 'MERGED'
119
+ 'MERGE'
120
+ else
121
+ puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
122
+ exit 1
123
+ end
124
+
125
+ last_teams = []
126
+ end
127
+
128
+ if last_keyword.nil?
129
+ puts "!!! ERROR [club history reader] - line with keyword expected - got:"
130
+ puts line
131
+ exit 1
132
+ end
133
+
134
+ if last_keyword == 'BANKRUPT'
135
+ ## requires / expects one team in one line
136
+ recs << [ last_keyword, last_season.key,
137
+ [ squish(line), last_country.key ]
138
+ ]
139
+ elsif last_keyword == 'RENAME' ||
140
+ last_keyword == 'REFORM' ||
141
+ last_keyword == 'MOVE'
142
+ ## requires / expects two teams in one line (separated by ⇒ or such)
143
+ teams = line.split( '⇒' )
144
+ if teams.size != 2
145
+ puts "!!! ERROR [club history reader] - expected two teams - got:"
146
+ pp teams
147
+ exit 1
148
+ end
149
+ teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
150
+ recs << [ last_keyword, last_season.key,
151
+ [ teams[0], last_country.key ],
152
+ [ teams[1], last_country.key ]
153
+ ]
154
+ elsif last_keyword == 'MERGE'
155
+ ## check if line starts with separator
156
+ ## otherwise collect to be merged teams
157
+ if line.start_with?( '⇒' )
158
+ if last_teams.size < 2
159
+ puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
160
+ pp last_teams
161
+ exit 1
162
+ end
163
+ ## auto-add country to all teams
164
+ teams = last_teams.map {|team| [team, last_country.key]}
165
+ recs << [ last_keyword, last_season.key,
166
+ teams,
167
+ [ squish(line.sub('⇒','').strip), last_country.key ]
168
+ ]
169
+
170
+ last_teams = []
171
+ else
172
+ last_teams << squish(line)
173
+ end
174
+ else
175
+ puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
176
+ exit 1
177
+ end
178
+ end # each line (in paragraph)
179
+ else
180
+ puts "** !!! ERROR [club history reader] - unknown line type:"
181
+ pp node
182
+ exit 1
183
+ end
184
+ end
185
+
186
+ recs
187
+ end # method read
188
+
189
+
190
+ ###############
191
+ ## helper
192
+
193
+ def squish( str )
194
+ ## collapse each run of spaces into a single space
195
+ str.gsub( /[ ]+/,' ' )
196
+ end
197
+
198
+
199
+ end # class ClubHistoryReader
200
+
201
+
202
+ end ## module Import
203
+ end ## module SportDb