sportdb-formats 1.0.6 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -33
- data/Rakefile +2 -5
- data/lib/sportdb/formats.rb +54 -70
- data/lib/sportdb/formats/country/country_index.rb +2 -2
- data/lib/sportdb/formats/event/event_index.rb +141 -0
- data/lib/sportdb/formats/event/event_reader.rb +183 -0
- data/lib/sportdb/formats/league/league_index.rb +22 -18
- data/lib/sportdb/formats/league/league_outline_reader.rb +45 -13
- data/lib/sportdb/formats/league/league_reader.rb +7 -1
- data/lib/sportdb/formats/match/match_parser.rb +101 -111
- data/lib/sportdb/formats/package.rb +59 -11
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/team/club_index.rb +13 -11
- data/lib/sportdb/formats/team/club_index_history.rb +134 -0
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
- data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +51 -81
- data/test/test_club_index_history.rb +107 -0
- data/test/test_club_reader_history.rb +212 -0
- data/test/test_datafile_package.rb +1 -1
- data/test/test_regex.rb +25 -7
- metadata +9 -78
- data/lib/sportdb/formats/config.rb +0 -40
- data/lib/sportdb/formats/match/match_parser_csv.rb +0 -314
- data/lib/sportdb/formats/name_helper.rb +0 -84
- data/lib/sportdb/formats/score/score_formats.rb +0 -220
- data/lib/sportdb/formats/score/score_parser.rb +0 -202
- data/lib/sportdb/formats/season_utils.rb +0 -27
- data/lib/sportdb/formats/structs/country.rb +0 -31
- data/lib/sportdb/formats/structs/group.rb +0 -18
- data/lib/sportdb/formats/structs/league.rb +0 -37
- data/lib/sportdb/formats/structs/match.rb +0 -151
- data/lib/sportdb/formats/structs/matchlist.rb +0 -220
- data/lib/sportdb/formats/structs/round.rb +0 -25
- data/lib/sportdb/formats/structs/season.rb +0 -123
- data/lib/sportdb/formats/structs/standings.rb +0 -247
- data/lib/sportdb/formats/structs/team.rb +0 -150
- data/lib/sportdb/formats/structs/team_usage.rb +0 -88
- data/test/test_clubs.rb +0 -40
- data/test/test_conf.rb +0 -65
- data/test/test_csv_match_parser.rb +0 -114
- data/test/test_csv_match_parser_utils.rb +0 -20
- data/test/test_csv_reader.rb +0 -31
- data/test/test_match.rb +0 -30
- data/test/test_match_auto.rb +0 -72
- data/test/test_match_auto_champs.rb +0 -45
- data/test/test_match_auto_euro.rb +0 -37
- data/test/test_match_auto_worldcup.rb +0 -61
- data/test/test_match_champs.rb +0 -27
- data/test/test_match_eng.rb +0 -26
- data/test/test_match_euro.rb +0 -27
- data/test/test_match_worldcup.rb +0 -27
- data/test/test_name_helper.rb +0 -67
- data/test/test_scores.rb +0 -122
- data/test/test_season.rb +0 -62
| @@ -13,12 +13,22 @@ module SportDb | |
| 13 13 | 
             
                ## leagues.txt or leagues_en.txt
         | 
| 14 14 | 
             
                ##   remove support for en.leagues.txt - why? why not?
         | 
| 15 15 | 
             
                LEAGUES_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
         | 
| 16 | 
            -
                   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng. | 
| 16 | 
            +
                   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.leagues.txt
         | 
| 17 17 | 
             
                    leagues
         | 
| 18 18 | 
             
                      (?:_[a-z0-9_-]+)?
         | 
| 19 19 | 
             
                    \.txt$
         | 
| 20 20 | 
             
                }x
         | 
| 21 21 |  | 
| 22 | 
            +
                ## seasons.txt or seasons_en.txt
         | 
| 23 | 
            +
                ##   remove support for br.seasons.txt - why? why not?
         | 
| 24 | 
            +
                SEASONS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
         | 
| 25 | 
            +
                   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.seasons.txt
         | 
| 26 | 
            +
                    seasons
         | 
| 27 | 
            +
                      (?:_[a-z0-9_-]+)?
         | 
| 28 | 
            +
                    \.txt$
         | 
| 29 | 
            +
                }x
         | 
| 30 | 
            +
             | 
| 31 | 
            +
             | 
| 22 32 | 
             
                ## clubs.txt or clubs_en.txt
         | 
| 23 33 | 
             
                ##   remove support for en.clubs.txt - why? why not?
         | 
| 24 34 | 
             
                CLUBS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
         | 
| @@ -35,12 +45,22 @@ module SportDb | |
| 35 45 | 
             
                   \.wiki\.txt$
         | 
| 36 46 | 
             
                }x
         | 
| 37 47 |  | 
| 38 | 
            -
                 | 
| 48 | 
            +
                ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
         | 
| 49 | 
            +
                CLUBS_PROPS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
         | 
| 39 50 | 
             
                  (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.props.txt
         | 
| 40 51 | 
             
                    clubs
         | 
| 41 52 | 
             
                      (?:_[a-z0-9_-]+)?
         | 
| 42 53 | 
             
                    \.props\.txt$
         | 
| 43 54 | 
             
                }x
         | 
| 55 | 
            +
                CLUB_PROPS_RE = CLUBS_PROPS_RE  ## add alias for now (fix later - why? why not?)
         | 
| 56 | 
            +
             | 
| 57 | 
            +
             | 
| 58 | 
            +
                CLUBS_HISTORY_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
         | 
| 59 | 
            +
                  (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.history.txt
         | 
| 60 | 
            +
                    clubs
         | 
| 61 | 
            +
                      (?:_[a-z0-9_-]+)?
         | 
| 62 | 
            +
                    \.history\.txt$
         | 
| 63 | 
            +
                }x
         | 
| 44 64 |  | 
| 45 65 | 
             
                ##  teams.txt or teams_history.txt
         | 
| 46 66 | 
             
                TEAMS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
         | 
| @@ -49,6 +69,8 @@ module SportDb | |
| 49 69 | 
             
                                  \.txt$
         | 
| 50 70 | 
             
                }x
         | 
| 51 71 |  | 
| 72 | 
            +
             | 
| 73 | 
            +
                ###  todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
         | 
| 52 74 | 
             
                ### season folder:
         | 
| 53 75 | 
             
                ##            e.g. /2019-20   or
         | 
| 54 76 | 
             
                ##  year-only e.g. /2019      or
         | 
| @@ -73,6 +95,10 @@ module SportDb | |
| 73 95 | 
             
                                   /[a-z0-9_.-]+\.csv$  ## note: allow dot (.) too e.g /eng.1.csv
         | 
| 74 96 | 
             
                                }x
         | 
| 75 97 |  | 
| 98 | 
            +
                ### add "generic" pattern to find all csv datafiles
         | 
| 99 | 
            +
                CSV_RE = %r{ (?: ^|/ )
         | 
| 100 | 
            +
                               [a-z0-9_.-]+\.csv$  ## note: allow dot (.) too e.g /eng.1.csv
         | 
| 101 | 
            +
                           }x
         | 
| 76 102 |  | 
| 77 103 |  | 
| 78 104 | 
             
                ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
         | 
| @@ -96,16 +122,22 @@ module SportDb | |
| 96 122 | 
             
               def self.find_teams( path, pattern: TEAMS_RE )  find( path, pattern ); end
         | 
| 97 123 | 
             
               def self.match_teams( path )  TEAMS_RE.match( path ); end
         | 
| 98 124 |  | 
| 99 | 
            -
               def self.find_clubs( path, pattern: CLUBS_RE ) | 
| 100 | 
            -
               def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) | 
| 125 | 
            +
               def self.find_clubs( path, pattern: CLUBS_RE )                 find( path, pattern ); end
         | 
| 126 | 
            +
               def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )       find( path, pattern ); end
         | 
| 127 | 
            +
               def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
         | 
| 101 128 |  | 
| 102 | 
            -
               def self.match_clubs( path ) | 
| 103 | 
            -
               def self.match_clubs_wiki( path ) | 
| 104 | 
            -
               def self. | 
| 129 | 
            +
               def self.match_clubs( path )         CLUBS_RE.match( path ); end
         | 
| 130 | 
            +
               def self.match_clubs_wiki( path )    CLUBS_WIKI_RE.match( path ); end
         | 
| 131 | 
            +
               def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
         | 
| 132 | 
            +
               def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
         | 
| 105 133 |  | 
| 106 134 | 
             
               def self.find_leagues( path, pattern: LEAGUES_RE )  find( path, pattern ); end
         | 
| 107 135 | 
             
               def self.match_leagues( path )  LEAGUES_RE.match( path ); end
         | 
| 108 136 |  | 
| 137 | 
            +
               def self.find_seasons( path, pattern: SEASONS_RE )  find( path, pattern ); end
         | 
| 138 | 
            +
               def self.match_seasons( path )  SEASONS_RE.match( path ); end
         | 
| 139 | 
            +
             | 
| 140 | 
            +
             | 
| 109 141 | 
             
               def self.find_conf( path, pattern: CONF_RE )  find( path, pattern ); end
         | 
| 110 142 | 
             
               def self.match_conf( path )  CONF_RE.match( path ); end
         | 
| 111 143 |  | 
| @@ -118,6 +150,7 @@ module SportDb | |
| 118 150 | 
             
               end
         | 
| 119 151 | 
             
               ## add match_match and match_match_csv  - why? why not?
         | 
| 120 152 |  | 
| 153 | 
            +
             | 
| 121 154 | 
             
               class << self
         | 
| 122 155 | 
             
                 alias_method :match_teams?, :match_teams
         | 
| 123 156 | 
             
                 alias_method :teams?,       :match_teams
         | 
| @@ -128,12 +161,21 @@ module SportDb | |
| 128 161 | 
             
                 alias_method :match_clubs_wiki?, :match_clubs_wiki
         | 
| 129 162 | 
             
                 alias_method :clubs_wiki?,       :match_clubs_wiki
         | 
| 130 163 |  | 
| 131 | 
            -
                 alias_method : | 
| 132 | 
            -
                 alias_method : | 
| 164 | 
            +
                 alias_method :match_clubs_history?, :match_clubs_history
         | 
| 165 | 
            +
                 alias_method :clubs_history?,       :match_clubs_history
         | 
| 166 | 
            +
             | 
| 167 | 
            +
                 alias_method :match_club_props,      :match_clubs_props
         | 
| 168 | 
            +
                 alias_method :match_club_props?,     :match_clubs_props
         | 
| 169 | 
            +
                 alias_method :club_props?,           :match_clubs_props
         | 
| 170 | 
            +
                 alias_method :match_clubs_props?,    :match_clubs_props
         | 
| 171 | 
            +
                 alias_method :clubs_props?,          :match_clubs_props
         | 
| 133 172 |  | 
| 134 173 | 
             
                 alias_method :match_leagues?, :match_leagues
         | 
| 135 174 | 
             
                 alias_method :leagues?,       :match_leagues
         | 
| 136 175 |  | 
| 176 | 
            +
                 alias_method :match_seasons?, :match_seasons
         | 
| 177 | 
            +
                 alias_method :seasons?,       :match_seasons
         | 
| 178 | 
            +
             | 
| 137 179 | 
             
                 alias_method :match_conf?, :match_conf
         | 
| 138 180 | 
             
                 alias_method :conf?,       :match_conf
         | 
| 139 181 | 
             
               end
         | 
| @@ -212,11 +254,17 @@ module SportDb | |
| 212 254 | 
             
                  end
         | 
| 213 255 | 
             
                end
         | 
| 214 256 | 
             
                def each_match_csv( &blk )  each( pattern: MATCH_CSV_RE, &blk ); end
         | 
| 257 | 
            +
                def each_csv( &blk )        each( pattern: CSV_RE, &blk );       end
         | 
| 258 | 
            +
             | 
| 215 259 | 
             
                def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
         | 
| 216 260 |  | 
| 217 261 | 
             
                def each_leagues( &blk )    each( pattern: LEAGUES_RE, &blk ); end
         | 
| 218 262 | 
             
                def each_clubs( &blk )      each( pattern: CLUBS_RE, &blk ); end
         | 
| 219 263 | 
             
                def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
         | 
| 264 | 
            +
                def each_clubs_history( &blk )  each( pattern: CLUBS_HISTORY_RE, &blk ); end
         | 
| 265 | 
            +
             | 
| 266 | 
            +
                def each_seasons( &blk )    each( pattern: SEASONS_RE, &blk ); end
         | 
| 267 | 
            +
             | 
| 220 268 |  | 
| 221 269 | 
             
                ## return all match datafile entries
         | 
| 222 270 | 
             
                def match( format: 'txt' )
         | 
| @@ -287,13 +335,13 @@ module SportDb | |
| 287 335 | 
             
                  ##             filter.skip? filter.include? ( season_key )?
         | 
| 288 336 | 
             
                  ##             filter.before?( season_key )  etc.
         | 
| 289 337 | 
             
                  ##              find some good method names!!!!
         | 
| 290 | 
            -
                  season_start = start ?  | 
| 338 | 
            +
                  season_start = start ? Season( start ) : nil
         | 
| 291 339 |  | 
| 292 340 | 
             
                  h = {}
         | 
| 293 341 | 
             
                  match( format: format ).each do |entry|
         | 
| 294 342 | 
             
                    ## note: assume last directory in datafile path is the season part/key
         | 
| 295 343 | 
             
                    season_q = File.basename( File.dirname( entry.name ))
         | 
| 296 | 
            -
                    season   =  | 
| 344 | 
            +
                    season   = Season.parse( season_q )  ## normalize season
         | 
| 297 345 |  | 
| 298 346 | 
             
                    ## skip if start season before this season
         | 
| 299 347 | 
             
                    next if season_start && season_start.start_year > season.start_year
         | 
| @@ -18,10 +18,19 @@ module SportDb | |
| 18 18 |  | 
| 19 19 |  | 
| 20 20 | 
             
              def is_round?( line )
         | 
| 21 | 
            -
                ## note: =~  | 
| 22 | 
            -
             | 
| 21 | 
            +
                ## note: =~ returns nil if no match found, and 0, 1, etc. for a match
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                ##  note: allow "free standing" leg 1 and leg 2 too
         | 
| 24 | 
            +
                ##         (e.g. Hinspiel, Rückspiel etc. used for now in Relegation, for example)
         | 
| 25 | 
            +
                ##    note ONLY allowed if "free standing", that is, full line with nothing else
         | 
| 26 | 
            +
                ##          use "custom" regex for special case for now
         | 
| 27 | 
            +
                ##               avoids match HIN in PascHINg, for example (hin in german for leg 1)
         | 
| 28 | 
            +
                line =~ SportDb.lang.regex_round    ||
         | 
| 29 | 
            +
                line =~ /^(#{SportDb.lang.leg1})$/i ||
         | 
| 30 | 
            +
                line =~ /^(#{SportDb.lang.leg2})$/i
         | 
| 23 31 | 
             
              end
         | 
| 24 32 |  | 
| 33 | 
            +
             | 
| 25 34 | 
             
              def is_knockout_round?( line )
         | 
| 26 35 |  | 
| 27 36 | 
             
                ## todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg)
         | 
| @@ -54,6 +54,7 @@ class ClubIndex | |
| 54 54 | 
             
              ##       normalize( name )
         | 
| 55 55 |  | 
| 56 56 | 
             
              def strip_wiki( name )     # todo/check: rename to strip_wikipedia_en - why? why not?
         | 
| 57 | 
            +
                ##  change/rename to strip_wiki_qualifier or such - why? why not?
         | 
| 57 58 | 
             
                ## note: strip disambiguation qualifier from wikipedia page name if present
         | 
| 58 59 | 
             
                ##        note: only remove year and foot... for now
         | 
| 59 60 | 
             
                ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
         | 
| @@ -178,22 +179,24 @@ class ClubIndex | |
| 178 179 | 
             
              ## todo/fix/check: use rename to find_canon  or find_canonical() or something??
         | 
| 179 180 | 
             
              ##  remove (getting used?) - why? why not?
         | 
| 180 181 | 
             
              def []( name )    ## lookup by canonical name only;  todo/fix: add find alias why? why not?
         | 
| 182 | 
            +
                puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
         | 
| 181 183 | 
             
                @clubs[ name ]
         | 
| 182 184 | 
             
              end
         | 
| 183 185 |  | 
| 184 186 |  | 
| 185 | 
            -
              ## todo/fix/check: return empty array if no match!!!
         | 
| 186 | 
            -
              ##     and NOT nil (add || []) - why? why not?
         | 
| 187 187 | 
             
              def match( name )
         | 
| 188 | 
            +
                # note: returns empty array (e.g. []) if no match and NOT nil
         | 
| 188 189 | 
             
                name = normalize( name )
         | 
| 189 | 
            -
                m = @clubs_by_name[ name ]
         | 
| 190 | 
            +
                m = @clubs_by_name[ name ] || []
         | 
| 190 191 |  | 
| 191 192 | 
             
                ## no match - retry with unaccented variant if different
         | 
| 192 193 | 
             
                ##    e.g. example is Preussen Münster  (with mixed accent and unaccented letters) that would go unmatched for now
         | 
| 193 194 | 
             
                ##      Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
         | 
| 194 | 
            -
                if m. | 
| 195 | 
            +
                if m.empty?
         | 
| 195 196 | 
             
                  name2 = unaccent( name )
         | 
| 196 | 
            -
                   | 
| 197 | 
            +
                  if name2 != name
         | 
| 198 | 
            +
                    m = @clubs_by_name[ name2 ] || []
         | 
| 199 | 
            +
                  end
         | 
| 197 200 | 
             
                end
         | 
| 198 201 | 
             
                m
         | 
| 199 202 | 
             
              end
         | 
| @@ -227,10 +230,8 @@ class ClubIndex | |
| 227 230 | 
             
                  country = country( country )
         | 
| 228 231 |  | 
| 229 232 | 
             
                  ## note: match must for now always  include name
         | 
| 230 | 
            -
                   | 
| 231 | 
            -
             | 
| 232 | 
            -
                    m = nil   if m.empty?     ## note: reset to nil if no more matches
         | 
| 233 | 
            -
                  end
         | 
| 233 | 
            +
                  ## filter by country
         | 
| 234 | 
            +
                  m = m.select { |club| club.country.key == country.key }
         | 
| 234 235 | 
             
                end
         | 
| 235 236 | 
             
                m
         | 
| 236 237 | 
             
              end
         | 
| @@ -263,7 +264,7 @@ class ClubIndex | |
| 263 264 |  | 
| 264 265 | 
             
                  m = match_by( name: name, country: country )
         | 
| 265 266 |  | 
| 266 | 
            -
                  if m. | 
| 267 | 
            +
                  if m.empty?
         | 
| 267 268 | 
             
                    ## (re)try with second country - quick hacks for known leagues
         | 
| 268 269 | 
             
                    ##  todo/fix: add league flag to activate!!!  - why? why not
         | 
| 269 270 | 
             
                    m = match_by( name: name, country: 'wal' )  if country.key == 'eng'
         | 
| @@ -272,6 +273,7 @@ class ClubIndex | |
| 272 273 | 
             
                    m = match_by( name: name, country: 'mc' )   if country.key == 'fr'
         | 
| 273 274 | 
             
                    m = match_by( name: name, country: 'li' )   if country.key == 'ch'
         | 
| 274 275 | 
             
                    m = match_by( name: name, country: 'ca' )   if country.key == 'us'
         | 
| 276 | 
            +
                    m = match_by( name: name, country: 'nz' )   if country.key == 'au'
         | 
| 275 277 | 
             
                  end
         | 
| 276 278 | 
             
                else  ## try "global" search - no country passed in
         | 
| 277 279 | 
             
                  m = match( name )
         | 
| @@ -279,7 +281,7 @@ class ClubIndex | |
| 279 281 |  | 
| 280 282 |  | 
| 281 283 | 
             
                club = nil
         | 
| 282 | 
            -
                if m. | 
| 284 | 
            +
                if m.empty?
         | 
| 283 285 | 
             
                  ## puts "** !!! WARN !!! no match for club >#{name}<"
         | 
| 284 286 | 
             
                elsif m.size > 1
         | 
| 285 287 | 
             
                  puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
         | 
| @@ -0,0 +1,134 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module SportDb
         | 
| 4 | 
            +
              module Import
         | 
| 5 | 
            +
             | 
| 6 | 
            +
             | 
| 7 | 
            +
            class ClubHistoryIndex
         | 
| 8 | 
            +
             | 
| 9 | 
            +
              def self.build( path )
         | 
| 10 | 
            +
                pack = Package.new( path )   ## lets us use directory or zip archive
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                recs = []
         | 
| 13 | 
            +
                pack.each_clubs_history do |entry|
         | 
| 14 | 
            +
                  recs += ClubHistoryReader.parse( entry.read )
         | 
| 15 | 
            +
                end
         | 
| 16 | 
            +
                recs
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                index = new
         | 
| 19 | 
            +
                index.add( recs )
         | 
| 20 | 
            +
                index
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
             | 
| 24 | 
            +
             | 
| 25 | 
            +
              def catalog() Import.catalog; end
         | 
| 26 | 
            +
             | 
| 27 | 
            +
              ## note: keep name history for now separate from
         | 
| 28 | 
            +
              ##          club struct - why? why not?
         | 
| 29 | 
            +
              ##       later yes, yes, yes, merge name history into club struct!!!!!
         | 
| 30 | 
            +
              ##
         | 
| 31 | 
            +
              ## for now the name history is experimental
         | 
| 32 | 
            +
             | 
| 33 | 
            +
             | 
| 34 | 
            +
              def initialize
         | 
| 35 | 
            +
                @clubs          = {}   ## clubs (indexed) by canonical name
         | 
| 36 | 
            +
                @errors         = []
         | 
| 37 | 
            +
              end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
              attr_reader :errors
         | 
| 40 | 
            +
              def errors?() @errors.empty? == false; end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
              def mappings() @clubs; end   ## todo/check: rename to records or histories or something - why? why not?
         | 
| 43 | 
            +
             | 
| 44 | 
            +
             | 
| 45 | 
            +
              def add_history( club_rec, keyword, season, args )
         | 
| 46 | 
            +
                ## note use season obj for now (and NOT key) - why? why not?
         | 
| 47 | 
            +
                rec = @clubs[ club_rec.name ] ||= []
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                rec << [season, [keyword, args]]
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                ## note: always keep records sorted by season (newest first) for now
         | 
| 52 | 
            +
                ##   check if 2010 and 2010/11 is in order using alpha sort?? (see argentina)
         | 
| 53 | 
            +
                rec.sort! { |l,r| r[0] <=> l[0] }
         | 
| 54 | 
            +
              end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
             | 
| 57 | 
            +
              def add( rec_or_recs )   ## add club history record(s)
         | 
| 58 | 
            +
                recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs]      ## wrap (single) rec in array
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                recs.each do |rec|
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                  keyword    = rec[0]
         | 
| 63 | 
            +
                  season_key = rec[1]
         | 
| 64 | 
            +
                  args       = rec[2..-1]   ## get rest of args e.g. one, two or more
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                  ## note: for now only add (re)name history season records,
         | 
| 67 | 
            +
                  ##          that is, skip MERGE and BANKRUPT for now
         | 
| 68 | 
            +
                  ##           and incl. only RENAME, REFORM, MOVE for now
         | 
| 69 | 
            +
                  next if ['MERGE', 'BANKRUPT'].include?( keyword )
         | 
| 70 | 
            +
             | 
| 71 | 
            +
             | 
| 72 | 
            +
                  name_old = strip_geo( args[0][0] )  ## note: strip optional geo part from name
         | 
| 73 | 
            +
                  name_new = strip_geo( args[1][0] )
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                  country_old = args[0][1]
         | 
| 76 | 
            +
                  country_new = args[1][1]
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                  club_old = catalog.clubs.find_by!( name: name_old, country: country_old )
         | 
| 79 | 
            +
                  club_new = catalog.clubs.find_by!( name: name_new, country: country_new )
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                  ## note use season obj for now (and NOT key) - why? why not?
         | 
| 82 | 
            +
                  season = Season.parse( season_key )
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                  ## todo/check:
         | 
| 85 | 
            +
                  ##   check if  club_old and club_new reference different club record!!
         | 
| 86 | 
            +
                  ##    examples - RB II            -> Liefering ??  or
         | 
| 87 | 
            +
                  ##               FC Pasching      -> OOE Juniors ??
         | 
| 88 | 
            +
                  ##               Austria Salzburg -> RB Salzburg ??
         | 
| 89 | 
            +
                  ##   for now always add name history to both - why? why not?
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                  add_history( club_old, keyword, season, args )
         | 
| 92 | 
            +
                  ## note: allow for now different club references
         | 
| 93 | 
            +
                  ##    but maybe warn later - why? why not?
         | 
| 94 | 
            +
                  ## add history to both for now
         | 
| 95 | 
            +
                  add_history( club_new, keyword, season, args )  if club_old != club_new
         | 
| 96 | 
            +
                end # each rec
         | 
| 97 | 
            +
              end # method add
         | 
| 98 | 
            +
             | 
| 99 | 
            +
             | 
| 100 | 
            +
              #### todo/check: move as method to club struct later - to always use club reference
         | 
| 101 | 
            +
              ##  returns (simply) name as string for now or nil - why? why not?
         | 
| 102 | 
            +
              #
         | 
| 103 | 
            +
              #  history entry example
         | 
| 104 | 
            +
              # "Arsenal FC"=>
         | 
| 105 | 
            +
              # [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
         | 
| 106 | 
            +
              #  [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
         | 
| 107 | 
            +
              #  [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
         | 
| 108 | 
            +
              ## look up the club name in use for the given season
              ##   name   - key used to fetch the history records from @clubs
              ##   season - season object or string; converted via Season()
              ##  returns the name (as string, geo part stripped) or nil if no history is on record
              ##  note: the first record with a season on or before the requested one wins
              ##        (presumably records are ordered newest season first - todo/check: confirm in add_history)
              def find_name_by( name:, season: )
                history = @clubs[ name ]
                return nil  unless history

                season = Season( season )   ## make sure season is a season obj (and NOT a string)

                ## linear search (assume only few records per club)
                hit = history.find { |entry| season >= entry[0] }

                if hit
                  strip_geo( hit[1][1][1][0] )            # name of second arg (that is, the "new" name)
                else
                  ## season is before all records; fall back to last record
                  strip_geo( history[-1][1][1][0][0] )    # name of first arg (that is, the "old" name)
                end
              end
         | 
| 124 | 
            +
             | 
| 125 | 
            +
              ##################
         | 
| 126 | 
            +
              ## helpers
         | 
| 127 | 
            +
              ## strip the optional geo part (everything from the first comma on) from a name
              ##   e.g. Arsenal, London   =>   Arsenal
              ##  returns the (whitespace-stripped) leading part; an empty string
              ##   if nothing is left before the first comma (e.g. for "" or ",")
              def strip_geo( name )
                ## note: use partition (and NOT split(',')[0]) -
                ##   split returns an empty array for "" or "," and [0] would be nil
                name.partition( ',' ).first.strip
              end
         | 
| 131 | 
            +
            end # class ClubHistoryIndex
         | 
| 132 | 
            +
             | 
| 133 | 
            +
            end   # module Import
         | 
| 134 | 
            +
            end   # module SportDb
         | 
| @@ -0,0 +1,203 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
             | 
| 4 | 
            +
            module SportDb
         | 
| 5 | 
            +
            module Import
         | 
| 6 | 
            +
             | 
| 7 | 
            +
             | 
| 8 | 
            +
            ## reads club history records (renames, moves, mergers, and so on)
            ##   from an outline text with country (heading 1) and season (heading 2) sections
            ##
            ##  parse returns an array of records e.g.
            ##    [ 'RENAME',   '1927/28', ['The Arsenal FC, London', 'eng'], ['Arsenal FC, London', 'eng'] ]
            ##    [ 'BANKRUPT', '1927/28', ['Foo FC', 'eng'] ]
            ##    [ 'MERGE',    '1927/28', [['A FC', 'eng'], ['B FC', 'eng']], ['AB United', 'eng'] ]
            ##
            ##  note: on any format error a message gets printed and the process exits (with status 1)
            class ClubHistoryReader

              ## (shortcut) helper for the catalog; used to look up countries
              def catalog() Import.catalog; end


              ## read the text file at path (as utf-8) and parse it; returns the records
              def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
                txt = File.open( path, 'r:utf-8' ) { |f| f.read }
                parse( txt )
              end

              ## parse the passed in text; returns the records
              def self.parse( txt )
                new( txt ).parse
              end

              def initialize( txt )
                @txt = txt
              end


              ###
              ## RENAME/RENAMED
              ## MOVE/MOVED
              ## BANKRUPT/BANKRUPTED
              ## REFORM/REFORMED
              ## MERGE/MERGED    - allow + or ++ or +++ or ; for "inline" - why? why not?

              ## matches a line starting with a keyword followed by one or more spaces and the rest of the text
              KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
                                             MOVED?|
                                             BANKRUPT(?:ED)?|
                                             REFORM(?:ED)?|
                                             MERGED?
                                  )
                                    [ ]+
                                   (?<text>.*)    # rest of text
                                  $
                                }x

              ## maps all accepted keyword spellings to the "normalized" keyword
              KEYWORD_ALIASES = {
                'BANKRUPT' => 'BANKRUPT',  'BANKRUPTED' => 'BANKRUPT',
                'RENAME'   => 'RENAME',    'RENAMED'    => 'RENAME',
                'REFORM'   => 'REFORM',    'REFORMED'   => 'REFORM',
                'MOVE'     => 'MOVE',      'MOVED'      => 'MOVE',
                'MERGE'    => 'MERGE',     'MERGED'     => 'MERGE',
              }.freeze


              ## walk all outline nodes and collect the history records; returns the records (array)
              def parse
                ## note: (re)set all parser state on every call
                @recs         = []
                @last_country = nil
                @last_season  = nil
                @last_keyword = nil
                @last_teams   = []    ## teams collected so far for a (multi-line) MERGE

                OutlineReader.parse( @txt ).each do |node|
                  case node[0]
                  when :h1, :h2, :h3, :h4, :h5, :h6
                    ## heading level is the digit in the node type e.g. :h2 => 2
                    parse_heading( node[0][1].to_i, node[1] )
                  when :p   ## paragraph with (text) lines
                    parse_paragraph( node[1] )
                  else
                    bail_out( "** !!! ERROR [club history reader] - unknown line type:" ) { pp node }
                  end
                end

                @recs
              end  # method parse


              ###############
              ## helper

              def squish( str )
                ## collapse runs of spaces to one space
                ##   note: only spaces get collapsed; tabs and other whitespace are kept as is
                str.squeeze( ' ' )
              end


              private

              ## print the error message (plus optional details via block) and exit
              def bail_out( msg )
                puts msg
                yield  if block_given?
                exit 1
              end

              ## heading 1 sets the country, heading 2 sets the season; all other levels are an error
              def parse_heading( level, heading )
                puts "heading #{level} >#{heading}<"

                case level
                when 1
                  ## assume country in heading; allow all "formats" supported by parse e.g.
                  ##   Österreich • Austria (at)
                  ##   Österreich • Austria
                  ##   Austria
                  ##   Deutschland (de) • Germany
                  country = catalog.countries.parse( heading )
                  ## check country code - MUST exist for now!!!!
                  if country.nil?
                    bail_out( "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix" )
                  end
                  puts "  country >#{heading}< => #{country.name}, #{country.key}"
                  @last_country = country
                  @last_season  = nil  ## reset "lower levels" - season & keyword
                  @last_keyword = nil
                when 2
                  ## assume season
                  season = Season.parse( heading )
                  puts "  season >#{heading}< => #{season.key}"
                  @last_season  = season
                  @last_keyword = nil  ## reset "lower levels" - keyword
                else
                  bail_out( "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry" )
                end
              end

              ## a paragraph requires a country and a season heading first
              def parse_paragraph( lines )
                if @last_country.nil?
                  bail_out( "!!! ERROR [club history reader] - country heading 1 required, sorry" )
                end
                if @last_season.nil?
                  bail_out( "!!! ERROR [club history reader] - season heading 2 required, sorry" )
                end

                lines.each { |line| parse_line( line ) }
              end

              ## handle a single line; a leading keyword (if any) switches the mode for this and the following lines
              def parse_line( line )
                if (m = line.match( KEYWORD_LINE_RE ))   ## extract keyword and continue
                  keyword = m[:keyword]
                  line    = m[:text].strip

                  puts "    keyword #{keyword}"
                  @last_keyword = KEYWORD_ALIASES.fetch( keyword ) do   ## "normalize" keywords
                    bail_out( "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize" )
                  end

                  @last_teams = []   ## note: a new keyword always starts a new team list
                end

                if @last_keyword.nil?
                  bail_out( "!!! ERROR [club history reader] - line with keyword expected - got:" ) { puts line }
                end

                case @last_keyword
                when 'BANKRUPT'
                  ## requires / expects one team in one line
                  @recs << [ @last_keyword, @last_season.key,
                             [ squish(line), @last_country.key ]
                           ]
                when 'RENAME', 'REFORM', 'MOVE'
                  add_change( line )
                when 'MERGE'
                  add_merge( line )
                else
                  bail_out( "!!! ERROR [club history reader] - unknown keyword >#{@last_keyword}<; cannot process; sorry" )
                end
              end

              ## RENAME, REFORM, MOVE - requires / expects two teams in one line (separated by ⇒)
              def add_change( line )
                teams = line.split( '⇒' )
                if teams.size != 2
                  bail_out( "!!! ERROR [club history reader] - expected two teams - got:" ) { pp teams }
                end

                old_name, new_name = teams.map { |team| squish( team.strip ) }  ## remove whitespaces
                @recs << [ @last_keyword, @last_season.key,
                           [ old_name, @last_country.key ],
                           [ new_name, @last_country.key ]
                         ]
              end

              ## MERGE - collect the to be merged teams (one per line)
              ##   until a line starting with the separator (⇒) names the new team
              def add_merge( line )
                unless line.start_with?( '⇒' )
                  @last_teams << squish( line )
                  return
                end

                if @last_teams.size < 2
                  bail_out( "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:" ) { pp @last_teams }
                end

                ## auto-add country to all teams
                teams = @last_teams.map { |team| [team, @last_country.key] }
                @recs << [ @last_keyword, @last_season.key,
                           teams,
                           [ squish( line.sub( '⇒', '' ).strip ), @last_country.key ]
                         ]

                @last_teams = []
              end

            end  # class ClubHistoryReader
         | 
| 200 | 
            +
             | 
| 201 | 
            +
             | 
| 202 | 
            +
            end ## module Import
         | 
| 203 | 
            +
            end ## module SportDb
         |