RubyGems - sportdb-formats - Versions diffs - 1.0.6 → 1.1.4 - Mend

sportdb-formats 1.0.6 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

checksums.yaml +4 -4
data/Manifest.txt +6 -33
data/Rakefile +2 -5
data/lib/sportdb/formats.rb +54 -70
data/lib/sportdb/formats/country/country_index.rb +2 -2
data/lib/sportdb/formats/event/event_index.rb +141 -0
data/lib/sportdb/formats/event/event_reader.rb +183 -0
data/lib/sportdb/formats/league/league_index.rb +22 -18
data/lib/sportdb/formats/league/league_outline_reader.rb +45 -13
data/lib/sportdb/formats/league/league_reader.rb +7 -1
data/lib/sportdb/formats/match/match_parser.rb +101 -111
data/lib/sportdb/formats/package.rb +59 -11
data/lib/sportdb/formats/parser_helper.rb +11 -2
data/lib/sportdb/formats/team/club_index.rb +13 -11
data/lib/sportdb/formats/team/club_index_history.rb +134 -0
data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
data/lib/sportdb/formats/version.rb +2 -2
data/test/helper.rb +51 -81
data/test/test_club_index_history.rb +107 -0
data/test/test_club_reader_history.rb +212 -0
data/test/test_datafile_package.rb +1 -1
data/test/test_regex.rb +25 -7
metadata +9 -78
data/lib/sportdb/formats/config.rb +0 -40
data/lib/sportdb/formats/match/match_parser_csv.rb +0 -314
data/lib/sportdb/formats/name_helper.rb +0 -84
data/lib/sportdb/formats/score/score_formats.rb +0 -220
data/lib/sportdb/formats/score/score_parser.rb +0 -202
data/lib/sportdb/formats/season_utils.rb +0 -27
data/lib/sportdb/formats/structs/country.rb +0 -31
data/lib/sportdb/formats/structs/group.rb +0 -18
data/lib/sportdb/formats/structs/league.rb +0 -37
data/lib/sportdb/formats/structs/match.rb +0 -151
data/lib/sportdb/formats/structs/matchlist.rb +0 -220
data/lib/sportdb/formats/structs/round.rb +0 -25
data/lib/sportdb/formats/structs/season.rb +0 -123
data/lib/sportdb/formats/structs/standings.rb +0 -247
data/lib/sportdb/formats/structs/team.rb +0 -150
data/lib/sportdb/formats/structs/team_usage.rb +0 -88
data/test/test_clubs.rb +0 -40
data/test/test_conf.rb +0 -65
data/test/test_csv_match_parser.rb +0 -114
data/test/test_csv_match_parser_utils.rb +0 -20
data/test/test_csv_reader.rb +0 -31
data/test/test_match.rb +0 -30
data/test/test_match_auto.rb +0 -72
data/test/test_match_auto_champs.rb +0 -45
data/test/test_match_auto_euro.rb +0 -37
data/test/test_match_auto_worldcup.rb +0 -61
data/test/test_match_champs.rb +0 -27
data/test/test_match_eng.rb +0 -26
data/test/test_match_euro.rb +0 -27
data/test/test_match_worldcup.rb +0 -27
data/test/test_name_helper.rb +0 -67
data/test/test_scores.rb +0 -122
data/test/test_season.rb +0 -62

data/lib/sportdb/formats/name_helper.rb DELETED

@@ -1,84 +0,0 @@
-module SportDb
-  module NameHelper
-  ## note: allow placeholder years to e.g. (-___) or (-????)
-  ##    for marking missing (to be filled in) years
-  ##  e.g. (1887-1911), (-2013),
-  ##      (1946-2001, 2013-) etc.
-  ##  todo/check: make more strict  e.g. only accept 4-digit years? - why? why not?
-  YEAR_RE =  %r{\(
-                  [0-9, ?_-]+?     # note: non-greedy (minimum/first) match
-              \)}x
-  def strip_year( name )
-    ## check for year(s) e.g. (1887-1911), (-2013),
-    ##                        (1946-2001, 2013-) etc.
-    ##  todo/check: only sub once (not global) - why? why not?
-    name.gsub( YEAR_RE, '' ).strip
-  end
-  def has_year?( name ) name =~ YEAR_RE; end
-  LANG_RE =  %r{\[
-                [a-z]{1,2}    # note also allow single-letter [a] or [d] or [e] - why? why not?
-                \]}x
-  def strip_lang( name )
-     name.gsub( LANG_RE, '' ).strip
-  end
-  def has_lang?( name ) name =~ LANG_RE; end
-  def sanitize( name )
-    ## check for year(s) e.g. (1887-1911), (-2013),
-    ##                        (1946-2001,2013-) etc.
-    name = strip_year( name )
-    ## check lang codes e.g. [en], [fr], etc.
-    name = strip_lang( name )
-    name
-  end
-  ## note: also add (),’,−  etc. e.g.
-  ##   Estudiantes (LP) => Estudiantes LP
-  ##   Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
-  ##   Myllykosken Pallo −47 => Myllykosken Pallo 47
-  NORM_RE =  %r{
-                    [.'’º/()_−-]
-                  }x   # note: in [] dash (-) if last doesn't need to get escaped
-  ## note: remove all dots (.), dash (-), ', º, /, etc.
-  #   .  U+002E (46) - FULL STOP
-  #   '  U+0027 (39) - APOSTROPHE
-  #   ’  U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
-  #   º  U+00BA (186) - MASCULINE ORDINAL INDICATOR
-  #   /  U+002F (47) - SOLIDUS
-  #   (  U+0028 (40) - LEFT PARENTHESIS
-  #   )  U+0029 (41) - RIGHT PARENTHESIS
-  #   −  U+2212 (8722) - MINUS SIGN
-  #   -  U+002D (45) - HYPHEN-MINUS
-  ##         for norm(alizing) names
-  def strip_norm( name )
-    name.gsub( NORM_RE, '' )
-  end
-  def normalize( name )
-    # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
-    name = strip_norm( name )
-    name = name.gsub( ' ', '' )  # note: also remove all spaces!!!
-    ## todo/check: use our own downcase - why? why not?
-    name = downcase_i18n( name )     ## do NOT care about upper and lowercase for now
-    name
-  end
-  def variants( name )  Variant.find( name ); end
-  end  # module NameHelper
-end   # module SportDb

data/lib/sportdb/formats/score/score_formats.rb DELETED

@@ -1,220 +0,0 @@
-module ScoreFormats
-    ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
-    #####
-    #  english helpers (penalty, extra time, ...)
-    P_EN  =  '(?: p | pen\.? | pso )'     # e.g. p, pen, pen., PSO, etc.
-    ET_EN =  '(?: aet | a\.e\.t\.? )'     # note: make last . optional (e.g a.e.t) allowed too
-    ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1)  or
-    ##      3-4 pen. 2-2 a.e.t. (1-1, )     or
-    ##      3-4 pen. 2-2 a.e.t. (1-1)       or
-    ##               2-2 a.e.t. (1-1, 1-1)  or
-    ##               2-2 a.e.t. (1-1, )     or
-    ##               2-2 a.e.t. (1-1)
-    EN__P_ET_FT_HT__RE = /\b
-               (?:
-                (?<score1p>\d{1,2})
-                   [ ]* - [ ]*          # note: sep in optional block; CANNOT use a reference
-                (?<score2p>\d{1,2})
-                   [ ]* #{P_EN} [ ]*
-                )?            # note: make penalty (P) score optional for now
-               (?<score1et>\d{1,2})
-                   [ ]* - [ ]*
-               (?<score2et>\d{1,2})
-                   [ ]* #{ET_EN} [ ]*
-                   \(
-                   [ ]*
-              (?<score1>\d{1,2})
-                   [ ]* - [ ]*
-              (?<score2>\d{1,2})
-                   [ ]*
-                (?:
-                     , [ ]*
-                    (?: (?<score1i>\d{1,2})
-                        [ ]* - [ ]*
-                        (?<score2i>\d{1,2})
-                        [ ]*
-                    )?
-                )?              # note: make half time (HT) score optional for now
-              \)
-             (?=[ \]]|$)/xi    ## todo/check:  remove lookahead assertion here - why require space?
-                               ## note: \b works only after non-alphanum e.g. )
-    ###
-    ##   special case for case WITHOUT extra time!!
-    ##     same as above (but WITHOUT extra time and pen required)
-    EN__P_FT_HT__RE = /\b
-     (?<score1p>\d{1,2})
-        [ ]* - [ ]*          # note: sep in optional block; CANNOT use a reference
-     (?<score2p>\d{1,2})
-        [ ]* #{P_EN} [ ]*
-        \(
-        [ ]*
-   (?<score1>\d{1,2})
-        [ ]* - [ ]*
-   (?<score2>\d{1,2})
-        [ ]*
-     (?:
-          , [ ]*
-         (?: (?<score1i>\d{1,2})
-             [ ]* - [ ]*
-             (?<score2i>\d{1,2})
-             [ ]*
-         )?
-     )?              # note: make half time (HT) score optional for now
-   \)
-  (?=[ \]]|$)/xi    ## todo/check:  remove lookahead assertion here - why require space?
-                    ## note: \b works only after non-alphanum e.g. )
-    ## e.g. 2-1 (1-1) or
-    ##      2-1
-    ## note: for now add here used in Brazil / Portugal
-    ##  e.g 1x1 or 1X1 or 0x2 or 3x3  too
-    ##   todo/check/fix: move to its own use PT__FT_HT etc!!!!
-    EN__FT_HT__RE = /\b
-              (?<score1>\d{1,2})
-                [ ]* (?<sep>[x-]) [ ]*
-              (?<score2>\d{1,2})
-               (?:
-                   [ ]* \( [ ]*
-                (?<score1i>\d{1,2})
-                   [ ]* \k<sep> [ ]*
-                (?<score2i>\d{1,2})
-                   [ ]* \)
-               )?   # note: make half time (HT) score optional for now
-             (?=[ \]]|$)/xi    ## todo/check:  remove lookahead assertion here - why require space?
-                               ## note: \b works only after non-alphanum e.g. )
-    #####
-    #  deutsch / german helpers (penalty, extra time, ...)
-    ## todo add more marker e.g. im Elf. or such!!!
-    P_DE  =  '(?: ie | i\.e\.? )'     # e.g. iE, i.E., i.E etc.
-    ET_DE =  '(?: nv | n\.v\.? )'     # e.g. nV, n.V., n.V etc.
-    ## support alternate all-in-one score e.g.
-    ##     i.E. 2:4, n.V. 3:3 (1:1, 1:1)  or
-    ##               n.V. 3:2 (2:2, 1:2)
-    DE__P_ET_FT_HT__RE = /\b
-                     (?:
-                     #{P_DE}
-                      [ ]*
-                     (?<score1p>\d{1,2})
-                      [ ]* : [ ]*
-                     (?<score2p>\d{1,2})
-                      [ ]* (?:, [ ]*)?
-                     )?   # note: make penalty (P) score optional for now
-                      #{ET_DE}
-                      [ ]*
-                     (?<score1et>\d{1,2})
-                      [ ]* : [ ]*
-                     (?<score2et>\d{1,2})
-                      [ ]*
-                    \(
-                 [ ]*
-             (?<score1>\d{1,2})
-                  [ ]* : [ ]*
-             (?<score2>\d{1,2})
-                  [ ]*
-               (?:
-                   , [ ]*
-                   (?:
-                    (?<score1i>\d{1,2})
-                      [ ]* : [ ]*
-                    (?<score2i>\d{1,2})
-                      [ ]*
-                   )?
-               )?    # note: make half time (HT) score optional for now
-             \)
-            (?=[ \]]|$)
-              /xi
-    ## support all-in-one "literal form e.g.
-    #  2:2 (1:1, 1:0) n.V. 5:1 i.E.   or
-    #  2-2 (1-1, 1-0) n.V. 5-1 i.E.
-    DE__ET_FT_HT_P__RE = /\b
-               (?<score1et>\d{1,2})
-                   [ ]* (?<sep>[:-]) [ ]*     ## note: for now allow : or - as separator!!
-               (?<score2et>\d{1,2})
-                   [ ]*
-                   \(
-                  [ ]*
-              (?<score1>\d{1,2})
-                   [ ]* \k<sep> [ ]*
-              (?<score2>\d{1,2})
-                   [ ]*
-                (?:
-                    , [ ]*
-                    (?:
-                     (?<score1i>\d{1,2})
-                       [ ]* \k<sep> [ ]*
-                     (?<score2i>\d{1,2})
-                       [ ]*
-                    )?
-                )?    # note: make half time (HT) score optional for now
-              \)
-               [ ]*
-               #{ET_DE}
-              (?:
-                [ ]*
-                (?<score1p>\d{1,2})
-                 [ ]* \k<sep> [ ]*
-                (?<score2p>\d{1,2})
-                  [ ]*
-                #{P_DE}
-              )?       # note: make penalty (P) score optional for now
-             (?=[ \]]|$)
-               /xi    ## todo/check:  remove lookahead assertion here - why require space?
-                               ## note: \b works only after non-alphanum e.g. )
-    ## e.g. 2:1 (1:1)  or
-    ##      2-1 (1-1)  or
-    ##      2:1        or
-    ##      2-1
-    DE__FT_HT__RE = /\b
-              (?<score1>\d{1,2})
-                [ ]* (?<sep>[:-]) [ ]*
-              (?<score2>\d{1,2})
-               (?:
-                  [ ]* \( [ ]*
-                      (?<score1i>\d{1,2})
-                        [ ]* \k<sep> [ ]*
-                      (?<score2i>\d{1,2})
-                  [ ]* \)
-               )?   # note: make half time (HT) score optional for now
-             (?=[ \]]|$)/x    ## todo/check:  remove lookahead assertion here - why require space?
-                               ## note: \b works only after non-alphanum e.g. )
-#############################################
-# map tables - 1) regex,  2) tag - note: order matters; first come-first matched/served
-FORMATS_EN = [
-  [ EN__P_ET_FT_HT__RE, '[SCORE.EN__P?_ET_(FT_HT?)]' ], # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
-  [ EN__P_FT_HT__RE,    '[SCORE.EN__P_(FT_HT?)]'     ], # e.g. 5-1 pen. (1-1)
-  [ EN__FT_HT__RE,      '[SCORE.EN__FT_(HT)?]'       ], # e.g. 1-1 (1-0)
-]
-FORMATS_DE = [
-  [ DE__ET_FT_HT_P__RE, '[SCORE.DE__ET_(FT_HT?)_P?]' ], # e.g. 2:2 (1:1, 1:0) n.V. 5:1 i.E.
-  [ DE__P_ET_FT_HT__RE, '[SCORE.DE__P?_ET_(FT_HT?)]' ], # e.g. i.E. 2:4, n.V. 3:3 (1:1, 1:1)
-  [ DE__FT_HT__RE,      '[SCORE.DE__FT_(HT)?]'       ], # e.g. 1:1 (1:0)
-]
-FORMATS = {
-  en: FORMATS_EN,
-  de: FORMATS_DE,
-}
-end # module ScoreFormats

data/lib/sportdb/formats/score/score_parser.rb DELETED

@@ -1,202 +0,0 @@
-# encoding: utf-8
-## note: lets follow the model of DateFormats -see DateFormats gem for more!!!
-## note: make Score top-level and use like Date - why? why not?
-class Score
-  attr_reader :score1i,  :score2i,   # half time (ht) score
-              :score1,   :score2,    # full time (ft) score
-              :score1et, :score2et,  # extra time (et) score
-              :score1p,  :score2p    # penalty (p) score
-              ## todo/fix: add :score1agg, score2agg too - why? why not?!!!
-              ##  add state too e.g. canceled or abandoned etc - why? why not?
-  def initialize( *values )
-    ## note: for now always assumes integers
-    ##  todo/check - check/require integer args - why? why not?
-    @score1i  = values[0]    # half time (ht) score
-    @score2i  = values[1]
-    @score1   = values[2]    # full time (ft) score
-    @score2   = values[3]
-    @score1et = values[4]    # extra time (et) score
-    @score2et = values[5]
-    @score1p  = values[6]    # penalty (p) score
-    @score2p  = values[7]
-  end
-  def to_a
-    ## todo: how to handle game w/o extra time
-    #   but w/ optional penalty ???  e.g. used in Copa Libertadores, for example
-    #    return 0,0 or nil,nil for extra time score ?? or -1, -1 ??
-    #    for now use nil,nil
-    score = []
-    score += [score1i,  score2i]     if score1p || score2p || score1et || score2et || score1 || score2 || score1i || score2i
-    score += [score1,   score2]      if score1p || score2p || score1et || score2et || score1 || score2
-    score += [score1et, score2et]    if score1p || score2p || score1et || score2et
-    score += [score1p,  score2p]     if score1p || score2p
-    score
-  end
-end  # class Score
-module ScoreFormats
-  def self.lang
-    @@lang ||= :en            ## defaults to english (:en)
-  end
-  def self.lang=( value )
-    @@lang = value.to_sym    ## note: make sure lang is always a symbol for now (NOT a string)
-    @@lang      ## todo/check: remove  =() method always returns passed in value? double check
-  end
-  def self.parser( lang: )  ## find parser
-    lang = lang.to_sym  ## note: make sure lang is always a symbol for now (NOT a string)
-    ## note: cache all "built-in" lang versions (e.g. formats == nil)
-    @@parser ||= {}
-    parser = @@parser[ lang ] ||= ScoreParser.new( lang: lang )
-  end
-  def self.parse( line, lang: ScoreFormats.lang )
-    parser( lang: lang ).parse( line )
-  end
-  def self.find!( line, lang: ScoreFormats.lang )
-    parser( lang: lang ).find!( line )
-  end
-class ScoreParser
-  include LogUtils::Logging
-  def initialize( lang: )
-    @lang    = lang.to_sym   ## note: make sure lang is always a symbol for now (NOT a string)
-    ## fallback to english if lang not available
-    ##  todo/fix: add/issue warning - why? why not?
-    @formats = FORMATS[ @lang ] || FORMATS[ :en ]
-  end
-  def parse( line )
-     ##########
-     ## todo/fix/check: add unicode to regular dash conversion - why? why not?
-     ##  e.g. – becomes -  (yes, the characters are different!!!)
-     #############
-    score = nil
-    @formats.each do |format|
-      re = format[0]
-      m = re.match( line )
-      if m
-        score = parse_matchdata( m )
-        break
-      end
-      # no match; continue; try next regex pattern
-    end
-    ## todo/fix - raise ArgumentError - invalid score; no format match found
-    score  # note: nil if no match found
-  end # method parse
-  def find!( line )
-    ### fix: add and match all-in-one literal first, followed by
-    # note: always call after find_dates !!!
-    #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
-    #   -- note: score might have two digits too
-    ### fix: depending on language allow 1:1 or 1-1
-    ##   do NOT allow mix and match
-    ##  e.g. default to en is  1-1
-    ##    de is 1:1 etc.
-    # extract score from line
-    # and return it
-    # note: side effect - replaces the matched score in the line string with its format tag
-    score = nil
-    @formats.each do |format|
-      re  = format[0]
-      tag = format[1]
-      m = re.match( line )
-      if m
-        score = parse_matchdata( m )
-        line.sub!( m[0], tag )
-        break
-      end
-      # no match; continue; try next regex pattern
-    end
-    score  # note: nil if no match found
-  end # method find!
-private
-  def parse_matchdata( m )
-    # convert regex match_data captures to hash
-    # - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
-    h = {}
-    # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
-    m.names.each { |name| h[name.to_sym] = m[name] }  # or use match_data.names.zip( match_data.captures )  - more cryptic but "elegant"??
-    ## puts "[parse_date_time] match_data:"
-    ## pp h
-    logger.debug "   [parse_matchdata] hash: >#{h.inspect}<"
-    score1i   = nil    # half time (ht) scores
-    score2i   = nil
-    score1    = nil    # full time (ft) scores
-    score2    = nil
-    score1et  = nil    # extra time (et) scores
-    score2et  = nil
-    score1p   = nil   # penalty (p) scores
-    score2p   = nil
-    if h[:score1i] && h[:score2i]   ## note: half time (HT) score is optional now
-      score1i   = h[:score1i].to_i
-      score2i   = h[:score2i].to_i
-    end
-    score1 = h[:score1].to_i
-    score2 = h[:score2].to_i
-    if h[:score1et] && h[:score2et]
-      score1et = h[:score1et].to_i
-      score2et = h[:score2et].to_i
-    end
-    if h[:score1p] && h[:score2p]
-      score1p   = h[:score1p].to_i
-      score2p   = h[:score2p].to_i
-    end
-    score = Score.new( score1i,  score2i,
-                       score1,   score2,
-                       score1et, score2et,
-                       score1p,  score2p   )
-    score
-  end  # method parse_matchdata
-end  # class ScoreParser
-end  # module ScoreFormats