RubyGems - sportdb-parser - Versions diffs - 0.6.20 → 0.7.0 - Mend

sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +14 -8
data/Rakefile +1 -1
data/lib/sportdb/parser/blocktxt.rb +99 -0
data/lib/sportdb/parser/lexer.rb +958 -395
data/lib/sportdb/parser/lexer_buffer.rb +97 -0
data/lib/sportdb/parser/lexer_tty.rb +111 -0
data/lib/sportdb/parser/parser.rb +1768 -855
data/lib/sportdb/parser/racc_parser.rb +1 -1
data/lib/sportdb/parser/racc_tree.rb +327 -41
data/lib/sportdb/parser/token-date.rb +160 -178
data/lib/sportdb/parser/token-date_duration.rb +190 -0
data/lib/sportdb/parser/token-geo.rb +59 -59
data/lib/sportdb/parser/token-goals.rb +460 -0
data/lib/sportdb/parser/token-group.rb +43 -0
data/lib/sportdb/parser/token-note.rb +40 -0
data/lib/sportdb/parser/token-prop.rb +70 -54
data/lib/sportdb/parser/token-prop_name.rb +74 -0
data/lib/sportdb/parser/token-round.rb +102 -0
data/lib/sportdb/parser/token-score.rb +323 -47
data/lib/sportdb/parser/token-score_fuller.rb +435 -0
data/lib/sportdb/parser/token-score_legs.rb +59 -0
data/lib/sportdb/parser/token-status.rb +157 -160
data/lib/sportdb/parser/token-table.rb +149 -0
data/lib/sportdb/parser/token-text.rb +72 -23
data/lib/sportdb/parser/token-time.rb +141 -0
data/lib/sportdb/parser/token.rb +242 -105
data/lib/sportdb/parser/token_helpers.rb +92 -0
data/lib/sportdb/parser/version.rb +2 -2
data/lib/sportdb/parser.rb +24 -2
metadata +18 -18
data/config/rounds_de.txt +0 -125
data/config/rounds_en.txt +0 -29
data/config/rounds_es.txt +0 -26
data/config/rounds_misc.txt +0 -25
data/config/rounds_pt.txt +0 -4
data/config/zones_en.txt +0 -20
data/lib/sportdb/parser/lang.rb +0 -298
data/lib/sportdb/parser/token-minute.rb +0 -205

data/lib/sportdb/parser/lexer.rb CHANGED Viewed

@@ -25,194 +25,386 @@ end
   ##    for now for compatibility
   def is_group?( text )  Lang.is_group?( text ); end
   def is_round?( text )  Lang.is_round?( text ); end
-  def is_leg?( text )    Lang.is_leg?( text ); end
-  def is_zone?( text )   Lang.is_zone?( text ); end
-## transforms
-##
-##  Netherlands  1-2 (1-1)   England
-##   =>  text => team
-##       score|vs
-##       text => team
-## token iter/find better name
-##  e.g. TokenBuffer/Scanner or such ??
-class Tokens
-  def initialize( tokens )
-      @tokens = tokens
-      @pos = 0
-  end
-  def pos()  @pos; end
-  def eos?() @pos >= @tokens.size; end
+def debug?()  @debug == true; end
-  def include?( *types )
-      pos = @pos
-      ## puts "  starting include? #{types.inspect} @ #{pos}"
-      while pos < @tokens.size do
-          return true   if types.include?( @tokens[pos][0] )
-          pos +=1
-      end
-      false
-  end
+def initialize( lines, debug: false )
+   raise ArgumentError, "(string) text expected for lexer; got #{lines.class.name}"  unless lines.is_a?(String)
+   @debug = debug
+   @txt   = lines
+end
-  ## pattern e.g. [:TEXT, [:VS,:SCORE], :TEXT]
-  def match?( *pattern )
-      ## puts "  starting match? #{pattern.inspect} @ #{@pos}"
-      pattern.each_with_index do |types,offset|
-          ## if single symbol wrap in array
-          types = types.is_a?(Array) ? types : [types]
-          return false  unless types.include?( peek(offset) )
-      end
-      true
-  end
+HTML_COMMENT_RE = %r{  <!--
+                            .*?   ## note - use non-greedy/lazy *? match
+                         -->
+                       }xm      ## note - turn on multi-line match (for dot (.))
-  ## return token type  (e.g. :TEXT, :NUM, etc.)
-  def cur()           peek(0); end
-  ## return content (assumed to be text)
-  def text(offset=0)
-      ## raise error - why? why not?
-      ##   return nil?
-      if peek( offset ) != :text
-          raise ArgumentError, "text(#{offset}) - token not a text type"
-      end
-      @tokens[@pos+offset][1]
-  end
+##
+##  note - [] block may NOT incl. square brackets
+##       what about comments (e.g. #)?
+##    todo/check - rename to NOTE_BLOCK or TEXT_BLOCK or ???
+PREPROC_BLOCK_RE = %r{  \[
+                      [^\[\]\#]*?  ## note - use non-greedy/lazy *? match
+                  \]
+                        }xm  ## note - turn on multi-line match (for dot(.))
-  def peek(offset=1)
-      ## return nil if eos
-      if @pos+offset >= @tokens.size
-          nil
-      else
-         @tokens[@pos+offset][0]
-      end
-  end
-  ## note - returns complete token
-  def next
-     # if @pos >= @tokens.size
-     #     raise ArgumentError, "end of array - #{@pos} >= #{@tokens.size}"
-     # end
-     #   throw (standard) end of iteration here why? why not?
+##
+## check for "literal"  (multi-line) note blocks
+##   eg.  nb:  or note:
+##   space required after colon (:) - why? why not?
+PREPROC_NOTA_BENE_RE = %r{
+         ^
+    [ ]* (?: nb | note) [ ]* : [ ]+
+       .+?  ## non-greedy
+    ## positive lookahead
+    ##    note - must end with blank line or end-of-file/document
+    ##   note - do NOT eat-up trailing hrule (---)
+      (?=      (?: \n [ ]* -{3,} [ ]*)?
+                   \n[ ]*\n
+               | \z
+        )
+}xim
-      t = @tokens[@pos]
-      @pos += 1
-      t
-  end
+##
+##  replace "escaped" newline with non-newline char e.g. '↵'
+LINE_CONTINUATION_RE = %r{
+                           \\[ ]* \n
+                        }x
-  def collect( &blk )
-      tokens = []
-      loop do
-        break if eos?
-        tokens <<  if block_given?
-                      blk.call( self.next )
-                   else
-                      self.next
-                   end
-      end
-      tokens
-  end
-end  # class Tokens
+###
+##  check for magic comments
+##     e.g  # teletype: true    or TELETYPE: TRUE
+##             tty/teletype
+MAGIC_COMMENT_RE = %r{  \A
+                         [ ]*    ## optional leading spaces
+                        \#+      ##  note - allow ##,###, etc. too
+                         [ ]*    ## optional spaces
+                           (?<magic_comment_key> tty | teletype )
+                         [ ]*    ## optional spaces
+                            :
+                         [ ]*    ## optional spaces
+                            (?<magic_comment_value> true | false )
+                         [ ]*    ## optional trailing spaces
+                        \z
+                      }ix
-def debug?()  @debug == true; end
-def initialize( lines, debug: false )
-   @debug = debug
-##  note - for convenience - add support
-##         comments (incl. inline end-of-line comments) and empty lines here
-##             why? why not?
-##         why?  keeps handling "centralized" here in one place
-   ## todo/fix - rework and make simpler
-    ##             no need to double join array of string to txt etc.
+def tokenize_with_errors
+####
+##   flags / modes
+    @teletype = false     # use magic comment - tty/teletype: true
-    txt_pre =  if lines.is_a?( Array )
-               ## join together with newline
-                 lines.reduce( String.new ) do |mem,line|
-                                               mem << line; mem << "\n"; mem
-                                            end
-               else  ## assume single-all-in-one txt
-                 lines
-               end
-    ##  preprocess automagically - why? why not?
+    tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
+    errors         = []   ## keep a list of errors - why? why not?
+   ##  preprocess automagically - why? why not?
    ##   strip lines with comments and empty lines stripped / removed
     ##      keep empty lines? why? why not?
     ##      keep leading spaces (indent) - why?
     ##
     ##  note - KEEP empty lines (get turned into BLANK token!!!!)
-    @txt = String.new
-    txt_pre.each_line do |line|    ## preprocess
-       line = line.strip
-       next if line.start_with?('#')   ###  skip comments
-       line = line.sub( /#.*/, '' ).strip   ###  cut-off end-of line comments too
-       @txt << line
-       @txt << "\n"
+    ##  "universal" newlines
+    ##    replace all windows-style  cr+lf (\r\n) to lf (\n) only
+    txt = @txt.gsub( "\r\n", "\n" )
+    ###
+    ## quick hack for now
+    ##   remove  html-style comments <!-- -->
+    ##           (incl. multi-line)  with two spaces
+    ##       will mess-up lineno tracking!!!
+    ##    fix later to have function lineno & colno!!!
+    txt = @txt.gsub( HTML_COMMENT_RE ) do |m|
+                        puts " [debug] preproc html comment:"
+                        puts m
+                        '  '
+                   end
+=begin
+##
+##  todo/fix - add a command line switch/option for auto-format fixes !!!
+   ##  quick hack - remove later
+   ##    auto-convert "old" legacy round markers (»)
+   txt = txt.gsub( %r{^ [ ]*
+                          »
+                        (?= [ ]+)  ## require one trailing space for now!!
+                        }ix ) do |_|
+                     puts "!! WARN - auto-fix format; replacing old (alternate/legacy) round marker (»)"
+                        '▪'
+                    end
+###  16.00 => 16:00
+##     todo/check - use space for positive lookbehind & ahead
+##                      (instead of \b) - why? why not?
+##  note - check for/exclude 12.12.  date in match
+##             use negative lookahead
+##   check for 12.12.94
+##      use   positive lookbehind   !!!
+##               must be space, comma or begin-of-line [ ,]|^
+##    or use negative lookbehind
+##               must NOT be dot
+   txt = txt.gsub(  %r{
+                        ## check NEGATIVE lookbehind
+                         (?<! [.])  ## do NOT match 12.94 in 12.12.94
+                          \b
+                        (?<h>\d{1,2})
+                           \.
+                        (?<m>\d{2})
+                          \b
+                        (?! [.] )   ## do NOT match 12.12.
+                        }ix ) do |_|
+                           m = $~   ## is $LAST_MATCH_DATA
+                        puts "!! WARN - auto-fix format; replacing old (alternate/legacy) time format #{m[0]}"
+                           "#{m[:h]}:#{m[:m]}"   ## '\1:\2'
+                        end
+=end
+    ###
+    ## add more "native" multi-line comment-styles
+    ##  e.g.    #[[ ... ]]  or  #<<< .. >>> or #<< .. >>
+    ##                 or such - why? why not?
+   txt = txt.gsub( PREPROC_NOTA_BENE_RE ) do |m|
+       if m.include?( "\n" )   ## check for newlines (\n) and replace
+         puts " [debug] preproc (multi-line) note/nota bene block:"
+         puts m
+         ## todo/check: replace with two spaces instead of ↵ - why? why not?
+         m.gsub( "\n", '↵' )
+       else
+         m
+       end
     end
-end
+   ##
+   ## e.g. used in (multi-line) TableNote
+   ##  1.SOUTH KOREA   6  5  1  0 22- 1 16  [0-0]
+   ##  2.LEBANON       6  3  1  2 11- 8 10  [0-2, 0-0]
+   ##  3.Turkmenistan  6  3  0  3  8-11  9  [3-1]
+   ##  4.Sri Lanka     6  0  0  6  2-23  0  [0-1]
+   ##  -.North Korea   [withdrew after playing 5 matches due to safety concerns in
+   ##                   connection with the Covid-19 pandemic; all results annulled]
+   ##
+   ##  note - no longer used for now
+   ##     enclose multi-line notes in []
+   ##         removes need for line continuation for now
-def tokenize_with_errors
-    tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
-    errors         = []   ## keep a list of errors - why? why not?
+##
+##   txt = txt.gsub( LINE_CONTINUATION_RE ) do |_|
+##            puts " [debug] preproc line continuation"
+##              ## todo/check: replace with two spaces instead of ↵ - why? why not?
+##               '↵'
+##         end
+    #####
+    ## (another) quick hack for now
+    ##   turn multi-line note blocks into
+    ##             single-line note blocks
+    ##             by changing newline (\n) to ⏎ (unicode U+23CE)
+    ##              or why not  to ___ ?
+    ##
+    ##  unicode options for return/arrows:
+    ##   -  ↵ (U+21B5): Downwards Arrow With Corner Leftwards.
+    ##                This is the most common "carriage return" symbol.
+    ##   -  ⏎ (U+23CE): Return Symbol.
+    ##               Specifically designated as the keyboard's "Return" key symbol,
+    ##                often used in user interfaces.
+    txt = txt.gsub( PREPROC_BLOCK_RE ) do |m|
+       if m.include?( "\n" )   ## check for newlines (\n) and replace
+         puts " [debug] preproc (multi-line) block:"
+         puts m
+         ## todo/check: replace with two spaces instead of ↵ - why? why not?
+         m.gsub( "\n", '↵' )
+       else
+         m
+       end
+    end
+    ####
+    ## quick hack - keep re state/mode between tokenize calls!!!
+    @re  ||= RE     ## note - switch between RE & INSIDE_RE
-    @txt.each_line do |line|
-        line = line.rstrip   ## note - MUST remove/strip trailing newline (spaces optional)!!!
-        more_tokens, more_errors = _tokenize_line( line )
-        tokens_by_line  << more_tokens
-        errors          += more_errors
-    end # each line
-    tokens_by_line = tokens_by_line.map do |tokens|
-        #############
-        ## pass 1
-        ##   replace all texts with keyword matches
-        ##     (e.g. group, round, leg, etc.)
+    txt.each_line do |line|
+        ## line = line.rstrip   ## note - MUST remove/strip trailing newline (spaces optional)!!!
+        line = line.strip   ## note - strip leading AND trailing whitespaces
+                            ## note - trailing whitespace may incl. \n or \r\n!!!
         ##
-        ##   note - let is_round? get first (before is_group?)
-        ##            will match group stage  as round (NOT group)
-        tokens = tokens.map do |t|
-                    if t[0] == :TEXT
-                       text = t[1]
-                       t =  if is_round?( text ) || is_leg?( text ) || is_zone?( text )
-                               [:ROUND, text]
-                            elsif is_group?( text )
-                               [:GROUP, text]
-                             else
-                               t  ## pass through as-is (1:1)
-                             end
-                    end
-                   t
+        ###
+        ##  check for magic comments
+        ##     e.g  # teletype: true    or TELETYPE: TRUE
+        ##             tty/teletype
+        if line.start_with?('#')   ###  skip comments (& check magic comments!!)
+           if (m = MAGIC_COMMENT_RE.match(line))
+              magic_comment_key   = m[:magic_comment_key].downcase
+              magic_comment_value = m[:magic_comment_value].downcase
+              ##   turn on teletype mode
+              ## e.g.  tty: true  or teletype: true
+              if ['tty', 'teletype'].include?( magic_comment_key ) &&
+                 ['true'].include?( magic_comment_value )
+                 puts " magic comment - turn on teletype (tty) mode"
+                 @teletype = true
+              end
+           end
+           next
+        end
+        line = line.sub( /#.*/, '' ).strip   ###  cut-off end-of line comments too
+        ####
+        #  support __END__ marker to cut-off input
+        break if line.strip == '__END__'
+       ##
+       ##  first check for tabs
+       ##    add error/warn
+       ##    for auto-fix - replace tabs with two spaces
+        line = line.gsub( "\t" ) do |_|
+                  ## report error here
+                  ## todo/add error here
+                  puts "!! WARN - auto-fix; replacing tab (\\t) with two spaces in line #{line.inspect}"
+                   "  "   ## replace with two spaces
                  end
-        ### check for "section" starters e.g. Teams or such
-        t = tokens[0]
-        if t[0] == :TEXT
-            text = t[1]
-            if text =~ /^teams$/i
-               t[0] = :TEAMS
-            elsif text =~  /^blank$/i   ### todo/fix -- remove!!! add real blanks!!
-               t[0] = :BLANK
-            else
+        ## U+00A0 (160)  -- non-breaking space (unicode)
+        line = line.gsub( "\u00A0" ) do |uni|
+                  ## report error here
+                  ## todo/add error here
+                  puts "!! WARN - auto-fix; replacing non-breaking unicode space (#{uni}/#{uni.ord}) w/ ascii space ( /#{" ".ord}) in line #{line.inspect}"
+                   " "   ## replace with space
+                 end
+        ###
+        ## todo/fix - print unicode numbers for [–−]
+        ##                different candidates to differentiate and document!!!
+        ##   – => U+2013 (8211)     -- En Dash     (unicode)
+        ##   − => U+2212 (8722)     -- Minus Sign  (unicode)
+        line = line.gsub( /[–−]/ ) do |uni|
+                  ## report error here
+                  ## todo/add error here
+                  puts "!! WARN - auto-fix; replacing unicode dash (#{uni}/#{uni.ord}) w/ ascii dash (-/#{"-".ord}) in line #{line.inspect}"
+                   '-'   ## replace with ascii dash (-)
+                  end
+        puts "line: >#{line}<"    if debug?
+        ######
+        ### special case for empty line (aka BLANK)
+        if line.empty?
+           ## note - blank always resets parser mode to std/top-level!!!
+           @re = RE
+           tokens_by_line << [[:BLANK, '<|BLANK|>']]
+        elsif (m = HEADING_RE.match(line))
+           ## note - heading always resets parser mode to std/top-level!!!
+           @re = RE
+           puts "   HEADING"  if debug?
+           ## note - derive heading level from no of (leading) markers
+           ##             e.g. = is 1, == is 2, === is 3, etc.
+           heading_level = m[:heading_marker].size
+           tokens_by_line << [[:"H#{heading_level}", m[:heading]]]
+        elsif (m = NOTA_BENE_RE.match(line))
+           ## note - nota bene always resets parser mode to std/top-level!!!
+           @re = RE
+           tokens_by_line << [[:NOTA_BENE, m[:nota_bene]]]
+       elsif @re == RE && (m = TABLE_RE.match(line))
+            @re = TABLE_MORE_RE  ## switch into table mode
+            if m[:table_heading]
+              tokens_by_line << [[:TABLE_HEADING, m[:table_heading]]]
+            else  ## assume table (line) e.g. m[:table]
+              tokens_by_line << [[:TABLE_LINE, line]]
+            end
+        elsif @re == TABLE_MORE_RE
+            ### todo/fix - check if no match and report/add error!!
+            ##        for now (unmatched) line gets auto-added as table line!!!
+            ##
+            ##   note - MUST be followed by blank line (or nota bene/heading)
+            ##            to switch back to top-level!!!!
+            m = TABLE_MORE_RE.match(line)
+            if m[:table_note]
+              tokens_by_line << [[:TABLE_NOTE, m[:table_note]]]
+            elsif m[:table_divider]
+              tokens_by_line << [[:TABLE_DIVIDER, m[:table_divider]]]
+            else  ## assume table (line) e.g. m[:table]
+              tokens_by_line << [[:TABLE_LINE, line]]
             end
+        elsif @re != TABLE_MORE_RE &&  (m = HRULER_RE.match(line))
+           ## note - hruler (---)
+           ##          will only match if NOT in table mode!!!
+           ##   otherwise
+           ##      hruler always resets parser mode to std/top-level!!!
+           @re = RE
+           tokens_by_line << [[:HRULER, '<|HRULER|>']]
+        elsif @teletype && (@re == RE && IS_TTY_LINE_RE.match(line))
+            ## try experimental TELETYPE (TTY) mode!!!
+            ##    note - turn on via magic comment e.g.  tty/teletype: true
+            ###
+            ###    move inside _tokenize_line - why? why not?
+            tokens_by_line << _tokenize_tty_line( line )
+            ##   note - dates such as
+            ##         APR 11 or 11 APR   will trigger TELETYPE
+            ###    ## check letter
+        else
+          more_tokens, more_errors = _tokenize_line( line )
+          tokens_by_line  << more_tokens
+          errors          += more_errors
         end
+    end # each line
+    tokens_by_line = tokens_by_line.map do |tokens|
         #################
-        ## pass 2
         ##    transform tokens (using simple patterns)
         ##      to help along the (racc look ahead 1 - LA1) parser
         nodes = []
@@ -220,48 +412,72 @@ def tokenize_with_errors
         buf = Tokens.new( tokens )
         ## pp buf
     loop do
           break if buf.eos?
-          if buf.pos == 0   ## MUST start line
-            ## check for
-            ##    group def or round def
-            if buf.match?( :ROUND, :'|' )    ## assume round def (change round to round_def)
-                      nodes << [:ROUND_DEF, buf.next[1]]
-                      nodes << buf.next
-                      nodes += buf.collect
-                      break
-            end
-            if buf.match?( :GROUP, :'|' )    ## assume group def (change group to group_def)
-                      nodes << [:GROUP_DEF, buf.next[1]]
-                      nodes << buf.next
-                      ## change all text to team - why? why not?
-                      nodes += buf.collect { |t|
-                                t[0] == :TEXT ? [:TEAM, t[1]] : t
-                               }
-                      break
-            end
-          end
-          if buf.match?( :TEXT, [:SCORE, :SCORE_MORE, :VS, :'-'], :TEXT )
-             nodes << [:TEAM, buf.next[1]]
-             nodes << buf.next
-             nodes << [:TEAM, buf.next[1]]
-   #   note - now handled (upstream) with GOAL_RE mode!!!
-   #       elsif buf.match?( :TEXT, :MINUTE )
-   #          nodes << [:PLAYER, buf.next[1]]
-   #          nodes << buf.next
-          elsif buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
+          if buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
                date = buf.next[1]
                time = buf.next[1]
                ## puts "DATETIME:"
                ## pp date, time
+               ##  note:  time value is { time: {} } or
+               ##                       { time: {}, time_local: {} }
                val =  [date[0] + ' ' + time[0],  ## concat string of two tokens
-                        { date: date[1], time: time[1] }
+                        { date: date[1] }.merge( time[1] )
+                      ]
+               nodes << [:DATETIME, val]
+         ### support  date time with comma too - why? why not?
+         elsif buf.match?( :DATE, :',', :TIME )
+               date  = buf.next[1]
+               _    = buf.next  ## ignore comma
+               time = buf.next[1]
+               ## puts "DATETIME:"
+               ## pp date, time
+               val =  [date[0] + ', ' + time[0],  ## concat string of two tokens
+                        { date: date[1] }.merge( time[1] )
+                      ]
+               nodes << [:DATETIME, val]
+        elsif buf.match?( :TEAM, :SCORE_TEAM )
+            ## merge TEAM SCORE_TEAM into TEAMALT
+            ##     (use TEAMENTRY or TEAMRESULT - why? why not?)
+               team       = buf.next[1]
+               score_team = buf.next[1]
+               val =  [team + ' ' + score_team[0],  ## concat string of two tokens
+                        { team: team }.merge( score_team[1] )
+                      ]
+               nodes << [:TEAMALT, val]
+        elsif buf.match?( :TEAM, :SCORE_TEAM_PEN )
+               team           = buf.next[1]
+               score_team_pen = buf.next[1]
+               val =  [team + ' ' + score_team_pen[0],  ## concat string of two tokens
+                        { team: team }.merge( score_team_pen[1] )
+                      ]
+               nodes << [:TEAMALT_PEN, val]
+        elsif buf.match?( :TEAM, :SCORE_TEAM_NUM )
+               team           = buf.next[1]
+               score_team_num = buf.next[1]
+               val =  [team + ' ' + score_team_num[0],  ## concat string of two tokens
+                        { team: team }.merge( score_team_num[1] )
                       ]
-               nodes << [:DATETIME, val]
+               nodes << [:TEAMALT_NUM, val]
+         elsif buf.match?( :GOAL_MINUTE, :',', :GOAL_MINUTE )
+             ## note - only advance by two tokens!
+             ##     allows more :GOAL_MINUTE sequences!! e.g. 12,13,14 etc!!!
+             ##
+             ## help parser with comma shift/reduce conflict
+             ##   change ',' to GOAL_MINUTE_SEP !!!
+             nodes << buf.next   ## pass through goal_minute
+             _ = buf.next  ## eat-up goal_minute_sep a.k.a. comma (,)
+                           ##   and replace with dedicated sep(arator)
+             nodes << [:GOAL_MINUTE_SEP,"<|GOAL_MINUTE_SEP|>"]
+         elsif buf.match?( :',', :INLINE_ATTENDANCE )
+             ## note  - allow optional comma before inline attendance
+             ## help parser with comma shift/reduce conflict
+             ##   change ',' to INLINE_ATTENDANCE_SEP !!!
+             nodes << [:INLINE_ATTENDANCE_SEP, "<|INLINE_ATTENDANCE_SEP|>"]
+             _ = buf.next  ## eat-up inline_attendance_sep a.k.a. comma (,)
+                           ##   and replace with dedicated sep(arator)
+             nodes << buf.next   ## pass through inline_attendance
           else
              ## pass through
              nodes << buf.next
@@ -271,6 +487,7 @@ def tokenize_with_errors
   end  # map tokens_by_line
     ## flatten tokens
     tokens = []
@@ -280,9 +497,49 @@ def tokenize_with_errors
            pp tok
          end
+     ###############
+     ##   "hacky" (automagic) line merges (remove newline)
+           ## if line start with @  - check if incl. teams
+     ###
+     ### quick merge lines hack
+     ##    if line starts with geo-marker token @
+     ##            check if line incl. TEAM
+     ##           if yes, leave alone
+     ##            otherwise  merge line into previous line!!
+     ##       - todo/fix - possibly handle in grammar!!!
+     ##        for now match_line CAN start with @ London
+     ##                 resulting in parser conflict(s)!!!
+     ##    e.g.
+     ##       England v Scotland
+     ##          @ London
+     ##          =>
+     ##        England v Scotland @ London
+     ##
+     ##
+     ##  note/todo - if INDENT / SPACES get added
+     ##                adjust here
+     ##   tok[0][0] == :INDENT  (or :SPACES) &&
+     ##   tok[1][0] == :'@'
+           if tok[0] && tok[0][0] == :'@'
+                team =  tok.find { |t| t[0] == :TEAM }
+                if team
+                   ## do nothing - keep as is (assume match_line starting w/ @)
+                else
+                  ## no team(s) found in line
+                  ##    remove last token (that is, NEWLINE)
+                  ##   note - possibly is blank ?!  keep blank
+                  tokens.pop  if tokens[-1][0] == :NEWLINE
+                end
+           end
          tokens  += tok
          ## auto-add newlines  (unless BLANK!!)
-         tokens  << [:NEWLINE, "\n"]   unless tok[0][0] == :BLANK
+         tokens  << [:NEWLINE, "\n"]   unless tok[0] && tok[0][0] == :BLANK
     end
     [tokens,errors]
@@ -290,42 +547,11 @@ end   # method tokenize_with_errors
-### add a QUICK_PLAYER_WITH_MINUTE  check
-QUICK_PLAYER_WITH_MINUTE_RE = %r{
-      ##  note - \b  NOT working for ? !!!
-      ##
-      ##  use positive lookbehind
-        (?<= [ ,;\(\)\[\]]|^)
-        (?:
-            (?:
-                \d{1,3}      ## constrain numbers to 0 to 999!!!
-                (?: \+\d{1,3}
-                 )?
-            )
-            |
-            (?: \?{2} | _{2} )  ## add support for n/a (not/available)
-        )
-        '   ## must have minute marker!!!!
-}ix
 def _tokenize_line( line )
   tokens = []
   errors = []   ## keep a list of errors - why? why not?
-  puts "line: >#{line}<"    if debug?
-   ### special case for empty line (aka BLANK)
-   if line.empty?
-       ## note - blank always resets parser mode to std/top-level!!!
-       @re = RE
-       tokens << [:BLANK, '<|BLANK|>']
-       return [tokens, errors]
-   end
   pos = 0
   ## track last offsets - to report error on no match
@@ -333,6 +559,9 @@ def _tokenize_line( line )
   offsets = [0,0]
   m = nil
+  ## track number of geo text seen
+  ##    (use for - do NOT break on two spaces if no geo text seen yet!!)
+  geo_count = 0
   ####
   ## quick hack - keep re state/mode between tokenize calls!!!
@@ -342,34 +571,76 @@ def _tokenize_line( line )
   if @re == RE  ## top-level
     ### check for modes once (per line) here to speed-up parsing
     ###   for now goals only possible for start of line!!
-    ###        fix - remove optional [] - why? why not?
-    ##  start with prop key (match will switch into prop mode!!!)
-    ##   - fix - remove leading spaces in regex (upstream) - why? why not?
-    if (m = PROP_KEY_RE.match( line ))
+    ###        fix - remove optional [] - why? why not?
+    ####
+    ## note - ord e.g. (45) for match number can only start a (match) line
+    ##                "inline" use NOT possible
+    ## note -  ord (for ordinal number!!!) e.g match number (1), (42), etc.
+    if (m = START_WITH_ORD.match(line))
+       ## note -  strip enclosing () and convert to integer
+       tokens << [:ORD, [m[:ord], { value: m[:value].to_i(10) } ]]
+       offsets = [m.begin(0), m.end(0)]
+       pos = offsets[1]    ## update pos
+    elsif (m = START_WITH_YEAR.match(line))
+       ## note -  strip enclosing () and convert to integer
+       tokens << [:YEAR, m[:year].to_i(10)]
+       offsets = [m.begin(0), m.end(0)]
+       pos = offsets[1]    ## update pos
+    ###
+    ##  todo/fix - rename to START_GROUP_DEF_LINE_RE !!!!
+    elsif (m = GROUP_DEF_LINE_RE.match( line ))
+      puts "  ENTER GROUP_DEF_RE MODE"   if debug?
+      @re = GROUP_DEF_RE
+      tokens << [:GROUP_DEF, m[:group_def]]
+      offsets = [m.begin(0), m.end(0)]
+      pos = offsets[1]    ## update pos
+    ###  todo/fix - rename to PROP_KEY_RE to START_WITH_PROP_KEY_RE !!!
+    elsif (m = PROP_KEY_RE.match( line ))
+      ##  start with prop key (match will switch into prop mode!!!)
+      ##   - fix - remove leading spaces in regex (upstream) - why? why not?
+      ##
       ###  switch into new mode
       ##  switch context  to PROP_RE
         puts "  ENTER PROP_RE MODE"   if debug?
         key = m[:key]
-        ### todo - add prop yellow/red cards too - why? why not?
-        if ['sent off', 'red cards'].include?( key.downcase)
+        ### todo/fix - add prop yellow/red cards too - why? why not?
+        ##  todo/fix - separate sent off and red card
+        ##     sent-off - incl. red card, yellow/red card and the era before red cards!!
+        if ['sent off'].include?( key.downcase)
+          @re = PROP_CARDS_RE    ## use CARDS_RE ???
+          tokens << [:PROP_SENTOFF, m[:key]]
+        elsif ['red cards'].include?( key.downcase )
           @re = PROP_CARDS_RE    ## use CARDS_RE ???
           tokens << [:PROP_REDCARDS, m[:key]]
         elsif ['yellow cards'].include?( key.downcase )
           @re = PROP_CARDS_RE
           tokens << [:PROP_YELLOWCARDS, m[:key]]
-        elsif ['ref', 'referee'].include?( key.downcase )
+        elsif ['ref', 'referee',
+               'refs', 'referees'   ## note - allow/support assistant refs
+              ].include?( key.downcase )
           @re = PROP_REFEREE_RE
           tokens << [:PROP_REFEREE, m[:key]]
         elsif ['att', 'attn', 'attendance'].include?( key.downcase )
           @re = PROP_ATTENDANCE_RE
           tokens << [:PROP_ATTENDANCE, m[:key]]
-        elsif ['goals'].include?( key.downcase )
-          @re = PROP_GOAL_RE
-          tokens << [:PROP_GOALS, m[:key]]
-        elsif ['penalties', 'penalty shootout'].include?( key.downcase )
+     #   elsif ['goals'].include?( key.downcase )
+     #     @re = PROP_GOAL_RE
+     #     tokens << [:PROP_GOALS, m[:key]]
+        elsif ['penalties',
+               'penalty shootout',
+               'penalty shoot-out',
+               'penalty kicks'].include?( key.downcase )
           @re = PROP_PENALTIES_RE
           tokens << [:PROP_PENALTIES, m[:key]]
         else   ## assume (team) line-up
@@ -379,63 +650,69 @@ def _tokenize_line( line )
         offsets = [m.begin(0), m.end(0)]
         pos = offsets[1]    ## update pos
+    ###
+    ### todo/fix
+    ###   rename to START_WITH_ROUND_DEF_OUTLINE_RE !!!!
+    elsif (m = ROUND_DEF_OUTLINE_RE.match( line ))
+      puts "   ENTER ROUND_DEF_RE MODE"  if debug?
+      @re = ROUND_DEF_RE
+      ## note - return ROUND_DEF NOT  ROUND_OUTLINE token
+      tokens << [:ROUND_DEF, m[:round_outline]]
+      offsets = [m.begin(0), m.end(0)]
+      pos = offsets[1]    ## update pos
     elsif (m = ROUND_OUTLINE_RE.match( line ))
       puts "   ROUND_OUTLINE"  if debug?
+      ## note - derive round level from no of (leading) markers
+      ##             e.g. ▪/:: is 1, ▪▪/::: is 2, ▪▪▪/:::: is 3, etc.
+      ##       note  - ascii-style starts with double ::, thus, autodecrement by one!
+      round_level = m[:round_marker].size
+      round_level -= 1  if m[:round_marker].start_with?( '::' )
-      tokens << [:ROUND_OUTLINE, m[:round_outline]]
+      tokens << [:ROUND_OUTLINE, [m[:round_outline],
+                      { outline: m[:round_outline] ,
+                        level: round_level}]]
       ## note - eats-up line for now (change later to only eat-up marker e.g. »|>>)
       offsets = [m.begin(0), m.end(0)]
       pos = offsets[1]    ## update pos
-    elsif (m = PLAYER_WITH_SCORE_RE.match( line ))
-      ##  switch context to GOAL_RE (goalline(s)
-      ##   split token (automagically) into two!! - player AND minute!!!
-      @re = GOAL_RE
-      puts "  ENTER GOAL_RE MODE"   if debug?
-      score = {}
-      ## must always have ft for now e.g. 1-1 or such
-      ###  change to (generic) score from ft -
-      ##     might be score a.e.t. or such - why? why not?
-      score[:ft] = [m[:ft1].to_i(10),
-                    m[:ft2].to_i(10)]
-      ## note - for debugging keep (pass along) "literal" score
-      tokens << [:SCORE, [m[:score], score]]
-      ## auto-add player token
-      tokens << [:PLAYER, m[:name]]
-      offsets = [m.begin(0), m.end(0)]
-      pos = offsets[1]    ## update pos
+    elsif (m = START_GOAL_LINE_RE.match( line ))   ## line starting with ( - assume
+      ##  switch context to GOAL_RE (goalline(s))
+      ####
+      ##  note - check for alternate goal line styles / formats
+      if START_GOAL_LINE_COMPAT_RE.match(line )
+        ## "legacy" style starting with minute e.g.
+        ##  (6 Puskás 0-1, 9 Czibor 0-2, 11 Morlock 1-2, 18 Rahn 2-2,
+        ##    84 Rahn 3-2)
+        @re = GOAL_COMPAT_RE
+        puts "  ENTER GOAL_COMPAT_RE MODE"   if debug?
+        tokens << [:GOALS_COMPAT, "<|GOALS_COMPAT|>"]
+      elsif START_GOAL_LINE_ALT_RE.match( line )
+        ##  goals with scores e.g.
+        ##    (1-0 Franck Ribéry, 2-0 Ivica Olić, 2-1 Wayne Rooney)
+        ##         -or-
+        ##      (Dion Beljo  1-0
+        ##                   1-1  Andreas Gruber
+        ##   Matthias Seidl  2-1)
+        @re = GOAL_ALT_RE
+        puts "  ENTER GOAL_ALT_RE MODE"   if debug?
+        tokens << [:GOALS_ALT, "<|GOALS_ALT|>"]
+      else
+        ## "standard" / default style
+        @re = GOAL_RE
+        puts "  ENTER GOAL_RE MODE"   if debug?
-    ####  FIX/FIX/TODO
-    ### looks to hang in player with minute
-    ###  FIX - improve / rework PLAYER_WITH_MINUTE_RE  regex!!!!
-    elsif (_quick = QUICK_PLAYER_WITH_MINUTE_RE.match(line) &&
-                m = PLAYER_WITH_MINUTE_RE.match( line ))
-      ##  switch context to GOAL_RE (goalline(s)
-      ##   split token (automagically) into two!! - player AND minute!!!
-      @re = GOAL_RE
-      puts "  ENTER GOAL_RE MODE"   if debug?
-      ## check for optional open_bracket
-      tokens << [:'[']     if m[:open_bracket]
-      ## check for  -;  (none with separator)
-      ##    todo - find a better way? how possible?
-      tokens << [:NONE, "<|NONE|>"]   if m[:none]
-      ## auto-add player token first
-      tokens << [:PLAYER, m[:name]]
-      ## minute props
-      minute = {}
-      minute[:m]      = m[:value].to_i(10)
-      minute[:offset] = m[:value2].to_i(10)   if m[:value2]
-      ##  t is minute only
-      tokens << [:MINUTE, [m[:minute], minute]]
+        tokens << [:GOALS, "<|GOALS|>"]
+      end
+      ## note - eat-up ( for now
+      ##   pass along "virtual" GOALS or GOALS_ALT token
+      ##      (see INLINE_GOALS for the starting goal line inline)
       offsets = [m.begin(0), m.end(0)]
-      pos = offsets[1]    ## update pos
+      pos = offsets[1]    ## update pos
     end
   end
@@ -475,24 +752,105 @@ def _tokenize_line( line )
     ## note: racc requires pairs e.g. [:TOKEN, VAL]
     ##         for VAL use "text" or ["text", { opts }]  array
-  t = if @re == GEO_RE
-         ### note - possibly end inline geo on [ (and others?? in the future
-         if m[:space] || m[:spaces]
-            nil    ## skip space(s)
-         elsif m[:text]
-            [:GEO, m[:text]]   ## keep pos - why? why not?
-         elsif m[:timezone]
-            [:TIMEZONE, m[:timezone]]
-         elsif m[:sym]
-            sym = m[:sym]
-            ## return symbols "inline" as is - why? why not?
-            ## (?<sym>[;,@|\[\]-])
-            case sym
-            when ',' then [:',']
-            when '›' then [:',']  ## note - treat geo sep › (unicode) like comma for now!!!
-            when '>' then [:',']  ## note - treat geo sep > (ascii) like comma for now!!!
-            when '[' then
+  t = if @re == ROUND_DEF_RE
+           if m[:spaces] || m[:space]
+               nil    ## skip spaces
+           elsif m[:date]
+            [:DATE, [m[:date], _build_date( m )]]
+          elsif m[:duration]
+            [:DURATION, [m[:duration], _build_duration( m )]]
+          elsif m[:sym]
+              sym = m[:sym]
+              case sym
+              when '|' then  [:'|']
+              when ':' then  [:':']
+              when ',' then  [:',']
+              else
+                puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
+                nil  ## ignore others (e.g. brackets [])
+              end
+           elsif m[:any]
+              ## todo/check log error
+               msg = "parse error (tokenize round_def) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
+               puts "!! WARN - #{msg}"
+               errors << msg
+               log( "!! WARN - #{msg}" )
+               nil
+            else
+              ## report error/raise exception
+               puts "!!! TOKENIZE ERROR - no match found"
+               nil
+            end
+      elsif @re == GROUP_DEF_RE
+           if m[:spaces] || m[:space]
+               nil    ## skip spaces
+           elsif m[:text]
+               [:TEAM, m[:text]]
+           elsif m[:sym]
+              sym = m[:sym]
+              case sym
+              when '|' then  [:'|']
+              when ':' then  [:':']
+              when ',' then  [:',']
+              else
+                puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
+                nil  ## ignore others (e.g. brackets [])
+              end
+           elsif m[:any]
+              ## todo/check log error
+               msg = "parse error (tokenize group_def) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
+               puts "!! WARN - #{msg}"
+               errors << msg
+               log( "!! WARN - #{msg}" )
+               nil
+            else
+              ## report error/raise exception
+               puts "!!! TOKENIZE ERROR - no match found"
+               nil
+            end
+       elsif @re == GEO_RE
+           ### note - possibly end inline geo on [ (and others?? in the future
+           ## note: break on double spaces e.g.
+           ## e.g. Jul/16 @ Arena Auf Schalke, Gelsenkirchen  Serbia 0-1 England
+           if m[:spaces]
+                 ### note - do NOT break out
+                 ##           if not text seen yet!!!
+                 if geo_count > 0
+                    ## get out-off geo mode and backtrack (w/ next)
+                    puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
+                    @re = RE
+                    pos = old_pos
+                    next   ## backtrack (resume new loop step)
+                 else
+                     nil   ## skip spaces
+                 end
+           elsif m[:space]
+               nil    ## skip (single) space
+           elsif m[:text]
+               geo_count += 1
+               [:GEO, m[:text]]   ## keep pos - why? why not?
+           elsif m[:geo_end]   ## "hacky" special comma; always ends geo mode!!!
+                 ## get out-off geo mode and backtrack (w/ next)
+                 puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
+                 @re = RE
+                 pos = old_pos
+                 next   ## backtrack (resume new loop step)
+           elsif m[:sym]
+              sym = m[:sym]
+              ## return symbols "inline" as is - why? why not?
+              ## (?<sym>[;,@|\[\]-])
+              case sym
+                ## note - reset geo_count to 0 (avoids break on two spaces)
+                ##                     if separator seen!!
+              when ',' then geo_count = 0; [:',']
+              when '›' then geo_count = 0; [:',']  ## note - treat geo sep › (unicode) like comma for now!!!
+              when '>' then geo_count = 0; [:',']  ## note - treat geo sep > (ascii) like comma for now!!!
+              when '[' then
                  ## get out-off geo mode and backtrack (w/ next)
                  puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
                  @re = RE
@@ -554,19 +912,29 @@ def _tokenize_line( line )
                 ## report error - for unknown (inline) prop key in lineup
                 nil
               end
+         elsif m[:inline_captain]
+              [:INLINE_CAPTAIN, m[:inline_captain]]
+         elsif m[:inline_yellow]
+              card = {}
+              card[:m]      = m[:minute].to_i(10)  if m[:minute]
+              card[:offset] = m[:offset].to_i(10)  if m[:offset]
+              [:INLINE_YELLOW, [m[:inline_yellow], card]]
+         elsif m[:inline_red]
+              card = {}
+              card[:m]      = m[:minute].to_i(10)  if m[:minute]
+              card[:offset] = m[:offset].to_i(10)  if m[:offset]
+              [:INLINE_RED, [m[:inline_red], card]]
+         elsif m[:inline_yellow_red]
+              card = {}
+              card[:m]      = m[:minute].to_i(10)  if m[:minute]
+              card[:offset] = m[:offset].to_i(10)  if m[:offset]
+              [:INLINE_YELLOW_RED, [m[:inline_yellow_red], card]]
          elsif m[:prop_name]
-               if m[:name] == 'Y'
-                 [:YELLOW_CARD, m[:name]]
-               elsif m[:name] == 'R'
-                 [:RED_CARD, m[:name]]
-               else
-                 [:PROP_NAME, m[:name]]
-               end
+              [:PROP_NAME, m[:name]]
          elsif m[:minute]
               minute = {}
               minute[:m]      = m[:value].to_i(10)
               minute[:offset] = m[:value2].to_i(10)   if m[:value2]
-             ## note - for debugging keep (pass along) "literal" minute
              [:MINUTE, [m[:minute], minute]]
          elsif m[:sym]
             sym = m[:sym]
@@ -661,9 +1029,8 @@ def _tokenize_line( line )
               ## must always have ft for now e.g. 1-1 or such
               ###  change to (generic) score from ft -
               ##     might be score a.e.t. or such - why? why not?
-              score[:ft] = [m[:ft1].to_i(10),
-                            m[:ft2].to_i(10)]
-              ## note - for debugging keep (pass along) "literal" score
+              score[:score] = [m[:score1].to_i(10),
+                               m[:score2].to_i(10)]
               [:SCORE, [m[:score], score]]
          elsif m[:sym]
             sym = m[:sym]
@@ -680,30 +1047,107 @@ def _tokenize_line( line )
             puts "!!! TOKENIZE ERROR (PROP_PENALTIES_RE) - no match found"
             nil
          end
-      elsif @re == GOAL_RE || @re == PROP_GOAL_RE
+      elsif @re == GOAL_COMPAT_RE
          if m[:space] || m[:spaces]
               nil    ## skip space(s)
          elsif m[:prop_name]    ## note - change prop_name to player
              [:PLAYER, m[:name]]
          elsif m[:minute]
-              minute = {}
-              minute[:m]      = m[:value].to_i(10)
-              minute[:offset] = m[:value2].to_i(10)   if m[:value2]
-             ## note - for debugging keep (pass along) "literal" minute
+              minute = _build_minute( m )
              [:MINUTE, [m[:minute], minute]]
+         elsif m[:goal_type]
+              goal_type = _build_goal_type( m )
+             [:GOAL_TYPE, [m[:goal_type], goal_type]]
          elsif m[:score]
-              score = {}
-              ## must always have ft for now e.g. 1-1 or such
-              ###  change to (generic) score from ft -
-              ##     might be score a.e.t. or such - why? why not?
-              score[:ft] = [m[:ft1].to_i(10),
-                            m[:ft2].to_i(10)]
-              ## note - for debugging keep (pass along) "literal" score
-              [:SCORE, [m[:score], score]]
-         elsif m[:og]
-             [:OG, m[:og]]    ## for typed drop - string version/variants ??  why? why not?
-         elsif m[:pen]
-             [:PEN, m[:pen]]
+            score = {}
+             ##  note - score is "generic"
+            ##      might be full-time (ft) or
+            ##         after extra-time (aet) or such
+            ##         or even undecided/unknown
+            ##    thus, use score1/score2 and NOT ft1/ft2
+            score[:score] = [m[:score1].to_i(10),
+                             m[:score2].to_i(10)]
+            ## note - for debugging keep (pass along) "literal" score
+            [:SCORE, [m[:score], score]]
+         elsif m[:sym]
+            sym = m[:sym]
+            ## return symbols "inline" as is - why? why not?
+            ## (?<sym>[;,@|\[\]-])
+            case sym
+            when ',' then [:',']
+            when ')'  ## leave goal mode!!
+                puts "  LEAVE GOAL_COMPAT_RE MODE"   if debug?
+                @re = RE
+                ##  note - use/return GOAL_END token   - change to GOAL_END_PAREN(THESIS)
+                ##                                or GOAL_PAREN_CLOSE/END ???
+                [:GOALS_END, '<|GOALS_END|>']
+            else
+              nil  ## ignore others (e.g. brackets [])
+            end
+         else
+            ## report error
+            puts "!!! TOKENIZE ERROR (GOAL_COMPAT_RE) - no match found"
+            nil
+         end
+      elsif @re == GOAL_ALT_RE
+         if m[:space] || m[:spaces]
+              nil    ## skip space(s)
+         elsif m[:prop_name]    ## note - change prop_name to player
+             [:PLAYER, m[:name]]
+         elsif m[:goal_minute]
+              minute = _build_goal_minute( m )
+             [:GOAL_MINUTE, [m[:goal_minute], minute]]
+         elsif m[:goal_type]
+              goal_type = _build_goal_type( m )
+             [:GOAL_TYPE, [m[:goal_type], goal_type]]
+         elsif m[:score]
+            score = {}
+             ##  note - score is "generic"
+            ##      might be full-time (ft) or
+            ##         after extra-time (aet) or such
+            ##         or even undecided/unknown
+            ##    thus, use score1/score2 and NOT ft1/ft2
+            score[:score] = [m[:score1].to_i(10),
+                             m[:score2].to_i(10)]
+            ## note - for debugging keep (pass along) "literal" score
+            [:SCORE, [m[:score], score]]
+         elsif m[:sym]
+            sym = m[:sym]
+            ## return symbols "inline" as is - why? why not?
+            ## (?<sym>[;,@|\[\]-])
+            case sym
+            when ',' then [:',']
+            when ')'  ## leave goal mode!!
+                puts "  LEAVE GOAL_ALT_RE MODE"   if debug?
+                @re = RE
+                ##  note - use/return GOAL_END token   - change to GOAL_END_PAREN(THESIS)
+                ##                                or GOAL_PAREN_CLOSE/END ???
+                [:GOALS_END, '<|GOALS_END|>']
+            else
+              nil  ## ignore others (e.g. brackets [])
+            end
+         else
+            ## report error
+            puts "!!! TOKENIZE ERROR (GOAL_ALT_RE) - no match found"
+            nil
+         end
+      elsif @re == GOAL_RE
+         if m[:space] || m[:spaces]
+              nil    ## skip space(s)
+         elsif m[:goals_none]    ## note - eats-up semicolon!! e.g. -; or - ;
+             [:GOALS_NONE, "<|GOALS_NONE|>"]
+         elsif m[:goal_sep_alt]
+             [:GOAL_SEP_ALT, "<|GOAL_SEP_ALT|>" ]   ## e.g. dash (-) WITH leading & trailing space required
+         elsif m[:prop_name]    ## note - change prop_name to player
+             [:PLAYER, m[:name]]
+         elsif m[:goal_minute]
+              minute = _build_goal_minute( m )
+             [:GOAL_MINUTE, [m[:goal_minute], minute]]
+         elsif m[:goal_count]
+              count = _build_goal_count( m )
+              [:GOAL_COUNT, [m[:goal_count], count]]
          elsif m[:sym]
             sym = m[:sym]
             ## return symbols "inline" as is - why? why not?
@@ -712,8 +1156,14 @@ def _tokenize_line( line )
             case sym
             when ',' then [:',']
             when ';' then [:';']
-            when '[' then [:'[']
-            when ']' then [:']']
+            # when '[' then [:'[']
+            # when ']' then [:']']
+            when ')'  ## leave goal mode!!
+                puts "  LEAVE GOAL_RE MODE"   if debug?
+                @re = RE
+                ##  note - use/return GOAL_END token   - change to GOAL_END_PAREN(THESIS)
+                ##                                or GOAL_PAREN_CLOSE/END ???
+                [:GOALS_END, '<|GOALS_END|>']
             else
               nil  ## ignore others (e.g. brackets [])
             end
@@ -728,74 +1178,112 @@ def _tokenize_line( line )
         if m[:space] || m[:spaces]
            nil   ## skip space(s)
         elsif m[:text]
-          [:TEXT, m[:text]]   ## keep pos - why? why not?
+          ##  note - top-level (for now always) assumes TEAM for TEXT match!!
+          [:TEAM, m[:text]]   ## keep pos - why? why not?
         elsif m[:status]   ## (match) status e.g. cancelled, awarded, etc.
-          ## todo/check - add text (or status)
-          #     to opts hash {} by default (for value)
-          if m[:status_note]   ## includes note? e.g.  awarded; originally 2-0
-             [:STATUS, [m[:status], {status: m[:status],
-                                     note:   m[:status_note]} ]]
-          else
-             [:STATUS, [m[:status], {status: m[:status] } ]]
-          end
+            [:STATUS, [m[:status], _build_status( m ) ]]
+        elsif m[:inline_wo]   ## w/o - walkover  (match status)
+            [:INLINE_WO, m[:inline_wo]]
+        elsif m[:inline_np]   ## n/p - not played (match status)
+            [:INLINE_NP, m[:inline_np]]
+        elsif m[:inline_bye]  ## bye  (match status)
+            [:INLINE_BYE, m[:inline_bye]]
+        elsif m[:inline_abd]  ## abd/abd. - abandoned (match status)
+            [:INLINE_ABD, m[:inline_abd]]
+        elsif m[:inline_void]  ## void - voided (match status)
+            [:INLINE_VOID, m[:inline_void]]
+        elsif m[:inline_susp]  ## susp/susp. - suspended (match status)
+            [:INLINE_SUSP, m[:inline_susp]]
+        elsif m[:inline_ppd]  ## ppd/ppd. or postp/postp. - postponed (match status)
+            [:INLINE_PPD, m[:inline_ppd]]
+        elsif m[:inline_awd]  ## awd/awd. - awarded (match status)
+            [:INLINE_AWD, m[:inline_awd]]
+        elsif m[:inline_canc]  ## canc/canc. - cancelled/canceled (match status)
+            [:INLINE_CANC, m[:inline_canc]]
+        elsif m[:team_home]
+            [:TEAM_HOME, m[:team_home]]
+        elsif m[:team_away]
+            [:TEAM_AWAY, m[:team_away]]
+        elsif m[:team_neutral]
+            [:TEAM_NEUTRAL, m[:team_neutral]]
+        elsif m[:attendance]
+             att = {}
+             att[:value] = m[:value].gsub( '_', '' ).to_i(10)
+             ## note - for token id use INLINE_ATTENDANCE  (ATTENDANCE in use for prop!!!)
+            [:INLINE_ATTENDANCE, [m[:attendance], att ]]
         elsif m[:note]
             ###  todo/check:
             ##      use value hash - why? why not? or simplify to:
             ## [:NOTE, [m[:note], {note: m[:note] } ]]
              [:NOTE, m[:note]]
-        elsif m[:score_note]
-             [:SCORE_NOTE, m[:score_note]]
         elsif m[:time]
-              ## unify to iso-format
-              ###   12.40 => 12:40
-              ##    12h40 => 12:40 etc.
-              ##  keep string (no time-only type in ruby)
-              hour =   m[:hour].to_i(10)  ## allow 08/07/etc.
-              minute = m[:minute].to_i(10)
-              ## check if valid -  0:00 - 24:00
-              ##   check if 24:00 possible? or only 0:00 (23:59)
-              if (hour >= 0 && hour <= 24) &&
-                 (minute >=0 && minute <= 59)
-               ## note - for debugging keep (pass along) "literal" time
-               ##   might use/add support for am/pm later
-               [:TIME, [m[:time], {h:hour,m:minute}]]
-              else
-                 raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
-              end
+            [:TIME, [m[:time], _build_time(m)]]
         elsif m[:date]
-            date = {}
- ## map month names
- ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
-            date[:y]  = m[:year].to_i(10)  if m[:year]
-            ## check - use y too for two-digit year or keep separate - why? why not?
-            date[:yy] = m[:yy].to_i(10)    if m[:yy]    ## two digit year (e.g. 25 or 78 etc.)
-            date[:m] = m[:month].to_i(10)  if m[:month]
-            date[:m] = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
-            date[:d]  = m[:day].to_i(10)   if m[:day]
-            date[:wday] = DAY_MAP[ m[:day_name].downcase ]   if m[:day_name]
-            ## note - for debugging keep (pass along) "literal" date
-            [:DATE, [m[:date], date]]
-        elsif m[:duration]
-            ## todo/check/fix - if end: works for kwargs!!!!!
-            duration = { start: {}, end: {}}
-            duration[:start][:y] = m[:year1].to_i(10)  if m[:year1]
-            duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ]   if m[:month_name1]
-            duration[:start][:d]  = m[:day1].to_i(10)   if m[:day1]
-            duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ]   if m[:day_name1]
-            duration[:end][:y] = m[:year2].to_i(10)  if m[:year2]
-            duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ]   if m[:month_name2]
-            duration[:end][:d]  = m[:day2].to_i(10)   if m[:day2]
-            duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ]   if m[:day_name2]
-            ## note - for debugging keep (pass along) "literal" duration
-            [:DURATION, [m[:duration], duration]]
-        elsif m[:wday]    ## standalone weekday e.g. Mo/Tu/We/etc.
-             [:WDAY, [m[:wday], { wday: DAY_MAP[ m[:day_name].downcase ] } ]]
-        elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
-              ## note -  strip enclosing () and convert to integer
-             [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
-        elsif m[:score_more]
+            [:DATE, [m[:date], _build_date(m)]]
+        elsif m[:date_legs]
+            [:DATE_LEGS, [m[:date_legs], _build_date_legs(m)]]
+        elsif m[:score_team]
+            [:SCORE_TEAM, [m[:score_team], _build_score_team(m)]]
+        elsif m[:score_team_pen]
+            [:SCORE_TEAM_PEN, [m[:score_team_pen], _build_score_team_pen(m)]]
+        elsif m[:score_team_num]
+            [:SCORE_TEAM_NUM, [m[:score_team_num], _build_score_team_num(m)]]
+          elsif m[:score_legs]
+              legs = {}
+              ### leg1
+              score = {}
+              score[:ft] = [m[:leg1_ft1].to_i(10),
+                            m[:leg1_ft2].to_i(10)]
+              legs['leg1'] = score
+              ### leg2
+              score = {}
+              score[:ft] = [m[:leg2_ft1].to_i(10),
+                            m[:leg2_ft2].to_i(10)]  if m[:leg2_ft1] && m[:leg2_ft2]
+              score[:et] = [m[:leg2_et1].to_i(10),
+                            m[:leg2_et2].to_i(10)]  if m[:leg2_et1] && m[:leg2_et2]
+              score[:p]  = [m[:leg2_p1].to_i(10),
+                            m[:leg2_p2].to_i(10)]  if m[:leg2_p1] && m[:leg2_p2]
+              legs['leg2'] = score
+              ## check for (opt) aggregate - keep on "top-level"
+              legs[:agg] = [m[:agg1].to_i(10),
+                            m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
+              legs[:away] = true  if m[:away]
+              ## note - for debugging keep (pass along) "literal" score
+              [:SCORE_LEGS, [m[:score_legs], legs]]
+        elsif m[:score_full]
+              score = {}
+              score[:p] = [m[:p1].to_i(10),
+                           m[:p2].to_i(10)]  if m[:p1] && m[:p2]
+              score[:et] = [m[:et1].to_i(10),
+                            m[:et2].to_i(10)]  if m[:et1] && m[:et2]
+              score[:ft] = [m[:ft1].to_i(10),
+                            m[:ft2].to_i(10)]  if m[:ft1] && m[:ft2]
+              score[:ht] = [m[:ht1].to_i(10),
+                            m[:ht2].to_i(10)]  if m[:ht1] && m[:ht2]
+              score[:agg] = [m[:agg1].to_i(10),
+                             m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
+              if m[:away1] && m[:away2]
+                 score[:away] = [m[:away1].to_i(10),
+                                 m[:away2].to_i(10)]
+              elsif m[:away]    ## fallback if no away score; check away flag
+                 score[:away] = true
+              end
+              ## add golden/silver flags
+              score[:golden] = true   if m[:aetgg]  ## golden goal (gg)/sudden death (sd)
+              score[:silver] = true   if m[:aetsg]  ## silver goal (sg)
+            ## note - for debugging keep (pass along) "literal" score
+            [:SCORE_FULL, [m[:score_full], score]]
+        elsif m[:score_fuller]
               score = {}
-              ## check for pen
               score[:p] = [m[:p1].to_i(10),
                            m[:p2].to_i(10)]  if m[:p1] && m[:p2]
               score[:et] = [m[:et1].to_i(10),
@@ -804,18 +1292,85 @@ def _tokenize_line( line )
                             m[:ft2].to_i(10)]  if m[:ft1] && m[:ft2]
               score[:ht] = [m[:ht1].to_i(10),
                             m[:ht2].to_i(10)]  if m[:ht1] && m[:ht2]
+              score[:agg] = [m[:agg1].to_i(10),
+                             m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
+              if m[:away1] && m[:away2]
+                 score[:away] = [m[:away1].to_i(10),
+                                 m[:away2].to_i(10)]
+              elsif m[:away]    ## fallback if no away score; check away flag
+                 score[:away] = true
+              end
+              ## add aet flag true/false
+              # score[:aet] = true   if m[:aet] || m[:aetgg] || m[:aetsg]
+              ## add golden/silver flags
+              score[:golden] = true   if m[:aetgg]  ## golden goal (gg)/sudden death (sd)
+              score[:silver] = true   if m[:aetsg]  ## silver goal (sg)
             ## note - for debugging keep (pass along) "literal" score
-            [:SCORE_MORE, [m[:score_more], score]]
+            [:SCORE_FULLER, [m[:score_fuller], score]]
+        elsif m[:score_fuller_more]
+               ##    SCORE + SCORE_FULLER_MORE
+               ## note -  after extra-time (aet) or full-time (ft)
+               ##           score may be present in SCORE!!!
+              score = {}
+              score[:p] = [m[:p1].to_i(10),
+                           m[:p2].to_i(10)]  if m[:p1] && m[:p2]
+              score[:et] = [m[:et1].to_i(10),
+                            m[:et2].to_i(10)]  if m[:et1] && m[:et2]
+              score[:ft] = [m[:ft1].to_i(10),
+                            m[:ft2].to_i(10)]  if m[:ft1] && m[:ft2]
+              score[:ht] = [m[:ht1].to_i(10),
+                            m[:ht2].to_i(10)]  if m[:ht1] && m[:ht2]
+              score[:agg] = [m[:agg1].to_i(10),
+                             m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
+              if m[:away1] && m[:away2]
+                 score[:away] = [m[:away1].to_i(10),
+                                 m[:away2].to_i(10)]
+              elsif m[:away]    ## fallback if no away score; check away flag
+                 score[:away] = true
+              end
+              ## add flag in score for et/ft/ht
+              score[:score] = 'et'   if m[:aet] || m[:aetgg] || m[:aetsg]
+              score[:score] = 'ft'   if m[:ft]
+              score[:score] = 'ht'   if m[:ht]
+              ## add golden/silver flags
+              score[:golden] = true   if m[:aetgg]  ## golden goal (gg)/sudden death (sd)
+              score[:silver] = true   if m[:aetsg]  ## silver goal (sg)
+            ## note - for debugging keep (pass along) "literal" score
+            [:SCORE_FULLER_MORE, [m[:score_fuller_more], score]]
         elsif m[:score]
             score = {}
-            ## must always have ft for now e.g. 1-1 or such
-            ###  change to (generic) score from ft -
-            ##     might be score a.e.t. or such - why? why not?
-            score[:ft] = [m[:ft1].to_i(10),
-                          m[:ft2].to_i(10)]
-          ## note - for debugging keep (pass along) "literal" score
+             ##  note - score is "generic"
+            ##      might be full-time (ft) or
+            ##         after extra-time (aet) or such
+            ##         or even undecided/unknown
+            ##    thus, use score1/score2 and NOT ft1/ft2
+            score[:score] = [m[:score1].to_i(10),
+                             m[:score2].to_i(10)]
+         ## note - for debugging keep (pass along) "literal" score
           [:SCORE, [m[:score], score]]
+        elsif m[:score_awd]   ## score awarded (awd/awd.)
+            score = {}
+            ### note - use "generic" score for now
+            ##         to match  A 3-0 B [awarded] etc.
+            score[:score] = [m[:score1].to_i(10),
+                             m[:score2].to_i(10)]
+            ## add score[:awarded] = true ???
+            ##    or only use match status to avoid duplicate?
+            [:SCORE_AWD, [m[:score_awd], score]]
+        elsif m[:score_abd]   ## score abandoned (abd/abd.)
+            score = {}
+            ### note - use "generic" score for now
+            score[:score] = [m[:score1].to_i(10),
+                             m[:score2].to_i(10)]
+            ## add score[:awarded] = true ???
+            ##    or only use match status to avoid duplicate?
+            [:SCORE_ABD, [m[:score_abd], score]]
       elsif m[:minute]
               minute = {}
               minute[:m]      = m[:value].to_i(10)
@@ -833,6 +1388,7 @@ def _tokenize_line( line )
           when '@'    ##  enter geo mode
             puts "  ENTER GEO_RE MODE"  if debug?
             @re = GEO_RE
+            geo_count = 0
             [:'@']
           when ',' then [:',']
           when ';' then [:';']
@@ -840,10 +1396,14 @@ def _tokenize_line( line )
           when '|' then [:'|']
           when '[' then [:'[']
           when ']' then [:']']
-          when '-' then [:'-']        # level 1 OR (classic) dash
-          when '--'   then [:'--']    # level 2
-          when '---'  then [:'---']   # level 3
-          when '----' then [:'----']  # level 4
+          when '-' then [:'-']
+          when '('    ## enter goal scorer mode on "free-floating" open parenthesis!!!
+             puts "  ENTER GOAL_RE MODE"   if debug?
+             @re = GOAL_RE
+              ## note - eat-up ( for now; do NOT pass along as token
+              ##       pass along "virtual" INLINE GOALS - why? why not?
+              [:INLINE_GOALS, "<|INLINE_GOALS|>"]
+          when ')' then [:')']
           else
             puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
             nil  ## ignore others (e.g. brackets [])
@@ -884,21 +1444,24 @@ def _tokenize_line( line )
   end
-   if @re == GOAL_RE   ### ALWAYS switch back to top level mode
-     puts "  LEAVE GOAL_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
-     @re = RE
-   end
+  # if @re == GOAL_RE   ### ALWAYS switch back to top level mode
+  #   puts "  LEAVE GOAL_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
+  #   @re = RE
+  # end
    if @re == GEO_RE   ### ALWAYS switch back to top level mode
      puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
      @re = RE
    end
+   @re = RE  if @re == GROUP_DEF_RE   ### ALWAYS switch back to top level mode
+   @re = RE  if @re == ROUND_DEF_RE
    ##
    ## if in prop mode continue if   last token is [,-]
    ##        otherwise change back to "standard" mode
    if @re == PROP_RE            || @re == PROP_CARDS_RE ||
-      @re == PROP_GOAL_RE       || @re == PROP_PENALTIES_RE ||
+      @re == PROP_PENALTIES_RE ||
       @re == PROP_ATTENDANCE_RE || @re == PROP_REFEREE_RE
      if [:',', :'-', :';'].include?( tokens[-1][0] )
         ## continue/stay in PROP_RE mode