RubyGems - sportdb-parser - Versions diffs - 0.7.1 → 0.7.2 - Mend

sportdb-parser 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +17 -4
data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
data/lib/sportdb/parser/lexer-on_top.rb +125 -0
data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
data/lib/sportdb/parser/lexer.rb +133 -1363
data/lib/sportdb/parser/lexer_buffer.rb +8 -37
data/lib/sportdb/parser/lexer_token.rb +126 -0
data/lib/sportdb/parser/parser.rb +1104 -1403
data/lib/sportdb/parser/racc_parser.rb +36 -32
data/lib/sportdb/parser/racc_tree.rb +65 -98
data/lib/sportdb/parser/token-date--helpers.rb +130 -0
data/lib/sportdb/parser/token-date--names.rb +108 -0
data/lib/sportdb/parser/token-date.rb +20 -192
data/lib/sportdb/parser/token-date_duration.rb +8 -27
data/lib/sportdb/parser/token-geo.rb +16 -16
data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
data/lib/sportdb/parser/token-goals.rb +103 -249
data/lib/sportdb/parser/token-group.rb +8 -22
data/lib/sportdb/parser/token-prop.rb +138 -124
data/lib/sportdb/parser/token-prop_name.rb +48 -39
data/lib/sportdb/parser/token-round.rb +21 -35
data/lib/sportdb/parser/token-score--helpers.rb +189 -0
data/lib/sportdb/parser/token-score.rb +9 -393
data/lib/sportdb/parser/token-score_full.rb +331 -0
data/lib/sportdb/parser/token-status.rb +44 -46
data/lib/sportdb/parser/token-status_inline.rb +112 -0
data/lib/sportdb/parser/token-text.rb +41 -31
data/lib/sportdb/parser/token-time.rb +29 -26
data/lib/sportdb/parser/token.rb +58 -159
data/lib/sportdb/parser/version.rb +1 -1
data/lib/sportdb/parser.rb +45 -17
metadata +19 -6
data/lib/sportdb/parser/blocktxt.rb +0 -99
data/lib/sportdb/parser/lexer_tty.rb +0 -111
data/lib/sportdb/parser/token-table.rb +0 -149
data/lib/sportdb/parser/token_helpers.rb +0 -92

data/lib/sportdb/parser/lexer.rb CHANGED

@@ -7,6 +7,9 @@ class Lexer
 def log( msg )
    ## append msg to ./logs.txt
    ##     use ./errors.txt - why? why not?
+   ##
+   ##  change to ./logs_lexer.txt or such - why? why not?
+   ##    auto-add/prepend  [Lexer] and timestamp!!!  to msg - why? why not?
    File.open( './logs.txt', 'a:utf-8' ) do |f|
      f.write( msg )
      f.write( "\n" )
@@ -14,387 +17,125 @@ def log( msg )
 end
-  ###
-  ##  todo/fix -   use LangHelper or such
-  ##   e.g.     class Lexer
-  ##                include LangHelper
-  ##            end
-  ##
-  ##  merge back Lang into Lexer - why? why not?
-  ## keep "old" access to checking for group, round & friends
-  ##    for now for compatibility
-  def is_group?( text )  Lang.is_group?( text ); end
-  def is_round?( text )  Lang.is_round?( text ); end
+def _trace( *args )
+  if debug?
+    print "[DEBUG] Lexer -- "
+    args.each { |arg| puts args }
+  end
+end
+def _warn( *args )
+  print "!! [WARN] Lexer -- "
+  args.each { |arg| puts args }
+end
+def _info( *args )
+  print "[INFO] Lexer -- "
+  args.each { |arg| puts args }
+end
 def debug?()  @debug == true; end
-def initialize( lines, debug: false )
-   raise ArgumentError, "(string) text expected for lexer; got #{lines.class.name}"  unless lines.is_a?(String)
-   @debug = debug
-   @txt   = lines
-end
-HTML_COMMENT_RE = %r{  <!--
-                            .*?   ## note - use non-greedy/lazy *? match
-                         -->
-                       }xm      ## note - turn on multi-line match (for dot (.))
-##
-##  note - [] block may NOT incl. square brackets
-##       what about comments (e.g. #)?
-##    todo/check - rename to NOTE_BLOCK or TEXT_BLOCK or ???
-PREPROC_BLOCK_RE = %r{  \[
-                      [^\[\]\#]*?  ## note - use non-greedy/lazy *? match
-                  \]
-                        }xm  ## note - turn on multi-line match (for dot(.))
+def initialize( txt, debug: false )
+   raise ArgumentError, "text as string expected for lexer; got #{txt.class.name}"  unless txt.is_a?(String)
+   @txt   = txt
+   @debug = debug
+end
-##
-## check for "literal"  (multi-line) note blocks
-##   eg.  nb:  or note:
-##   space required after double colon - why? why not?
-PREPROC_NOTA_BENE_RE = %r{
-         ^
-    [ ]* (?: nb | note) [ ]* : [ ]+
-       .+?  ## non-greedy
-    ## positive lookahead
-    ##    note - must end with blank line or end-of-file/document
-    ##   note - do NOT eat-up trailing hrule (---)
-      (?=      (?: \n [ ]* -{3,} [ ]*)?
-                   \n[ ]*\n
-               | \z
-        )
-}xim
-##
-##  replace "escaped" newline with non-newline char e.g. '↵'
-LINE_CONTINUATION_RE = %r{
-                           \\[ ]* \n
-                        }x
+def tokenize_with_errors
-###
-##  check for magic comments
-##     e.g  # teletype: true    or TELETYPE: TRUE
-##             tty/teletype
+    tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
+    errors         = []   ## keep a list of errors - why? why not?
-MAGIC_COMMENT_RE = %r{  \A
-                         [ ]*    ## optional leading spaces
-                        \#+      ##  note - allow ##,###, etc. too
-                         [ ]*    ## optional spaces
-                           (?<magic_comment_key> tty | teletype )
-                         [ ]*    ## optional spaces
-                            :
-                         [ ]*    ## optional spaces
-                            (?<magic_comment_value> true | false )
-                         [ ]*    ## optional trailing spaces
-                        \z
-                      }ix
+    txt = _prep_doc( @txt )
+    ####
+    ## quick hack - keep re state/mode between tokenize calls!!!
+    @re  ||= RE     ## note - switch between RE & INSIDE_RE
+    lineno = 0
+    txt.each_line do |line|
+        lineno += 1
-def tokenize_with_errors
+        ## todo - "inlined virtual/collapsed/folded newlines"
+        ##   check for "↵" !!!
+        ##   and add to lineno
-####
-##   flags / modes
-    @teletype = false     # use magic comment - tty/teletype: true
+        ## note - KEEP leading spaces for indent
+        ##         use rstrip (NOT left/leading & right/trainling strip) only!!
+        ## note -   remove/strip trailing newline (and optional spaces)!!!
+        ##          trailing whitespace may incl. \n or \r\n!!!
+        line = line.rstrip
-    tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
-    errors         = []   ## keep a list of errors - why? why not?
-   ##  preprocess automagically - why? why not?
-    ##   strip lines with comments and empty lines striped / removed
-    ##      keep empty lines? why? why not?
-    ##      keep leading spaces (indent) - why?
-    ##
-    ##  note - KEEP empty lines (get turned into BLANK token!!!!)
-    ##  "universal" newlines
-    ##    replace all windows-style  cr+lf (\r\n) to lf (\n) only
-    txt = @txt.gsub( "\r\n", "\n" )
-    ###
-    ## quick hack for now
-    ##   remove  html-style comments <!-- -->
-    ##           (incl. multi-line)  with two spaces
-    ##       will mess-up lineno tracking!!!
-    ##    fix later to have function lineno & colno!!!
-    txt = @txt.gsub( HTML_COMMENT_RE ) do |m|
-                        puts " [debug] preproc html comment:"
-                        puts m
-                        '  '
-                   end
-=begin
-##
-##  todo/fix - add a command line switch/option for auto-format fixes !!!
-   ##  quick hack - remove later
-   ##    auto-convert "old" legacy round markers (»)
-   txt = txt.gsub( %r{^ [ ]*
-                          »
-                        (?= [ ]+)  ## require one trailing space for now!!
-                        }ix ) do |_|
-                     puts "!! WARN - auto-fix format; replacing old (alternate/legacy) round marker (»)"
-                        '▪'
-                    end
-###  16.00 => 16:00
-##     todo/check - use space for positive lookbehind & ahead
-##                      (instead of \b) - why? why not?
-##  note - check for/exclude 12.12.  date in match
-##             use negative lookahead
-##   check for 12.12.94
-##      use   positive lookbehind   !!!
-##               must be space, comma or begin-of-line [ ,]|^
-##    or use negative lookbehind
-##               must NOT be dot
-   txt = txt.gsub(  %r{
-                        ## check NEGATIVE lookbehind
-                         (?<! [.])  ## do NOT match 12.94 in 12.12.94
-                          \b
-                        (?<h>\d{1,2})
-                           \.
-                        (?<m>\d{2})
-                          \b
-                        (?! [.] )   ## do NOT match 12.12.
-                        }ix ) do |_|
-                           m = $~   ## is $LAST_MATCH_DATA
-                        puts "!! WARN - auto-fix format; replacing old (alternate/legacy) time format #{m[0]}"
-                           "#{m[:h]}:#{m[:m]}"   ## '\1:\2'
-                        end
-=end
-    ###
-    ## add more "native" multi-line comment-styles
-    ##  e.g.    #[[ ... ]]  or  #<<< .. >>> or #<< .. >>
-    ##                 or such - why? why not?
-   txt = txt.gsub( PREPROC_NOTA_BENE_RE ) do |m|
-       if m.include?( "\n" )   ## check for newlines (\n) and replace
-         puts " [debug] preproc (multi-line) note/nota bene block:"
-         puts m
-         ## todo/check: replace with two spaces insead of ↵ - why? why not?
-         m.gsub( "\n", '↵' )
-       else
-         m
-       end
-    end
+        ###  skip comments
+        ##      todo/check - change to blank line
+        ##                     to keep lineno (closer to orginal) - why? why not?
+        next  if line.match?(/\A  [ ]* ## optional leading space(s)
+                                   \#
+                                    /x )
+        ##  strip (inline) end-of-line comments (from line)
+        ##    check/discuss: make - inline comment require trailing space
+        ##                      e.g.   #1 vs # 1   - why? why not?
+        line = line.sub( /   [ ]*      ## (eat-up) optional leading space(s)
+                              \#{1,}.*?
+                             \z
+                            /x, '' )
-   ##
-   ## e.g. used in (multi-line) TableNote
-   ##  1.SOUTH KOREA   6  5  1  0 22- 1 16  [0-0]
-   ##  2.LEBANON       6  3  1  2 11- 8 10  [0-2, 0-0]
-   ##  3.Turkmenistan  6  3  0  3  8-11  9  [3-1]
-   ##  4.Sri Lanka     6  0  0  6  2-23  0  [0-1]
-   ##  -.North Korea   [withdrew after playing 5 matches due to safety concerns in
-   ##                   connection with the Covid-19 pandemic; all results annulled]
-   ##
-   ##  note - no longer used for now
-   ##     enclose multi-line notes in []
-   ##         removes need for line continuation for now
-##
-##   txt = txt.gsub( LINE_CONTINUATION_RE ) do |_|
-##            puts " [debug] preproc line continuation"
-##              ## todo/check: replace with two spaces insead of ↵ - why? why not?
-##               '↵'
-##         end
-    #####
-    ## (another) quick hack for now
-    ##   turn multi-line note blocks into
-    ##             single-line note blocks
-    ##             by changing newline (\n) to ⏎ (unicode U+23CE)
-    ##              or why not  to ___ ?
-    ##
-    ##  unicode options for return/arrows:
-    ##   -  ↵ (U+21B5): Downwards Arrow With Corner Leftwards.
-    ##                This is the most common "carriage return" symbol.
-    ##   -  ⏎ (U+23CE): Return Symbol.
-    ##               Specifically designated as the keyboard's "Return" key symbol,
-    ##                often used in user interfaces.
-    txt = txt.gsub( PREPROC_BLOCK_RE ) do |m|
-       if m.include?( "\n" )   ## check for newlines (\n) and replace
-         puts " [debug] preproc (multi-line) block:"
-         puts m
-         ## todo/check: replace with two spaces insead of ↵ - why? why not?
-         m.gsub( "\n", '↵' )
-       else
-         m
-       end
-    end
+        ####
+        #  support __END__ marker to cut-off input
+        break if line.match?( /\A [ ]*   ## optional leading space(s)
+                                   __END__
+                                 \z
+                               /x )
-    ####
-    ## quick hack - keep re state/mode between tokenize calls!!!
-    @re  ||= RE     ## note - switch between RE & INSIDE_RE
-    txt.each_line do |line|
-        ## line = line.rstrip   ## note - MUST remove/strip trailing newline (spaces optional)!!!
-        line = line.strip   ## note - strip leading AND trailing whitespaces
-                            ## note - trailing whitespace may incl. \n or \r\n!!!
-        ##
-        ###
-        ##  check for magic comments
-        ##     e.g  # teletype: true    or TELETYPE: TRUE
-        ##             tty/teletype
-        if line.start_with?('#')   ###  skip comments (& check magic comments!!)
-           if (m = MAGIC_COMMENT_RE.match(line))
-              magic_comment_key   = m[:magic_comment_key].downcase
-              magic_comment_value = m[:magic_comment_value].downcase
-              ##   turn on teletype mode
-              ## e.g.  tty: true  or teletype: true
-              if ['tty', 'teletype'].include?( magic_comment_key ) &&
-                 ['true'].include?( magic_comment_value )
-                 puts " magic comment - turn on teletype (tty) mode"
-                 @teletype = true
-              end
-           end
-           next
-        end
-        line = line.sub( /#.*/, '' ).strip   ###  cut-off end-of line comments too
+        ## auto-fixes line-by-line (e.g. check for tabs, smart quotes, etc.)
+        line = _prep_line( line )
-        ####
-        #  support __END__ marker to cut-off input
-        break if line.strip == '__END__'
-       ##
-       ##  first check for tabs
-       ##    add error/warn
-       ##    for auto-fix - replace tabs with two spaces
-        line = line.gsub( "\t" ) do |_|
-                  ## report error here
-                  ## todo/add error here
-                  puts "!! WARN - auto-fix; replacing tab (\\t) with two spaces in line #{line.inspect}"
-                   "  "   ## replace with two spaces
-                 end
-        ## U+00A0 (160)  -- non-breaking space (unicode)
-        line = line.gsub( "\u00A0" ) do |uni|
-                  ## report error here
-                  ## todo/add error here
-                  puts "!! WARN - auto-fix; replacing non-breaking unicode space (#{uni}/#{uni.ord}) w/ ascii space ( /#{" ".ord}) in line #{line.inspect}"
-                   " "   ## replace with space
-                 end
-        ###
-        ## todo/fix - print unicode numbers for [–−]
-        ##                different candidates to differentiate and document!!!
-        ##   – => U+2013 (8211)     -- En Dash     (unicode)
-        ##   − => U+2212 (8722)     -- Minus Sign  (unicode)
-        line = line.gsub( /[–−]/ ) do |uni|
-                  ## report error here
-                  ## todo/add error here
-                  puts "!! WARN - auto-fix; replacing unicode dash (#{uni}/#{uni.ord}) w/ ascii dash (-/#{"-".ord}) in line #{line.inspect}"
-                   '-'   ## replace with ascii dash (-)
-                  end
-        puts "line: >#{line}<"    if debug?
+        _trace( "line #{lineno}: >#{line}<" )
         ######
         ### special case for empty line (aka BLANK)
         if line.empty?
            ## note - blank always resets parser mode to std/top-level!!!
            @re = RE
-           tokens_by_line << [[:BLANK, '<|BLANK|>']]
+           tokens_by_line << [Token.virtual(:BLANK, lineno: lineno)]
         elsif (m = HEADING_RE.match(line))
            ## note - heading always resets parser mode to std/top-level!!!
            @re = RE
-           puts "   HEADING"  if debug?
+           _trace( 'HEADING' )
            ## note - derive heading level from no of (leading) markers
            ##             e.g. = is 1, == is 2, == is 3, etc.
-           heading_level = m[:heading_marker].size
-           tokens_by_line << [[:"H#{heading_level}", m[:heading]]]
+           heading_level = m[:heading_marker].size
+           tokens_by_line << [Token.new(:"H#{heading_level}", m[:heading], lineno: lineno)]
         elsif (m = NOTA_BENE_RE.match(line))
            ## note - nota bene always resets parser mode to std/top-level!!!
            @re = RE
-           tokens_by_line << [[:NOTA_BENE, m[:nota_bene]]]
-       elsif @re == RE && (m = TABLE_RE.match(line))
-            @re = TABLE_MORE_RE  ## switch into table mode
-            if m[:table_heading]
-              tokens_by_line << [[:TABLE_HEADING, m[:table_heading]]]
-            else  ## assume table (line) e.g. m[:table]
-              tokens_by_line << [[:TABLE_LINE, line]]
-            end
-        elsif @re == TABLE_MORE_RE
-            ### todo/fix - check if no match and report/add error!!
-            ##        for now (ummatched) line gets auto-added as table line!!!
-            ##
-            ##   note - MUST be followed by blank line (or nota bene/heading)
-            ##            to switch back into to top-level!!!!
-            m = TABLE_MORE_RE.match(line)
-            if m[:table_note]
-              tokens_by_line << [[:TABLE_NOTE, m[:table_note]]]
-            elsif m[:table_divider]
-              tokens_by_line << [[:TABLE_DIVIDER, m[:table_divider]]]
-            else  ## assume table (line) e.g. m[:table]
-              tokens_by_line << [[:TABLE_LINE, line]]
-            end
-        elsif @re != TABLE_MORE_RE &&  (m = HRULER_RE.match(line))
-           ## note - hruler (---)
-           ##          will only match if NOT in table mode!!!
-           ##   otherwise
-           ##      hruler always resets parser mode to std/top-level!!!
-           @re = RE
-           tokens_by_line << [[:HRULER, '<|HRULER|>']]
-        elsif @teletype && (@re == RE && IS_TTY_LINE_RE.match(line))
-            ## try experimental TELETYPE (TTY) mode!!!
-            ##    note - turn on via magic comment e.g.  tty/teletype: true
-            ###
-            ###    move inside _tokenize_line - why? why not?
-            tokens_by_line << _tokenize_tty_line( line )
-            ##   note - dates such as
-            ##         APR 11 or 11 APR   will trigger TELETYPE
-            ###    ## check letter
+           tokens_by_line << [Token.new(:NOTA_BENE, m[:nota_bene], lineno: lineno)]
         else
-          more_tokens, more_errors = _tokenize_line( line )
-          tokens_by_line  << more_tokens
+          more_tokens, more_errors = _tokenize_line( line, lineno )
+          tokens_by_line  << more_tokens
           errors          += more_errors
         end
     end # each line
@@ -402,1084 +143,113 @@ def tokenize_with_errors
+    tokens_by_line = tokens_by_line.map do |tokens|
-    tokens_by_line = tokens_by_line.map do |tokens|
         #################
-        ##    transform tokens (using simple patterns)
-        ##      to help along the (racc look ahead 1 - LA1) parser
+        ##    transform tokens (using simple patterns)
+        ##      to help along the (racc look ahead 1 - LA1) parser
         nodes = []
         buf = Tokens.new( tokens )
         ## pp buf
     loop do
           break if buf.eos?
           if buf.match?( :DATE, :TIME )   ## merge DATE TIME into DATETIME
-               date = buf.next[1]
-               time = buf.next[1]
+               date = buf.next
+               time = buf.next
                ## puts "DATETIME:"
                ## pp date, time
                ##  note:  time value is { time: {} } or
                ##                       { time: {}, time_local {} }
-               val =  [date[0] + ' ' + time[0],  ## concat string of two tokens
-                        { date: date[1] }.merge( time[1] )
-                      ]
-               nodes << [:DATETIME, val]
-         ### support  date time with comma too - why? why not?
-         elsif buf.match?( :DATE, :',', :TIME )
-               date  = buf.next[1]
-               _    = buf.next  ## ignore comma
-               time = buf.next[1]
+               text  = date.text + ' ' + time.text,  ## concat string of two tokens
+               value = { date: date.value }.merge( time.value )
+               nodes << Token.new(:DATETIME, text,
+                                      lineno: date.lineno,
+                                      offset: [date.offset[0],
+                                               time.offset[1]],
+                                      value: value )
+          ### support  date time with comma too - why? why not?
+          elsif buf.match?( :DATE, ',', :TIME )
+               date = buf.next
+               _    = buf.next  ## ignore comma
+               time = buf.next
                ## puts "DATETIME:"
                ## pp date, time
-               val =  [date[0] + ', ' + time[0],  ## concat string of two tokens
-                        { date: date[1] }.merge( time[1] )
-                      ]
-               nodes << [:DATETIME, val]
-        elsif buf.match?( :TEAM, :SCORE_TEAM )
-            ## merge TEAM SCORE_TEAM into TEAMALT
-            ##     (use TEAMENTRY or TEAMRESULT - why? why not?)
-               team       = buf.next[1]
-               score_team = buf.next[1]
-               val =  [team + ' ' + score_team[0],  ## concat string of two tokens
-                        { team: team }.merge( score_team[1] )
-                      ]
-               nodes << [:TEAMALT, val]
-        elsif buf.match?( :TEAM, :SCORE_TEAM_PEN )
-               team           = buf.next[1]
-               score_team_pen = buf.next[1]
-               val =  [team + ' ' + score_team_pen[0],  ## concat string of two tokens
-                        { team: team }.merge( score_team_pen[1] )
-                      ]
-               nodes << [:TEAMALT_PEN, val]
-        elsif buf.match?( :TEAM, :SCORE_TEAM_NUM )
-               team           = buf.next[1]
-               score_team_num = buf.next[1]
-               val =  [team + ' ' + score_team_num[0],  ## concat string of two tokens
-                        { team: team }.merge( score_team_num[1] )
-                      ]
-               nodes << [:TEAMALT_NUM, val]
-         elsif buf.match?( :GOAL_MINUTE, :',', :GOAL_MINUTE )
+               text  = date.text + ', ' + time.text  ## concat string of two tokens
+               value =  { date: date.value }.merge( time.value )
+               nodes << Token.new(:DATETIME, text,
+                                      lineno: date.lineno,
+                                      offset: [date.offset[0],
+                                               time.offset[1]],
+                                     value: value )
+          elsif buf.match?( :GOAL_MINUTE, ',', :GOAL_MINUTE )
              ## note - only advance by two tokens!
              ##     allows more :GOAL_MINUTE sequences!! e.g. 12,13,14 etc!!!
-             ##
+             ##
              ## help parser with comma shift/reduce conflict
              ##   change ',' to GOAL_MINUTE_SEP !!!
-             nodes << buf.next   ## pass through goal_minute
-             _ = buf.next  ## eat-up goal_minute_sep a.k.a. comma (,)
+             nodes << buf.next   ## pass through goal_minute
+             comma = buf.next  ## eat-up goal_minute_sep a.k.a. comma (,)
                            ##   and replace with dedicated sep(arator)
-             nodes << [:GOAL_MINUTE_SEP,"<|GOAL_MINUTE_SEP|>"]
-         elsif buf.match?( :',', :INLINE_ATTENDANCE )
-             ## note  - allow optional comma before inline attendance
+             nodes << Token.new( :GOAL_MINUTE_SEP,
+                                      comma.text,
+                                      lineno: comma.lineno,
+                                      offset: comma.offset,
+                                      value:  comma.value)
+          elsif buf.match?( ',', :INLINE_ATTENDANCE )
+             ## note  - allow optional comma before inline attendance
              ## help parser with comma shift/reduce conflict
              ##   change ',' to INLINE_ATTENDANCE_SEP !!!
-             nodes << [:INLINE_ATTENDANCE_SEP, "<|INLINE_ATTENDANCE_SEP|>"]
-             _ = buf.next  ## eat-up inline_attendance_sep a.k.a. comma (,)
+             comma = buf.next  ## eat-up inline_attendance_sep a.k.a. comma (,)
                            ##   and replace with dedicated sep(arator)
-             nodes << buf.next   ## pass through inline_attendance
+             nodes << Token.new(:INLINE_ATTENDANCE_SEP,
+                                    comma.text,
+                                    lineno: comma.lineno,
+                                    offset: comma.offset,
+                                    value:  comma.value)
+             nodes << buf.next   ## pass through inline_attendance
           else
              ## pass through
              nodes << buf.next
           end
     end  # loop
-    nodes
+    nodes
   end  # map tokens_by_line
+    ## puts "tokens_by_line:"
+    ## pp tokens_by_line
     ## flatten tokens
     tokens = []
-    tokens_by_line.each do |tok|
+    tokens_by_line.each do |tok_line|
-         if debug?
-           pp tok
-         end
+        ## if debug?
+        ##   pp tok_line
+        ## end
+         tokens  += tok_line
-     ###############
-     ##   "hacky" (automagic) line merges (remove newline)
-           ## if line start with @  - check if incl. teams
-     ###
-     ### quick merge lines hack
-     ##    if line starts with geo-marker token @
-     ##            check if line incl. TEAM
-     ##           if yes, leave alone
-     ##            otherwise  merge line into previous line!!
-     ##       - todo/fix - handle in possibly in grammar!!!
-     ##        for now match_line CAN start with @ London
-     ##                 resulting in parser conflict(s)!!!
-     ##    e.g.
-     ##       England v Scotland
-     ##          @ London
-     ##          =>
-     ##        England v Scotland @ London
-     ##
-     ##
-     ##  note/todo - if INDENT / SPACES get added
-     ##                adjust here
-     ##   tok[0][0] == :INDENT  (or :SPACES) &&
-     ##   tok[1][0] == :'@'
-           if tok[0] && tok[0][0] == :'@'
-                team =  tok.find { |t| t[0] == :TEAM }
-                if team
-                   ## do nothing - keep as is (assume match_line starting w/ @)
-                else
-                  ## no team(s) found in line
-                  ##    remove last token (that is, NEWLINE)
-                  ##   note - possibly is blank ?!  keep blank
-                  tokens.pop  if tokens[-1][0] == :NEWLINE
-                end
-           end
-         tokens  += tok
          ## auto-add newlines  (unless BLANK!!)
-         tokens  << [:NEWLINE, "\n"]   unless tok[0] && tok[0][0] == :BLANK
+         unless tok_line[0] && tok_line[0].type == :BLANK
+            ## note - reuse lineno from first token in line
+            ##                  use last - why? why not?
+            tokens  << Token.newline( lineno: tok_line[0].lineno )
+         end
     end
     [tokens,errors]
-end   # method tokenize_with_errors
-def _tokenize_line( line )
-  tokens = []
-  errors = []   ## keep a list of errors - why? why not?
+end   # method tokenize_with_errors
-  pos = 0
-  ## track last offsets - to report error on no match
-  ##   or no match in end of string
-  offsets = [0,0]
-  m = nil
-  ## track number of geo text seen
-  ##    (use for - do NOT break on two spaces if no geo text seen yet!!)
-  geo_count = 0
-  ####
-  ## quick hack - keep re state/mode between tokenize calls!!!
-  @re  ||= RE     ## note - switch between RE & INSIDE_RE
-  if @re == RE  ## top-level
-    ### check for modes once (per line) here to speed-up parsing
-    ###   for now goals only possible for start of line!!
-    ###        fix - remove optional [] - why? why not?
-    ####
-    ## note - ord e.g. (45) for match number can only start a (match) line
-    ##                "inline" use NOT possible
-    ## note -  ord (for ordinal number!!!) e.g match number (1), (42), etc.
-    if (m = START_WITH_ORD.match(line))
-       ## note -  strip enclosing () and convert to integer
-       tokens << [:ORD, [m[:ord], { value: m[:value].to_i(10) } ]]
-       offsets = [m.begin(0), m.end(0)]
-       pos = offsets[1]    ## update pos
-    elsif (m = START_WITH_YEAR.match(line))
-       ## note -  strip enclosing () and convert to integer
-       tokens << [:YEAR, m[:year].to_i(10)]
-       offsets = [m.begin(0), m.end(0)]
-       pos = offsets[1]    ## update pos
-    ###
-    ##  todo/fix - rename to START_GROUP_DEF_LINE_RE !!!!
-    elsif (m = GROUP_DEF_LINE_RE.match( line ))
-      puts "  ENTER GROUP_DEF_RE MODE"   if debug?
-      @re = GROUP_DEF_RE
-      tokens << [:GROUP_DEF, m[:group_def]]
-      offsets = [m.begin(0), m.end(0)]
-      pos = offsets[1]    ## update pos
-    ###  todo/fix - rename to PROP_KEY_RE to START_WITH_PROP_KEY_RE !!!
-    elsif (m = PROP_KEY_RE.match( line ))
-      ##  start with prop key (match will switch into prop mode!!!)
-      ##   - fix - remove leading spaces in regex (upstream) - why? why not?
-      ##
-      ###  switch into new mode
-      ##  switch context  to PROP_RE
-        puts "  ENTER PROP_RE MODE"   if debug?
-        key = m[:key]
-        ### todo/fix - add prop yellow/red cards too - why? why not?
-        ##  todo/fix - separate sent off and red card
-        ##     sent-off - incl. red card, yellow/red card and the era before red cards!!
-        if ['sent off'].include?( key.downcase)
-          @re = PROP_CARDS_RE    ## use CARDS_RE ???
-          tokens << [:PROP_SENTOFF, m[:key]]
-        elsif ['red cards'].include?( key.downcase )
-          @re = PROP_CARDS_RE    ## use CARDS_RE ???
-          tokens << [:PROP_REDCARDS, m[:key]]
-        elsif ['yellow cards'].include?( key.downcase )
-          @re = PROP_CARDS_RE
-          tokens << [:PROP_YELLOWCARDS, m[:key]]
-        elsif ['ref', 'referee',
-               'refs', 'referees'   ## note - allow/support assistant refs
-              ].include?( key.downcase )
-          @re = PROP_REFEREE_RE
-          tokens << [:PROP_REFEREE, m[:key]]
-        elsif ['att', 'attn', 'attendance'].include?( key.downcase )
-          @re = PROP_ATTENDANCE_RE
-          tokens << [:PROP_ATTENDANCE, m[:key]]
-     #   elsif ['goals'].include?( key.downcase )
-     #     @re = PROP_GOAL_RE
-     #     tokens << [:PROP_GOALS, m[:key]]
-        elsif ['penalties',
-               'penalty shootout',
-               'penalty shoot-out',
-               'penalty kicks'].include?( key.downcase )
-          @re = PROP_PENALTIES_RE
-          tokens << [:PROP_PENALTIES, m[:key]]
-        else   ## assume (team) line-up
-          @re = PROP_RE           ## use LINEUP_RE ???
-          tokens << [:PROP, m[:key]]
-        end
-        offsets = [m.begin(0), m.end(0)]
-        pos = offsets[1]    ## update pos
-    ###
-    ### todo/fix
-    ###   rename to START_WITH_ROUND_DEF_OUTLINE_RE !!!!
-    elsif (m = ROUND_DEF_OUTLINE_RE.match( line ))
-      puts "   ENTER ROUND_DEF_RE MODE"  if debug?
-      @re = ROUND_DEF_RE
-      ## note - return ROUND_DEF NOT  ROUND_OUTLINE token
-      tokens << [:ROUND_DEF, m[:round_outline]]
-      offsets = [m.begin(0), m.end(0)]
-      pos = offsets[1]    ## update pos
-    elsif (m = ROUND_OUTLINE_RE.match( line ))
-      puts "   ROUND_OUTLINE"  if debug?
-      ## note - derive round level from no of (leading) markers
-      ##             e.g. ▪/:: is 1, ▪▪/::: is 2, ▪▪▪/:::: is 3, etc.
-      ##       note  - ascii-style starts with double ::, thus, autodecrement by one!
-      round_level = m[:round_marker].size
-      round_level -= 1  if m[:round_marker].start_with?( '::' )
-      tokens << [:ROUND_OUTLINE, [m[:round_outline],
-                      { outline: m[:round_outline] ,
-                        level: round_level}]]
-      ## note - eats-up line for now (change later to only eat-up marker e.g. »|>>)
-      offsets = [m.begin(0), m.end(0)]
-      pos = offsets[1]    ## update pos
-    elsif (m = START_GOAL_LINE_RE.match( line ))   ## line starting with ( - assume
-      ##  switch context to GOAL_RE (goalline(s))
-      ####
-      ##  note - check for alternate goal line styles / formats
-      if START_GOAL_LINE_COMPAT_RE.match(line )
-        ## "legacy" style starting with minute e.g.
-        ##  (6 Puskás 0-1, 9 Czibor 0-2, 11 Morlock 1-2, 18 Rahn 2-2,
-        ##    84 Rahn 3-2)
-        @re = GOAL_COMPAT_RE
-        puts "  ENTER GOAL_COMPAT_RE MODE"   if debug?
-        tokens << [:GOALS_COMPAT, "<|GOALS_COMPAT|>"]
-      elsif START_GOAL_LINE_ALT_RE.match( line )
-        ##  goals with scores e.g.
-        ##    (1-0 Franck Ribéry, 2-0 Ivica Olić, 2-1 Wayne Rooney)
-        ##         -or-
-        ##      (Dion Beljo  1-0
-        ##                   1-1  Andreas Gruber
-        ##   Matthias Seidl  2-1)
-        @re = GOAL_ALT_RE
-        puts "  ENTER GOAL_ALT_RE MODE"   if debug?
-        tokens << [:GOALS_ALT, "<|GOALS_ALT|>"]
-      else
-        ## "standard" / default style
-        @re = GOAL_RE
-        puts "  ENTER GOAL_RE MODE"   if debug?
-        tokens << [:GOALS, "<|GOALS|>"]
-      end
-      ## note - eat-up ( for now
-      ##   pass along "virtual" GOALS or GOALS_ALT token
-      ##      (see INLINE_GOALS for the starting goal line inline)
-      offsets = [m.begin(0), m.end(0)]
-      pos = offsets[1]    ## update pos
-    end
-  end
-  old_pos = -1   ## allows to backtrack to old pos (used in geo)
-  while m = @re.match( line, pos )
-    # if debug?
-    #  pp m
-    #  puts "pos: #{pos}"
-    # end
-    offsets = [m.begin(0), m.end(0)]
-    if offsets[0] != pos
-      ## match NOT starting at start/begin position!!!
-      ##  report parse error!!!
-      msg =  "!! WARN - parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
-      puts msg
-      errors << "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
-      log( msg )
-    end
-    ##
-    ## todo/fix - also check if possible
-    ##   if no match but not yet end of string!!!!
-    ##    report skipped text run too!!!
-    old_pos = pos
-    pos     = offsets[1]
-#    pp offsets   if debug?
-    ##
-    ## note: racc requires pairs e.g. [:TOKEN, VAL]
-    ##         for VAL use "text" or ["text", { opts }]  array
-  t = if @re == ROUND_DEF_RE
-           if m[:spaces] || m[:space]
-               nil    ## skip spaces
-           elsif m[:date]
-            [:DATE, [m[:date], _build_date( m )]]
-          elsif m[:duration]
-            [:DURATION, [m[:duration], _build_duration( m )]]
-          elsif m[:sym]
-              sym = m[:sym]
-              case sym
-              when '|' then  [:'|']
-              when ':' then  [:':']
-              when ',' then  [:',']
-              else
-                puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
-                nil  ## ignore others (e.g. brackets [])
-              end
-           elsif m[:any]
-              ## todo/check log error
-               msg = "parse error (tokenize round_def) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
-               puts "!! WARN - #{msg}"
-               errors << msg
-               log( "!! WARN - #{msg}" )
-               nil
-            else
-              ## report error/raise exception
-               puts "!!! TOKENIZE ERROR - no match found"
-               nil
-            end
-      elsif @re == GROUP_DEF_RE
-           if m[:spaces] || m[:space]
-               nil    ## skip spaces
-           elsif m[:text]
-               [:TEAM, m[:text]]
-           elsif m[:sym]
-              sym = m[:sym]
-              case sym
-              when '|' then  [:'|']
-              when ':' then  [:':']
-              when ',' then  [:',']
-              else
-                puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
-                nil  ## ignore others (e.g. brackets [])
-              end
-           elsif m[:any]
-              ## todo/check log error
-               msg = "parse error (tokenize group_def) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
-               puts "!! WARN - #{msg}"
-               errors << msg
-               log( "!! WARN - #{msg}" )
-               nil
-            else
-              ## report error/raise exception
-               puts "!!! TOKENIZE ERROR - no match found"
-               nil
-            end
-       elsif @re == GEO_RE
-           ### note - possibly end inline geo on [ (and others?? in the future
-           ## note: break on double spaces e.g.
-           ## e.g. Jul/16 @ Arena Auf Schalke, Gelsenkirchen  Serbia 0-1 England
-           if m[:spaces]
-                 ### note - do NOT break out
-                 ##           if not text seen yet!!!
-                 if geo_count > 0
-                    ## get out-off geo mode and backtrack (w/ next)
-                    puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
-                    @re = RE
-                    pos = old_pos
-                    next   ## backtrack (resume new loop step)
-                 else
-                     nil   ## skip spaces
-                 end
-           elsif m[:space]
-               nil    ## skip (single) space
-           elsif m[:text]
-               geo_count += 1
-               [:GEO, m[:text]]   ## keep pos - why? why not?
-           elsif m[:geo_end]   ## "hacky" special comma; always ends geo mode!!!
-                 ## get out-off geo mode and backtrack (w/ next)
-                 puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
-                 @re = RE
-                 pos = old_pos
-                 next   ## backtrack (resume new loop step)
-           elsif m[:sym]
-              sym = m[:sym]
-              ## return symbols "inline" as is - why? why not?
-              ## (?<sym>[;,@|\[\]-])
-              case sym
-                ## note - reset geo_count to 0 (avoids break on two spaces)
-                ##                     if separator seen!!
-              when ',' then geo_count = 0; [:',']
-              when '›' then geo_count = 0; [:',']  ## note - treat geo sep › (unicode) like comma for now!!!
-              when '>' then geo_count = 0; [:',']  ## note - treat geo sep > (ascii) like comma for now!!!
-              when '[' then
-                 ## get out-off geo mode and backtrack (w/ next)
-                 puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
-                 @re = RE
-                 pos = old_pos
-                 next   ## backtrack (resume new loop step)
-            else
-              puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
-              nil  ## ignore others (e.g. brackets [])
-            end
-          elsif m[:any]
-             ## todo/check log error
-             msg = "parse error (tokenize geo) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
-             puts "!! WARN - #{msg}"
-             errors << msg
-             log( "!! WARN - #{msg}" )
-             nil
-          else
-            ## report error/raise exception
-             puts "!!! TOKENIZE ERROR - no match found"
-             nil
-          end
-      elsif @re == PROP_CARDS_RE
-        if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:prop_name]
-              [:PROP_NAME, m[:name]]
-         elsif m[:minute]
-              minute = {}
-              minute[:m]      = m[:value].to_i(10)
-              minute[:offset] = m[:value2].to_i(10)   if m[:value2]
-             ## note - for debugging keep (pass along) "literal" minute
-             [:MINUTE, [m[:minute], minute]]
-         elsif m[:sym]
-            sym = m[:sym]
-            case sym
-            when ',' then [:',']
-            when ';' then [:';']
-            when '-' then [:'-']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-         else
-            ## report error
-             puts "!!! TOKENIZE ERROR (PROP_CARDS_RE) - no match found"
-             nil
-         end
-      elsif @re == PROP_RE   ### todo/fix - change to LINEUP_RE !!!!
-         if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:prop_key]   ## check for inline prop keys
-              key = m[:key]
-              ##  supported for now coach/trainer (add manager?)
-              if ['coach',
-                  'trainer'].include?( key.downcase )
-                [:COACH, m[:key]]   ## use COACH_KEY or such - why? why not?
-              else
-                ## report error - for unknown (inline) prop key in lineup
-                nil
-              end
-         elsif m[:inline_captain]
-              [:INLINE_CAPTAIN, m[:inline_captain]]
-         elsif m[:inline_yellow]
-              card = {}
-              card[:m]      = m[:minute].to_i(10)  if m[:minute]
-              card[:offset] = m[:offset].to_i(10)  if m[:offset]
-              [:INLINE_YELLOW, [m[:inline_yellow], card]]
-         elsif m[:inline_red]
-              card = {}
-              card[:m]      = m[:minute].to_i(10)  if m[:minute]
-              card[:offset] = m[:offset].to_i(10)  if m[:offset]
-              [:INLINE_RED, [m[:inline_red], card]]
-         elsif m[:inline_yellow_red]
-              card = {}
-              card[:m]      = m[:minute].to_i(10)  if m[:minute]
-              card[:offset] = m[:offset].to_i(10)  if m[:offset]
-              [:INLINE_YELLOW_RED, [m[:inline_yellow_red], card]]
-         elsif m[:prop_name]
-              [:PROP_NAME, m[:name]]
-         elsif m[:minute]
-              minute = {}
-              minute[:m]      = m[:value].to_i(10)
-              minute[:offset] = m[:value2].to_i(10)   if m[:value2]
-             [:MINUTE, [m[:minute], minute]]
-         elsif m[:sym]
-            sym = m[:sym]
-            ## return symbols "inline" as is - why? why not?
-            ## (?<sym>[;,@|\[\]-])
-            case sym
-            when ',' then [:',']
-            when ';' then [:';']
-            when '[' then [:'[']
-            when ']' then [:']']
-            when '(' then [:'(']
-            when ')' then [:')']
-            when '-' then [:'-']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-         else
-            ## report error
-             puts "!!! TOKENIZE ERROR (PROP_RE) - no match found"
-             nil
-         end
-      elsif @re == PROP_ATTENDANCE_RE
-         if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:enclosed_name]
-              ## reserved for use for sold out or such (in the future) - why? why not?
-             [:ENCLOSED_NAME, m[:name]]
-         elsif m[:num]
-             [:PROP_NUM, [m[:num], { value: m[:value].to_i(10) } ]]
-=begin
-         elsif m[:sym]
-            sym = m[:sym]
-            case sym
-            when ',' then [:',']
-            when ';' then [:';']
-            # when '[' then [:'[']
-            # when ']' then [:']']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-=end
-         else
-            ## report error
-            puts "!!! TOKENIZE ERROR (PROP_ATTENDANCE_RE) - no match found"
-            nil
-         end
-      elsif @re == PROP_REFEREE_RE
-         if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:prop_key]   ## check for inline prop keys
-              key = m[:key]
-              ##  supported for now att/attn/attendance
-              if ['att', 'attn', 'attendance' ].include?( key.downcase )
-                [:ATTENDANCE, m[:key]]   ## use COACH_KEY or such - why? why not?
-              else
-                ## report error - for unknown (inline) prop key in lineup
-                nil
-              end
-         elsif m[:prop_name]    ## note - change prop_name to player
-             [:PROP_NAME, m[:name]]    ### use PLAYER for token - why? why not?
-         elsif m[:num]
-             [:PROP_NUM, [m[:num], { value: m[:value].to_i(10) } ]]
-         elsif m[:enclosed_name]
-              ## use HOLD,SAVE,POST or such keys - why? why not?
-             [:ENCLOSED_NAME, m[:name]]
-         elsif m[:sym]
-            sym = m[:sym]
-            case sym
-            when ',' then [:',']
-            when ';' then [:';']
- #           when '[' then [:'[']
- #           when ']' then [:']']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-         else
-            ## report error
-            puts "!!! TOKENIZE ERROR (PROP_REFEREE_RE) - no match found"
-            nil
-         end
-      elsif @re == PROP_PENALTIES_RE
-        if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:prop_name]    ## note - change prop_name to player
-             [:PROP_NAME, m[:name]]    ### use PLAYER for token - why? why not?
-         elsif m[:enclosed_name]
-              ## use HOLD,SAVE,POST or such keys - why? why not?
-             [:ENCLOSED_NAME, m[:name]]
-         elsif m[:score]
-              score = {}
-              ## must always have ft for now e.g. 1-1 or such
-              ###  change to (generic) score from ft -
-              ##     might be score a.e.t. or such - why? why not?
-              score[:score] = [m[:score1].to_i(10),
-                               m[:score2].to_i(10)]
-              [:SCORE, [m[:score], score]]
-         elsif m[:sym]
-            sym = m[:sym]
-            case sym
-            when ',' then [:',']
-            when ';' then [:';']
-            when '[' then [:'[']
-            when ']' then [:']']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-         else
-            ## report error
-            puts "!!! TOKENIZE ERROR (PROP_PENALTIES_RE) - no match found"
-            nil
-         end
-      elsif @re == GOAL_COMPAT_RE
-         if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:prop_name]    ## note - change prop_name to player
-             [:PLAYER, m[:name]]
-         elsif m[:minute]
-              minute = _build_minute( m )
-             [:MINUTE, [m[:minute], minute]]
-         elsif m[:goal_type]
-              goal_type = _build_goal_type( m )
-             [:GOAL_TYPE, [m[:goal_type], goal_type]]
-         elsif m[:score]
-            score = {}
-             ##  note - score is "generic"
-            ##      might be full-time (ft) or
-            ##         after extra-time (aet) or such
-            ##         or even undecided/unknown
-            ##    thus, use score1/score2 and NOT ft1/ft2
-            score[:score] = [m[:score1].to_i(10),
-                             m[:score2].to_i(10)]
-            ## note - for debugging keep (pass along) "literal" score
-            [:SCORE, [m[:score], score]]
-         elsif m[:sym]
-            sym = m[:sym]
-            ## return symbols "inline" as is - why? why not?
-            ## (?<sym>[;,@|\[\]-])
-            case sym
-            when ',' then [:',']
-            when ')'  ## leave goal mode!!
-                puts "  LEAVE GOAL_COMPAT_RE MODE"   if debug?
-                @re = RE
-                ##  note - use/return GOAL_END token   - change to GOAL_END_PAREN(THESIS)
-                ##                                or GOAL_PAREN_CLOSE/END ???
-                [:GOALS_END, '<|GOALS_END|>']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-         else
-            ## report error
-            puts "!!! TOKENIZE ERROR (GOAL_COMPAT_RE) - no match found"
-            nil
-         end
-      elsif @re == GOAL_ALT_RE
-         if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:prop_name]    ## note - change prop_name to player
-             [:PLAYER, m[:name]]
-         elsif m[:goal_minute]
-              minute = _build_goal_minute( m )
-             [:GOAL_MINUTE, [m[:goal_minute], minute]]
-         elsif m[:goal_type]
-              goal_type = _build_goal_type( m )
-             [:GOAL_TYPE, [m[:goal_type], goal_type]]
-         elsif m[:score]
-            score = {}
-             ##  note - score is "generic"
-            ##      might be full-time (ft) or
-            ##         after extra-time (aet) or such
-            ##         or even undecided/unknown
-            ##    thus, use score1/score2 and NOT ft1/ft2
-            score[:score] = [m[:score1].to_i(10),
-                             m[:score2].to_i(10)]
-            ## note - for debugging keep (pass along) "literal" score
-            [:SCORE, [m[:score], score]]
-         elsif m[:sym]
-            sym = m[:sym]
-            ## return symbols "inline" as is - why? why not?
-            ## (?<sym>[;,@|\[\]-])
-            case sym
-            when ',' then [:',']
-            when ')'  ## leave goal mode!!
-                puts "  LEAVE GOAL_ALT_RE MODE"   if debug?
-                @re = RE
-                ##  note - use/return GOAL_END token   - change to GOAL_END_PAREN(THESIS)
-                ##                                or GOAL_PAREN_CLOSE/END ???
-                [:GOALS_END, '<|GOALS_END|>']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-         else
-            ## report error
-            puts "!!! TOKENIZE ERROR (GOAL_ALT_RE) - no match found"
-            nil
-         end
-      elsif @re == GOAL_RE
-         if m[:space] || m[:spaces]
-              nil    ## skip space(s)
-         elsif m[:goals_none]    ## note - eats-up semicolon!! e.g. -; or - ;
-             [:GOALS_NONE, "<|GOALS_NONE|>"]
-         elsif m[:goal_sep_alt]
-             [:GOAL_SEP_ALT, "<|GOAL_SEP_ALT|>" ]   ## e.g. dash (-) WITH leading & trailing space required
-         elsif m[:prop_name]    ## note - change prop_name to player
-             [:PLAYER, m[:name]]
-         elsif m[:goal_minute]
-              minute = _build_goal_minute( m )
-             [:GOAL_MINUTE, [m[:goal_minute], minute]]
-         elsif m[:goal_count]
-              count = _build_goal_count( m )
-              [:GOAL_COUNT, [m[:goal_count], count]]
-         elsif m[:sym]
-            sym = m[:sym]
-            ## return symbols "inline" as is - why? why not?
-            ## (?<sym>[;,@|\[\]-])
-            case sym
-            when ',' then [:',']
-            when ';' then [:';']
-            # when '[' then [:'[']
-            # when ']' then [:']']
-            when ')'  ## leave goal mode!!
-                puts "  LEAVE GOAL_RE MODE"   if debug?
-                @re = RE
-                ##  note - use/return GOAL_END token   - change to GOAL_END_PAREN(THESIS)
-                ##                                or GOAL_PAREN_CLOSE/END ???
-                [:GOALS_END, '<|GOALS_END|>']
-            else
-              nil  ## ignore others (e.g. brackets [])
-            end
-         else
-            ## report error
-            puts "!!! TOKENIZE ERROR (GOAL_RE) - no match found"
-            nil
-         end
-      ###################################################
-      ## assume TOP_LEVEL (a.k.a. RE) machinery
-      else
-        if m[:space] || m[:spaces]
-           nil   ## skip space(s)
-        elsif m[:text]
-          ##  note - top-level (for now always) assumes TEAM for TEXT match!!
-          [:TEAM, m[:text]]   ## keep pos - why? why not?
-        elsif m[:status]   ## (match) status e.g. cancelled, awarded, etc.
-            [:STATUS, [m[:status], _build_status( m ) ]]
-        elsif m[:inline_wo]   ## w/o - walkover  (match status)
-            [:INLINE_WO, m[:inline_wo]]
-        elsif m[:inline_np]   ## n/p - not played (match status)
-            [:INLINE_NP, m[:inline_np]]
-        elsif m[:inline_bye]  ## bye  (match status)
-            [:INLINE_BYE, m[:inline_bye]]
-        elsif m[:inline_abd]  ## abd/abd. - abandoned (match status)
-            [:INLINE_ABD, m[:inline_abd]]
-        elsif m[:inline_void]  ## abd/abd. - abandoned (match status)
-            [:INLINE_VOID, m[:inline_void]]
-        elsif m[:inline_susp]  ## susp/susp. - suspended (match status)
-            [:INLINE_SUSP, m[:inline_susp]]
-        elsif m[:inline_ppd]  ## ppd/ppd. or postp/postp. - postponed (match status)
-            [:INLINE_PPD, m[:inline_ppd]]
-        elsif m[:inline_awd]  ## awd/awd. - awarded (match status)
-            [:INLINE_AWD, m[:inline_awd]]
-        elsif m[:inline_canc]  ## canc/canc. - cancelled/canceled (match status)
-            [:INLINE_CANC, m[:inline_canc]]
-        elsif m[:team_home]
-            [:TEAM_HOME, m[:team_home]]
-        elsif m[:team_away]
-            [:TEAM_AWAY, m[:team_away]]
-        elsif m[:team_neutral]
-            [:TEAM_NEUTRAL, m[:team_neutral]]
-        elsif m[:attendance]
-             att = {}
-             att[:value] = m[:value].gsub( '_', '' ).to_i(10)
-             ## note - for token id use INLINE_ATTENDANCE  (ATTENDANCE in use for prop!!!)
-            [:INLINE_ATTENDANCE, [m[:attendance], att ]]
-        elsif m[:note]
-            ###  todo/check:
-            ##      use value hash - why? why not? or simplify to:
-            ## [:NOTE, [m[:note], {note: m[:note] } ]]
-             [:NOTE, m[:note]]
-        elsif m[:time]
-            [:TIME, [m[:time], _build_time(m)]]
-        elsif m[:date]
-            [:DATE, [m[:date], _build_date(m)]]
-        elsif m[:date_legs]
-            [:DATE_LEGS, [m[:date_legs], _build_date_legs(m)]]
-        elsif m[:score_team]
-            [:SCORE_TEAM, [m[:score_team], _build_score_team(m)]]
-        elsif m[:score_team_pen]
-            [:SCORE_TEAM_PEN, [m[:score_team_pen], _build_score_team_pen(m)]]
-        elsif m[:score_team_num]
-            [:SCORE_TEAM_NUM, [m[:score_team_num], _build_score_team_num(m)]]
-          elsif m[:score_legs]
-              legs = {}
-              ### leg1
-              score = {}
-              score[:ft] = [m[:leg1_ft1].to_i(10),
-                            m[:leg1_ft2].to_i(10)]
-              legs['leg1'] = score
-              ### leg2
-              score = {}
-              score[:ft] = [m[:leg2_ft1].to_i(10),
-                            m[:leg2_ft2].to_i(10)]  if m[:leg2_ft1] && m[:leg2_ft2]
-              score[:et] = [m[:leg2_et1].to_i(10),
-                            m[:leg2_et2].to_i(10)]  if m[:leg2_et1] && m[:leg2_et2]
-              score[:p]  = [m[:leg2_p1].to_i(10),
-                            m[:leg2_p2].to_i(10)]  if m[:leg2_p1] && m[:leg2_p2]
-              legs['leg2'] = score
-              ## check for (opt) aggregate - keep on "top-level"
-              legs[:agg] = [m[:agg1].to_i(10),
-                            m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
-              legs[:away] = true  if m[:away]
-              ## note - for debugging keep (pass along) "literal" score
-              [:SCORE_LEGS, [m[:score_legs], legs]]
-        elsif m[:score_full]
-              score = {}
-              score[:p] = [m[:p1].to_i(10),
-                           m[:p2].to_i(10)]  if m[:p1] && m[:p2]
-              score[:et] = [m[:et1].to_i(10),
-                            m[:et2].to_i(10)]  if m[:et1] && m[:et2]
-              score[:ft] = [m[:ft1].to_i(10),
-                            m[:ft2].to_i(10)]  if m[:ft1] && m[:ft2]
-              score[:ht] = [m[:ht1].to_i(10),
-                            m[:ht2].to_i(10)]  if m[:ht1] && m[:ht2]
-              score[:agg] = [m[:agg1].to_i(10),
-                             m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
-              if m[:away1] && m[:away2]
-                 score[:away] = [m[:away1].to_i(10),
-                                 m[:away2].to_i(10)]
-              elsif m[:away]    ## fallback if no away score; check away flag
-                 score[:away] = true
-              end
-              ## add golden/silver flags
-              score[:golden] = true   if m[:aetgg]  ## golden goal (gg)/sudden death (sd)
-              score[:silver] = true   if m[:aetsg]  ## silver goal (sg)
-            ## note - for debugging keep (pass along) "literal" score
-            [:SCORE_FULL, [m[:score_full], score]]
-        elsif m[:score_fuller]
-              score = {}
-              score[:p] = [m[:p1].to_i(10),
-                           m[:p2].to_i(10)]  if m[:p1] && m[:p2]
-              score[:et] = [m[:et1].to_i(10),
-                            m[:et2].to_i(10)]  if m[:et1] && m[:et2]
-              score[:ft] = [m[:ft1].to_i(10),
-                            m[:ft2].to_i(10)]  if m[:ft1] && m[:ft2]
-              score[:ht] = [m[:ht1].to_i(10),
-                            m[:ht2].to_i(10)]  if m[:ht1] && m[:ht2]
-              score[:agg] = [m[:agg1].to_i(10),
-                             m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
-              if m[:away1] && m[:away2]
-                 score[:away] = [m[:away1].to_i(10),
-                                 m[:away2].to_i(10)]
-              elsif m[:away]    ## fallback if no away score; check away flag
-                 score[:away] = true
-              end
-              ## add aet flag true/false
-              # score[:aet] = true   if m[:aet] || m[:aetgg] || m[:aetsg]
-              ## add golden/silver flags
-              score[:golden] = true   if m[:aetgg]  ## golden goal (gg)/sudden death (sd)
-              score[:silver] = true   if m[:aetsg]  ## silver goal (sg)
-            ## note - for debugging keep (pass along) "literal" score
-            [:SCORE_FULLER, [m[:score_fuller], score]]
-        elsif m[:score_fuller_more]
-               ##    SCORE + SCORE_FULLER_MORE
-               ## note -  after extra-time (aet) or full-time (ft)
-               ##           score may be present in SCORE!!!
-              score = {}
-              score[:p] = [m[:p1].to_i(10),
-                           m[:p2].to_i(10)]  if m[:p1] && m[:p2]
-              score[:et] = [m[:et1].to_i(10),
-                            m[:et2].to_i(10)]  if m[:et1] && m[:et2]
-              score[:ft] = [m[:ft1].to_i(10),
-                            m[:ft2].to_i(10)]  if m[:ft1] && m[:ft2]
-              score[:ht] = [m[:ht1].to_i(10),
-                            m[:ht2].to_i(10)]  if m[:ht1] && m[:ht2]
-              score[:agg] = [m[:agg1].to_i(10),
-                             m[:agg2].to_i(10)]  if m[:agg1] && m[:agg2]
-              if m[:away1] && m[:away2]
-                 score[:away] = [m[:away1].to_i(10),
-                                 m[:away2].to_i(10)]
-              elsif m[:away]    ## fallback if no away score; check away flag
-                 score[:away] = true
-              end
-              ## add flag in score for et/ft/ht
-              score[:score] = 'et'   if m[:aet] || m[:aetgg] || m[:aetsg]
-              score[:score] = 'ft'   if m[:ft]
-              score[:score] = 'ht'   if m[:ht]
-              ## add golden/silver flags
-              score[:golden] = true   if m[:aetgg]  ## golden goal (gg)/sudden death (sd)
-              score[:silver] = true   if m[:aetsg]  ## silver goal (sg)
-            ## note - for debugging keep (pass along) "literal" score
-            [:SCORE_FULLER_MORE, [m[:score_fuller_more], score]]
-        elsif m[:score]
-            score = {}
-             ##  note - score is "generic"
-            ##      might be full-time (ft) or
-            ##         after extra-time (aet) or such
-            ##         or even undecided/unknown
-            ##    thus, use score1/score2 and NOT ft1/ft2
-            score[:score] = [m[:score1].to_i(10),
-                             m[:score2].to_i(10)]
-         ## note - for debugging keep (pass along) "literal" score
-          [:SCORE, [m[:score], score]]
-        elsif m[:score_awd]   ## score awarded (awd/awd.)
-            score = {}
-            ### note - use "generic" score for now
-            ##         to match  A 3-0 B [awarded] etc.
-            score[:score] = [m[:score1].to_i(10),
-                             m[:score2].to_i(10)]
-            ## add score[:awarded] = true ???
-            ##    or only use match status to avoid duplicate?
-            [:SCORE_AWD, [m[:score_awd], score]]
-        elsif m[:score_abd]   ## score abandonded (abd/abd.)
-            score = {}
-            ### note - use "generic" score for now
-            score[:score] = [m[:score1].to_i(10),
-                             m[:score2].to_i(10)]
-            ## add score[:awarded] = true ???
-            ##    or only use match status to avoid duplicate?
-            [:SCORE_ABD, [m[:score_abd], score]]
-      elsif m[:minute]
-              minute = {}
-              minute[:m]      = m[:value].to_i(10)
-              minute[:offset] = m[:value2].to_i(10)   if m[:value2]
-             ## note - for debugging keep (pass along) "literal" minute
-             [:MINUTE, [m[:minute], minute]]
-        elsif m[:vs]
-           [:VS, m[:vs]]
-        elsif m[:sym]
-          sym = m[:sym]
-          ## return symbols "inline" as is - why? why not?
-          ## (?<sym>[;,@|\[\]-])
-          case sym
-          when '@'    ##  enter geo mode
-            puts "  ENTER GEO_RE MODE"  if debug?
-            @re = GEO_RE
-            geo_count = 0
-            [:'@']
-          when ',' then [:',']
-          when ';' then [:';']
-          when '/' then [:'/']
-          when '|' then [:'|']
-          when '[' then [:'[']
-          when ']' then [:']']
-          when '-' then [:'-']
-          when '('    ## enter goal scorer mode on "free-floating" open paranthesis!!!
-             puts "  ENTER GOAL_RE MODE"   if debug?
-             @re = GOAL_RE
-              ## note - eat-up ( for now; do NOT pass along as token
-              ##       pass along "virtual" INLINE GOALS - why? why not?
-              [:INLINE_GOALS, "<|INLINE_GOALS|>"]
-          when ')' then [:')']
-          else
-            puts "!!! TOKENIZE ERROR (sym) - ignore sym >#{sym}<"
-            nil  ## ignore others (e.g. brackets [])
-          end
-        elsif m[:any]
-           ## todo/check log error
-           msg = "parse error (tokenize) - skipping any match>#{m[:any]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
-           puts "!! WARN - #{msg}"
-           errors << msg
-           log( "!! WARN - #{msg}" )
-           nil
-        else
-          ## report error
-           puts "!!! TOKENIZE ERROR - no match found"
-           nil
-        end
-      end
-    tokens << t    if t
-#    if debug?
-#      print ">"
-#      print "*" * pos
-#      puts "#{line[pos..-1]}<"
-#    end
-  end
-  ## check if no match in end of string
-  if offsets[1] != line.size
-    msg =  "!! WARN - parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
-    puts msg
-    log( msg )
-    errors << "parse error (tokenize) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
-  end
-  # if @re == GOAL_RE   ### ALWAYS switch back to top level mode
-  #   puts "  LEAVE GOAL_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
-  #   @re = RE
-  # end
-   if @re == GEO_RE   ### ALWAYS switch back to top level mode
-     puts "  LEAVE GEO_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
-     @re = RE
-   end
-   @re = RE  if @re == GROUP_DEF_RE   ### ALWAYS switch back to top level mode
-   @re = RE  if @re == ROUND_DEF_RE
-   ##
-   ## if in prop mode continue if   last token is [,-]
-   ##        otherwise change back to "standard" mode
-   if @re == PROP_RE            || @re == PROP_CARDS_RE ||
-      @re == PROP_PENALTIES_RE ||
-      @re == PROP_ATTENDANCE_RE || @re == PROP_REFEREE_RE
-     if [:',', :'-', :';'].include?( tokens[-1][0] )
-        ## continue/stay in PROP_RE mode
-        ##  todo/check - auto-add PROP_CONT token or such
-        ##                to help parser with possible NEWLINE
-        ##                  conflicts  - why? why not?
-     else
-        ## switch back to top-level mode!!
-        puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
-        @re = RE
-        ## note - auto-add PROP_END (<PROP_END>)
-        tokens << [:PROP_END, "<|PROP_END|>"]
-     end
-   end
-  [tokens,errors]
-end
 end  # class Lexer
 end # module SportDb