RubyGems - sportdb-parser - Versions diffs - 0.7.1 → 0.7.2 - Mend

sportdb-parser 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +17 -4
data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
data/lib/sportdb/parser/lexer-on_top.rb +125 -0
data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
data/lib/sportdb/parser/lexer.rb +133 -1363
data/lib/sportdb/parser/lexer_buffer.rb +8 -37
data/lib/sportdb/parser/lexer_token.rb +126 -0
data/lib/sportdb/parser/parser.rb +1104 -1403
data/lib/sportdb/parser/racc_parser.rb +36 -32
data/lib/sportdb/parser/racc_tree.rb +65 -98
data/lib/sportdb/parser/token-date--helpers.rb +130 -0
data/lib/sportdb/parser/token-date--names.rb +108 -0
data/lib/sportdb/parser/token-date.rb +20 -192
data/lib/sportdb/parser/token-date_duration.rb +8 -27
data/lib/sportdb/parser/token-geo.rb +16 -16
data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
data/lib/sportdb/parser/token-goals.rb +103 -249
data/lib/sportdb/parser/token-group.rb +8 -22
data/lib/sportdb/parser/token-prop.rb +138 -124
data/lib/sportdb/parser/token-prop_name.rb +48 -39
data/lib/sportdb/parser/token-round.rb +21 -35
data/lib/sportdb/parser/token-score--helpers.rb +189 -0
data/lib/sportdb/parser/token-score.rb +9 -393
data/lib/sportdb/parser/token-score_full.rb +331 -0
data/lib/sportdb/parser/token-status.rb +44 -46
data/lib/sportdb/parser/token-status_inline.rb +112 -0
data/lib/sportdb/parser/token-text.rb +41 -31
data/lib/sportdb/parser/token-time.rb +29 -26
data/lib/sportdb/parser/token.rb +58 -159
data/lib/sportdb/parser/version.rb +1 -1
data/lib/sportdb/parser.rb +45 -17
metadata +19 -6
data/lib/sportdb/parser/blocktxt.rb +0 -99
data/lib/sportdb/parser/lexer_tty.rb +0 -111
data/lib/sportdb/parser/token-table.rb +0 -149
data/lib/sportdb/parser/token_helpers.rb +0 -92

data/lib/sportdb/parser.rb CHANGED

@@ -1,52 +1,80 @@
-## pulls in
 require 'cocos'
-####
-# try a (simple) tokenizer/parser with regex
-## note - match line-by-line
-#            avoid massive backtracking by definition
-#             that is, making it impossible
-## sym(bols) -
-##  text - change text to name - why? why not?
+require_relative 'parser/version'
-require_relative 'parser/version'
+## core machinery
 ##
-## generic helper
-require_relative 'parser/blocktxt'
+## add shared/most basic regexes here
+## todo - use ANY_RE  to token_commons or such - for shared by many?
+module SportDb
+class Lexer
+## general catch-all  (RECOMMENDED (ALWAYS) use as last entry in union)
+##   to avoid advance of pos match!!!
+ANY_RE = %r{
+               (?<any> .)
+          }ix
+SPACES_RE = %r{
+                  (?<spaces> [ ]{2,})
+                | (?<space>  [ ])
+             }ix
+end # class Lexer
+end # module SportDb
-## core machinery
 require_relative 'parser/token-score'
+require_relative 'parser/token-score_full'
 require_relative 'parser/token-score_fuller'
 require_relative 'parser/token-score_legs'
+require_relative 'parser/token-score--helpers'
 require_relative 'parser/token-time'
+require_relative 'parser/token-date--names'
 require_relative 'parser/token-date'
 require_relative 'parser/token-date_duration'
+require_relative 'parser/token-date--helpers'
 require_relative 'parser/token-text'
+require_relative 'parser/token-prop'    ## team prop(erty) mode (note - must be before token)
 require_relative 'parser/token-prop_name'    ## a.k.a token-text_ii
 require_relative 'parser/token-status'
+require_relative 'parser/token-status_inline'
 require_relative 'parser/token-note'
 require_relative 'parser/token-goals'
-require_relative 'parser/token-prop'    ## team prop(erty) mode (note - must be before token)
+require_relative 'parser/token-goals--helpers'
 require_relative 'parser/token-geo'
 require_relative 'parser/token-group'
 require_relative 'parser/token-round'
-require_relative 'parser/token-table'
 require_relative 'parser/token'
-### add token ("private") parse helpers  e.g.  _parse_team() etc.
-require_relative 'parser/token_helpers'
 require_relative 'parser/lexer_buffer'   ## incl. Tokens (aka TokenBuffer)
+require_relative 'parser/lexer-prep_doc'
+require_relative 'parser/lexer-prep_line'
+require_relative 'parser/lexer_token'
+require_relative 'parser/lexer-tokenize'
+require_relative 'parser/lexer-on_round_def'
+require_relative 'parser/lexer-on_group_def'
+require_relative 'parser/lexer-on_prop_misc'
+require_relative 'parser/lexer-on_prop_lineup'
+require_relative 'parser/lexer-on_prop_penalties'
+require_relative 'parser/lexer-on_goal'
+require_relative 'parser/lexer-on_top'
 require_relative 'parser/lexer'
-require_relative 'parser/lexer_tty'      ## teletype (tty) mode
 ## note - use "embeded" racc parser runtime

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.7.1
+  version: 0.7.2
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-05-22 00:00:00.000000000 Z
+date: 2026-06-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cocos
@@ -72,32 +72,45 @@ files:
 - README.md
 - Rakefile
 - lib/sportdb/parser.rb
-- lib/sportdb/parser/blocktxt.rb
+- lib/sportdb/parser/lexer-on_goal.rb
+- lib/sportdb/parser/lexer-on_group_def.rb
+- lib/sportdb/parser/lexer-on_prop_lineup.rb
+- lib/sportdb/parser/lexer-on_prop_misc.rb
+- lib/sportdb/parser/lexer-on_prop_penalties.rb
+- lib/sportdb/parser/lexer-on_round_def.rb
+- lib/sportdb/parser/lexer-on_top.rb
+- lib/sportdb/parser/lexer-prep_doc.rb
+- lib/sportdb/parser/lexer-prep_line.rb
+- lib/sportdb/parser/lexer-tokenize.rb
 - lib/sportdb/parser/lexer.rb
 - lib/sportdb/parser/lexer_buffer.rb
-- lib/sportdb/parser/lexer_tty.rb
+- lib/sportdb/parser/lexer_token.rb
 - lib/sportdb/parser/parser.rb
 - lib/sportdb/parser/parser_runtime.rb
 - lib/sportdb/parser/racc_parser.rb
 - lib/sportdb/parser/racc_tree.rb
+- lib/sportdb/parser/token-date--helpers.rb
+- lib/sportdb/parser/token-date--names.rb
 - lib/sportdb/parser/token-date.rb
 - lib/sportdb/parser/token-date_duration.rb
 - lib/sportdb/parser/token-geo.rb
+- lib/sportdb/parser/token-goals--helpers.rb
 - lib/sportdb/parser/token-goals.rb
 - lib/sportdb/parser/token-group.rb
 - lib/sportdb/parser/token-note.rb
 - lib/sportdb/parser/token-prop.rb
 - lib/sportdb/parser/token-prop_name.rb
 - lib/sportdb/parser/token-round.rb
+- lib/sportdb/parser/token-score--helpers.rb
 - lib/sportdb/parser/token-score.rb
+- lib/sportdb/parser/token-score_full.rb
 - lib/sportdb/parser/token-score_fuller.rb
 - lib/sportdb/parser/token-score_legs.rb
 - lib/sportdb/parser/token-status.rb
-- lib/sportdb/parser/token-table.rb
+- lib/sportdb/parser/token-status_inline.rb
 - lib/sportdb/parser/token-text.rb
 - lib/sportdb/parser/token-time.rb
 - lib/sportdb/parser/token.rb
-- lib/sportdb/parser/token_helpers.rb
 - lib/sportdb/parser/version.rb
 homepage: https://github.com/sportdb/sport.db
 licenses:

data/lib/sportdb/parser/blocktxt.rb DELETED

@@ -1,99 +0,0 @@
-###
-#  generic block text/txt helper
-## todo/chekc - find a better name SectTxt or ??
-class BlockTxt
-def self.parse( txt ) new( txt); end
-def self.read( path ) parse( read_text( path )); end
-def initialize( txt )
-   @sects = _parse( txt )
-   self
-end
-def size()       @sects.size; end
-def each( &blk )            @sects.each( &blk ); end
-def each_with_index( &blk ) @sects.each_with_index( &blk ); end
-def text
-    ## only get all txt1 parts joined as single all-in-one string txt
-    @sects.map {|sect| sect[0] }.join( "\n" )
-end
-def dump   ## for debugging
-    puts "==> sects (#{@sects.size}):"
-    pp @sects
-    puts "    #{@sects.size} sect(s)"
-end
-##
-#  quick support for  __END__
-END_RE = %r{ ^
-                          [ ]* __END__ [ ]*
-                            .*?
-                         \z   ## note - until end-of-string/file !!!
-                      }mx
-##    SECT_RE (old) = %r{^
-##                                 [ ]* --- [ ]*
-##                              $}x
-##
-##  do NOT use --- (used in fbtxt and markdown and yaml etc.)
-## e.g.  §  or §§ or § § § or such
-##    maybe allow  :: § :: or such too   or --- § --- or such
-SECT_RE = %r{^
-                     [ ]* §
-                         ([ ]*§)*
-                     [ ]*
-                 $}x
-## split by " => or  =====> "
-## todo/check - subsect?? find a better name?  in/out or txt1/txt2
-SUBSECT_RE = %r{^
-                        [ ]*
-                            =+ >
-                        [ ]*
-                  $}x
-def _parse( txt )
-    blocks = []   ## note - holds [txt,exp] pairs
-    txt = txt.sub( END_RE, '' )
-    ## split by §
-    sections = txt.split( SECT_RE  )
-    sections.each_with_index do |sect,i|
-       ## puts ">>> start #{i+1}"
-       ## pp sect
-       ## puts "<<< end #{i+1}"
-      txt1, txt2 = sect.split( SUBSECT_RE )
-      blocks << [txt1,txt2]
-    end
-    blocks
-end
-end # class BlockTxt
-###
-#  function-style helpers
-def read_blocktxt( path ); BlockTxt.read( path ); end
-def parse_blocktxt( txt ); BlockTxt.new( txt ); end

data/lib/sportdb/parser/lexer_tty.rb DELETED

@@ -1,111 +0,0 @@
-module SportDb
-class Lexer
-########
-##   experimental teletype mode
-##    only space, A-Z and 0-9 allowed
-IS_TTY_LINE_RE = %r{  \A
-                       ## note - use NEGATIVE lookahead to exclude blank lines
-                         (?! [ ]*\z)
-                          [A-Z0-9 ]+
-                      \z
-                  }x
-TTY_SPACES_RE = %r{ (?<spaces> [ ]{2,}) |
-                    (?<space>  [ ])
-                  }x
-TTY_NUM_RE    = %r{   \b  (?<num> \d+ ) \b
-                  }x
-##
-##  note - TEXT for now allows    A, 1A, A1, A1A, A1 B1 C1,
-##                                A1AA1 2B22 3C33
-##    - single space only for concat
-##       text segments MUST NOT be all numbers e.g. 1, 11, etc.
-TTY_TEXT_RE   = %r{   \b (?<text>
-                           (?:
-                              [A-Z]  ## MUST start with letter
-                                |
-                               [0-9]+[A-Z]   ## or numbers followed by letter
-                             )
-                             [0-9A-Z]*
-                             (?:
-                                 ### allow move segements separated
-                                 ##     by single space
-                                  [ ]
-                                 (?:
-                                     [A-Z]  ## MUST start with letter
-                                      |
-                                     [0-9]+[A-Z]   ## or numbers followed by letter
-                                  )
-                                 [0-9A-Z]*
-                             )*
-                          )
-                          \b
-                  }x
-TTY_RE = Regexp.union(
-                TTY_SPACES_RE,
-                TTY_TEXT_RE,
-                TTY_NUM_RE,
-                ##  fix add ANY_RE,
-)
-def _tokenize_tty_line( line )
-   line = line.strip
-   tokens = []
-   ## track last offsets - to report error on no match
-   ##   or no match in end of string
-   offsets = [0,0]
-   pos = 0
-   m = nil
-  while m = TTY_RE.match( line, pos )
-    offsets = [m.begin(0), m.end(0)]
-    if offsets[0] != pos
-      ## match NOT starting at start/begin position!!!
-      ##  report parse error!!!
-      msg =  "!! WARN - tokenize (tty) error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
-      puts msg
-      log( msg )
-    end
-    pos = offsets[1]
-    t =  if m[:spaces] || m[:space]
-               nil    ## skip spaces
-          elsif m[:text]
-            [:TTY_TEXT, m[:text]]
-          elsif m[:num]
-            [:TTY_NUM, m[:num].to_i(10)]
-          else
-              ## report error/raise expection
-              puts "!!! TTY TOKENIZE ERROR - no match found"
-              nil
-          end
-    tokens << t    if t
-  end
-  ## check if no match in end of string
-  if offsets[1] != line.size
-      msg =  "!! WARN - tokenize (tty) error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
-      puts msg
-      log( msg )
-  end
-  tokens
-end
-end  # class Lexer
-end # module SportDb

data/lib/sportdb/parser/token-table.rb DELETED

@@ -1,149 +0,0 @@
-module SportDb
-class Lexer
-###
-## check for
-##   table (standing) lines
-##
-##  e.g.
-##
-##        Pld W D L GF-GA Pts   |  d d d d-d d
-##        Pld GF-GA Pts         |  d d-d d
-##        Pld Pts W D L GF-GA   |  d d d d d d-d
-##
-## Pld   = matches played
-## GF-GA = goal for, goal against
-##        Pld W D L GF-GA Pts   |  d d d d-d d
-##
-##  1.BRAZIL            3  2  1  0   7- 2  7
-##  2.MEXICO            3  2  1  0   4- 1  7
-##  3.Croatia           3  1  0  2   6- 6  3
-##  4.Cameroon          3  0  0  3   1- 9  0
-##  add more headings?? e.g.
-##    Final Table:
-##
-TABLE_HEADING_I_RE = %r{
-       \A
-        [ ]*  ## ignore leading spaces (if any)
-       (?<table_heading>
-         \b
-          P(?:ld)?  [ ]+
-           W        [ ]+
-           D        [ ]+
-           L        [ ]+
-           Gls      [ ]+
-           Pts
-        \b
-         )
-        [ ]*  ## ignore trailing spaces (if any)
-        \z
-   }xi
-##
-##  "solid"-style
-##     -----------------------------------------------------
-##  "dashed"-style ??
-##     - - - - - - - - - - - - - - - - - - - - - - - - - - -
-TABLE_DIVIDER_RE = %r{
-            \A
-        [ ]*  ## ignore leading spaces (if any)
-            (?<table_divider>
-                (?:  ---   ## note - require three dashes minimum (---)
-                      [-]*
-                )
-                  |
-                (?: - [ ]+ - [ ]+ -  ## note - require three dashes minimum (- - -)
-                      (?: [ ]+ -)*   ##   todo/check - restrict spaces to 2 or 3 or such - why? why not?
-                )
-            )
-        [ ]*  ## ignore trailing spaces (if any)
-            \z
-      }xi
-####
-##   1.SOLOMON I.    1  1  0  0  3- 1  3
-##   2.TAHITI        1  0  0  1  1- 3  0
-##   -.Cook Islands  withdrew after first match (annulled) due to Covid-19 outbreak in squad
-##   -.Vanuatu       withdrew before playing any matches due to Covid-19 outbreak in squad  -->
-##
-##  note - starting with -. is a table note!!!
-TABLE_NOTE_RE = %r{
-       \A
-        [ ]*  ## ignore leading spaces (if any)
-           -\.
-           [ ]*
-       (?<table_note>
-            .+?   ## note - use non-greedy
-         )
-        [ ]*  ## ignore trailing spaces (if any)
-        \z
-}xi
-TABLE_I_RE = %r{
-        (?<table>\b
-             \d{1,2} [ ]+                        # Pld
-             \d{1,2} [ ]+                        # W
-             \d{1,2} [ ]+                        # D
-             \d{1,2} [ ]+                        # L
-             (?: \d{1,3} - [ ]* \d{1,3} [ ]+ )   # GF-GA
-             \d{1,3}                             # Pts
-              \b
-        )}xi
-##      Pld Pts W D L GF-GA   |  d d d d d d-d
-##
-## 1. ARG^         3  6  3 0 0    10-4
-## 2. CHI          3  4  2 0 1     5-3
-## 3. FRA          3  2  1 0 2     4-3
-## 4. MEX          3  0  0 0 3     4-13
-TABLE_II_RE = %r{
-        (?<table>\b
-             \d{1,2} [ ]+                        # Pld
-             \d{1,3} [ ]+                        # Pts
-             \d{1,2} [ ]+                        # W
-             \d{1,2} [ ]+                        # D
-             \d{1,2} [ ]+                        # L
-             (?: \d{1,3} - [ ]* \d{1,3})   # GF-GA
-              \b
-        )}xi
-#############################################
-# map tables
-#  note: order matters; first come-first matched/served
-##  possible start lines for a table
-##    excludes NOTE
-##    and RULER (e.g. --- or) or such in the future
-TABLE_RE = Regexp.union(
-    TABLE_HEADING_I_RE,
-    TABLE_I_RE,
-    TABLE_II_RE,
-)
-## all possible continuation for a table
-##   excludes HEADING
-TABLE_MORE_RE = Regexp.union(
-    TABLE_NOTE_RE,
-    TABLE_DIVIDER_RE,
-    TABLE_I_RE,
-    TABLE_II_RE,
-)
-end  #   class Lexer
-end  # module SportDb

data/lib/sportdb/parser/token_helpers.rb DELETED

@@ -1,92 +0,0 @@
-module SportDb
-class Lexer
-=begin
-def self._mk_is( re )
-    ##   add  \A ... \z to regex
-    ##     for strict matching of beginning and end of string
-    ##     regex note -  \z will NOT allow trailing newline(s)!!!!
-    ##      note - must double espace \\A,\\z  in quoted string!!
-    Regexp.new( %Q<   \\A
-                    (?:#{re.source})
-                       \\z
-                  >, re.options )
-end
-  IS_TEAM_RE = _mk_is( TEXT_RE )   ## todo/fix - rename TEXT_RE to TEAM_RE!!!
-  IS_DATE_RE = _mk_is( DATE_IIII_RE )    ## DATE_RE )
-=end
-def self._parse_team( str )
-    ## note - strip - leading/trailing spaces
-    m = TEXT_RE.match( str.strip )
-    if m && m.pre_match == '' && m.post_match == ''
-      m
-    elsif  m
-        ## note - match BUT not anchored to start and end-of-string!!!
-        ##  report, error somehow??
-      nil
-    else
-      nil  ## no match - return nil
-    end
-end
-def self._parse_date( str )
-    ## note - strip - leading/trailing spaces
-    m = DATE_RE.match( str.strip )
-    ####  todo/fix/check:
-    ###   wrapped with  \A \z NOT working with union  - check later - why?
-    ###   use hand-coded  with pre_match = "" and post_match = ""
-    if m && m.pre_match == '' && m.post_match == ''
-      ## return hash table with captured components
-      date = {}
-      ## map month names
-      ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
-      date[:y]  = m[:year].to_i(10)  if m[:year]
-      ## check - use y too for two-digit year or keep separate - why? why not?
-      date[:yy] = m[:yy].to_i(10)    if m[:yy]    ## two digit year (e.g. 25 or 78 etc.)
-      date[:m]  = m[:month].to_i(10)  if m[:month]
-      date[:m]  = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
-      date[:d]  = m[:day].to_i(10)   if m[:day]
-      date[:wday] = DAY_MAP[ m[:day_name].downcase ]   if m[:day_name]
-      date
-    elsif  m
-        ## note - match BUT not anchored to start and end-of-string!!!
-        ##  report, error somehow??
-      nil
-    else
-      nil  ## no match - return nil
-    end
-end
-def self._parse_score_full( str )
-    ## note - strip - leading/trailing spaces
-    m=SCORE_FULL_RE.match( str )
-    if m && m.pre_match == '' && m.post_match == ''
-       score = {}
-       score[:p]  = [m[:p1].to_i,m[:p2].to_i]     if m[:p1] && m[:p2]
-       score[:et] = [m[:et1].to_i,m[:et2].to_i]   if m[:et1] && m[:et2]
-       score[:ft] = [m[:ft1].to_i,m[:ft2].to_i]   if m[:ft1] && m[:ft2]
-       score[:ht] = [m[:ht1].to_i,m[:ht2].to_i]   if m[:ht1] && m[:ht2]
-       ## score[:agg] = [m[:agg1].to_i,m[:agg2].to_i]   if m[:agg1] && m[:agg2]
-       score
-    elsif  m
-        ## note - match BUT not anchored to start and end-of-string!!!
-        ##  report, error somehow??
-      nil
-    else
-      nil  ## no match - return nil
-    end
-end
-end  #   class Lexer
-end  # module SportDb