sportdb-parser 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -1
- data/lib/sportdb/parser/{tokenizer.rb → lexer.rb} +38 -29
- data/lib/sportdb/parser/parser.rb +340 -320
- data/lib/sportdb/parser/racc_parser.rb +40 -12
- data/lib/sportdb/parser/racc_tree.rb +1 -1
- data/lib/sportdb/parser/token-date.rb +2 -2
- data/lib/sportdb/parser/token-score.rb +2 -2
- data/lib/sportdb/parser/token-status.rb +2 -2
- data/lib/sportdb/parser/token-text.rb +2 -2
- data/lib/sportdb/parser/token.rb +2 -2
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +31 -12
- metadata +3 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b3c102d758209b64a04033a772faad7cdaaa4631f5079e56b92dccdfc4b84292
         | 
| 4 | 
            +
              data.tar.gz: 4b49b9a0234be96c552233b74fb4b2f8702b5d402d264382f2b13b9367515740
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 78faffba17eff5ff5dd4b665099cee8eff8addd5e8263433f0662da8a88bd4fa5fa80ed83968dcdb4b7c95ab4254508b156dc09c79ee2d58e556e20cf2168aba
         | 
| 7 | 
            +
              data.tar.gz: 4a9a5546ccff399028a2e629a87e86e7c3ee505b7c5b16a15de0053918ddc8e7704c2c8c75517feaab0a9bb5648a8bcf63c66b2cc34800f9d41335748e336f66
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/Manifest.txt
    CHANGED
    
    | @@ -9,6 +9,7 @@ config/rounds_misc.txt | |
| 9 9 | 
             
            config/rounds_pt.txt
         | 
| 10 10 | 
             
            lib/sportdb/parser.rb
         | 
| 11 11 | 
             
            lib/sportdb/parser/lang.rb
         | 
| 12 | 
            +
            lib/sportdb/parser/lexer.rb
         | 
| 12 13 | 
             
            lib/sportdb/parser/parser.rb
         | 
| 13 14 | 
             
            lib/sportdb/parser/racc_parser.rb
         | 
| 14 15 | 
             
            lib/sportdb/parser/racc_tree.rb
         | 
| @@ -17,5 +18,4 @@ lib/sportdb/parser/token-score.rb | |
| 17 18 | 
             
            lib/sportdb/parser/token-status.rb
         | 
| 18 19 | 
             
            lib/sportdb/parser/token-text.rb
         | 
| 19 20 | 
             
            lib/sportdb/parser/token.rb
         | 
| 20 | 
            -
            lib/sportdb/parser/tokenizer.rb
         | 
| 21 21 | 
             
            lib/sportdb/parser/version.rb
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 |  | 
| 2 2 | 
             
            module SportDb
         | 
| 3 | 
            -
            class  | 
| 3 | 
            +
            class Lexer
         | 
| 4 4 |  | 
| 5 5 |  | 
| 6 6 |  | 
| @@ -14,6 +14,20 @@ def log( msg ) | |
| 14 14 | 
             
            end
         | 
| 15 15 |  | 
| 16 16 |  | 
| 17 | 
            +
              ###
         | 
| 18 | 
            +
              ##  todo/fix -   use LangHelper or such
         | 
| 19 | 
            +
              ##   e.g.     class Lexer
         | 
| 20 | 
            +
              ##                include LangHelper
         | 
| 21 | 
            +
              ##            end
         | 
| 22 | 
            +
              ##
         | 
| 23 | 
            +
              ##  merge back Lang into Lexer - why? why not?
         | 
| 24 | 
            +
              ## keep "old" access to checking for group, round & friends
         | 
| 25 | 
            +
              ##    for now for compatibility
         | 
| 26 | 
            +
              def is_group?( text )  Lang.is_group?( text ); end
         | 
| 27 | 
            +
              def is_round?( text )  Lang.is_round?( text ); end
         | 
| 28 | 
            +
              def is_leg?( text )    Lang.is_leg?( text ); end
         | 
| 29 | 
            +
             | 
| 30 | 
            +
             | 
| 17 31 | 
             
            ## transforms
         | 
| 18 32 | 
             
            ##
         | 
| 19 33 | 
             
            ##  Netherlands  1-2 (1-1)   England
         | 
| @@ -107,15 +121,11 @@ end  # class Tokens | |
| 107 121 |  | 
| 108 122 |  | 
| 109 123 |  | 
| 110 | 
            -
             | 
| 111 | 
            -
            def tokenize( lines, debug: false )
         | 
| 112 | 
            -
              tokens, _ = tokenize_with_errors( lines, debug: debug )
         | 
| 113 | 
            -
              tokens
         | 
| 114 | 
            -
            end
         | 
| 124 | 
            +
            def debug?()  @debug == true; end
         | 
| 115 125 |  | 
| 116 | 
            -
            def  | 
| 126 | 
            +
            def initialize( lines, debug: false )
         | 
| 127 | 
            +
               @debug = debug
         | 
| 117 128 |  | 
| 118 | 
            -
            ##
         | 
| 119 129 | 
             
            ##  note - for convenience - add support
         | 
| 120 130 | 
             
            ##         comments (incl. inline end-of-line comments) and empty lines here
         | 
| 121 131 | 
             
            ##             why? why not?
         | 
| @@ -137,33 +147,33 @@ def tokenize_with_errors( lines, debug: false ) | |
| 137 147 | 
             
                ##   strip lines with comments and empty lines striped / removed
         | 
| 138 148 | 
             
                ##      keep empty lines? why? why not?
         | 
| 139 149 | 
             
                ##      keep leading spaces (indent) - why?
         | 
| 140 | 
            -
                txt = String.new
         | 
| 150 | 
            +
                @txt = String.new
         | 
| 141 151 | 
             
                txt_pre.each_line do |line|    ## preprocess
         | 
| 142 152 | 
             
                   line = line.strip
         | 
| 143 153 | 
             
                   next if line.empty? || line.start_with?('#')   ###  skip empty lines and comments
         | 
| 144 154 |  | 
| 145 155 | 
             
                   line = line.sub( /#.*/, '' ).strip             ###  cut-off end-of line comments too
         | 
| 146 156 |  | 
| 147 | 
            -
                   txt << line
         | 
| 148 | 
            -
                   txt << "\n"
         | 
| 157 | 
            +
                   @txt << line
         | 
| 158 | 
            +
                   @txt << "\n"
         | 
| 149 159 | 
             
                end
         | 
| 150 | 
            -
             | 
| 160 | 
            +
            end
         | 
| 161 | 
            +
             | 
| 151 162 |  | 
| 163 | 
            +
             | 
| 164 | 
            +
            def tokenize_with_errors
         | 
| 152 165 | 
             
                tokens_by_line = []   ## note: add tokens line-by-line (flatten later)
         | 
| 153 166 | 
             
                errors         = []   ## keep a list of errors - why? why not?
         | 
| 154 167 |  | 
| 155 | 
            -
                txt.each_line do |line|
         | 
| 168 | 
            +
                @txt.each_line do |line|
         | 
| 156 169 | 
             
                    line = line.rstrip   ## note - MUST remove/strip trailing newline (spaces optional)!!!
         | 
| 157 170 |  | 
| 158 | 
            -
                    more_tokens, more_errors = _tokenize_line( line | 
| 171 | 
            +
                    more_tokens, more_errors = _tokenize_line( line )
         | 
| 159 172 |  | 
| 160 173 | 
             
                    tokens_by_line  << more_tokens   
         | 
| 161 174 | 
             
                    errors          += more_errors
         | 
| 162 175 | 
             
                end # each line
         | 
| 163 176 |  | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 177 | 
             
                tokens_by_line = tokens_by_line.map do |tokens|
         | 
| 168 178 | 
             
                    #############
         | 
| 169 179 | 
             
                    ## pass 1
         | 
| @@ -246,11 +256,11 @@ end   # method tokenize_with_errors | |
| 246 256 |  | 
| 247 257 |  | 
| 248 258 |  | 
| 249 | 
            -
            def _tokenize_line( line | 
| 259 | 
            +
            def _tokenize_line( line )
         | 
| 250 260 | 
             
              tokens = []
         | 
| 251 261 | 
             
              errors = []   ## keep a list of errors - why? why not?
         | 
| 252 262 |  | 
| 253 | 
            -
              puts ">#{line}<"    if debug
         | 
| 263 | 
            +
              puts ">#{line}<"    if debug?
         | 
| 254 264 |  | 
| 255 265 | 
             
              pos = 0
         | 
| 256 266 | 
             
              ## track last offsets - to report error on no match
         | 
| @@ -265,7 +275,7 @@ def _tokenize_line( line, debug: false ) | |
| 265 275 |  | 
| 266 276 |  | 
| 267 277 | 
             
              while m = @re.match( line, pos )
         | 
| 268 | 
            -
                if debug
         | 
| 278 | 
            +
                if debug?
         | 
| 269 279 | 
             
                  pp m
         | 
| 270 280 | 
             
                  puts "pos: #{pos}"
         | 
| 271 281 | 
             
                end
         | 
| @@ -274,10 +284,10 @@ def _tokenize_line( line, debug: false ) | |
| 274 284 | 
             
                if offsets[0] != pos
         | 
| 275 285 | 
             
                  ## match NOT starting at start/begin position!!!
         | 
| 276 286 | 
             
                  ##  report parse error!!!
         | 
| 277 | 
            -
                  msg =  "!! WARN - parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
         | 
| 287 | 
            +
                  msg =  "!! WARN - parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
         | 
| 278 288 | 
             
                  puts msg
         | 
| 279 289 |  | 
| 280 | 
            -
                  errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
         | 
| 290 | 
            +
                  errors << "parse error (tokenize) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
         | 
| 281 291 | 
             
                  log( msg )
         | 
| 282 292 | 
             
                end
         | 
| 283 293 |  | 
| @@ -288,7 +298,7 @@ def _tokenize_line( line, debug: false ) | |
| 288 298 |  | 
| 289 299 | 
             
                pos = offsets[1]
         | 
| 290 300 |  | 
| 291 | 
            -
                pp offsets   if debug
         | 
| 301 | 
            +
                pp offsets   if debug?
         | 
| 292 302 |  | 
| 293 303 | 
             
                ##
         | 
| 294 304 | 
             
                ## note: racc requires pairs e.g. [:TOKEN, VAL]
         | 
| @@ -331,7 +341,7 @@ def _tokenize_line( line, debug: false ) | |
| 331 341 | 
             
                        when '-' then [:'-']
         | 
| 332 342 | 
             
                        when '.' then 
         | 
| 333 343 | 
             
                            ## switch back to top-level mode!!
         | 
| 334 | 
            -
                            puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"
         | 
| 344 | 
            +
                            puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
         | 
| 335 345 | 
             
                            @re = RE 
         | 
| 336 346 | 
             
                            [:'.']
         | 
| 337 347 | 
             
                        else
         | 
| @@ -352,7 +362,7 @@ def _tokenize_line( line, debug: false ) | |
| 352 362 | 
             
                    elsif m[:prop_key]
         | 
| 353 363 | 
             
                       ##  switch context  to PROP_RE
         | 
| 354 364 | 
             
                       @re = PROP_RE
         | 
| 355 | 
            -
                       puts "  ENTER PROP_RE MODE"
         | 
| 365 | 
            +
                       puts "  ENTER PROP_RE MODE"  if debug?
         | 
| 356 366 | 
             
                       [:PROP, m[:key]]
         | 
| 357 367 | 
             
                    elsif m[:text]
         | 
| 358 368 | 
             
                      [:TEXT, m[:text]]   ## keep pos - why? why not?
         | 
| @@ -462,7 +472,7 @@ def _tokenize_line( line, debug: false ) | |
| 462 472 |  | 
| 463 473 | 
             
                tokens << t    if t
         | 
| 464 474 |  | 
| 465 | 
            -
                if debug
         | 
| 475 | 
            +
                if debug?
         | 
| 466 476 | 
             
                  print ">"
         | 
| 467 477 | 
             
                  print "*" * pos
         | 
| 468 478 | 
             
                  puts "#{line[pos..-1]}<"
         | 
| @@ -475,13 +485,12 @@ def _tokenize_line( line, debug: false ) | |
| 475 485 | 
             
                puts msg
         | 
| 476 486 | 
             
                log( msg )
         | 
| 477 487 |  | 
| 478 | 
            -
                errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
         | 
| 488 | 
            +
                errors << "parse error (tokenize) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
         | 
| 479 489 | 
             
              end
         | 
| 480 490 |  | 
| 481 491 |  | 
| 482 492 | 
             
              [tokens,errors]
         | 
| 483 493 | 
             
            end
         | 
| 484 494 |  | 
| 485 | 
            -
             | 
| 486 | 
            -
            end  # class Parser
         | 
| 495 | 
            +
            end  # class Lexer
         | 
| 487 496 | 
             
            end # module SportDb
         |