sportdb-parser 0.5.9 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/lib/sportdb/parser/lexer.rb +47 -28
- data/lib/sportdb/parser/parser.rb +421 -344
- data/lib/sportdb/parser/racc_parser.rb +1 -1
- data/lib/sportdb/parser/racc_tree.rb +12 -5
- data/lib/sportdb/parser/token-date.rb +18 -1
- data/lib/sportdb/parser/token-minute.rb +45 -0
- data/lib/sportdb/parser/token-prop.rb +133 -0
- data/lib/sportdb/parser/token-text.rb +9 -2
- data/lib/sportdb/parser/token.rb +43 -177
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +2 -0
- metadata +4 -2
    
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 
-  data.tar.gz: 
+  metadata.gz: 9af6317b144478400067502e60de2f8d6232ebf6e036b0f99b78f9c29922dba2
+  data.tar.gz: 7066483378693f6376f9c30ec71e5d4172c817c11025dd4e669da6d581b6ad54
 SHA512:
-  metadata.gz: 
-  data.tar.gz: 
+  metadata.gz: '039fdc82039d05ae8f51847a79dd77c0a657e316e8b0705a28bdf2f8e594f37531ea07a230c9e1a9133f96293975190dba070f50515d58bc9926e4ef3e8e152f'
+  data.tar.gz: e0f6483cd26ba7ef4800ecf76efd6f05e01e3a2458dbc6b65fe6582654c8d28627facbfc8228655e361df84c43418c9583826908cdcf3d61bf06d186288c56fa
data/CHANGELOG.md CHANGED

data/Manifest.txt CHANGED

@@ -14,6 +14,8 @@ lib/sportdb/parser/parser.rb
 lib/sportdb/parser/racc_parser.rb
 lib/sportdb/parser/racc_tree.rb
 lib/sportdb/parser/token-date.rb
+lib/sportdb/parser/token-minute.rb
+lib/sportdb/parser/token-prop.rb
 lib/sportdb/parser/token-score.rb
 lib/sportdb/parser/token-status.rb
 lib/sportdb/parser/token-text.rb
    
data/lib/sportdb/parser/lexer.rb CHANGED

@@ -247,6 +247,11 @@ def tokenize_with_errors
     ## flatten tokens
     tokens = []
     tokens_by_line.each do |tok|
+
+        if debug?
+          pp tok
+        end
+
         tokens  += tok
         tokens  << [:NEWLINE, "\n"]   ## auto-add newlines
     end
@@ -260,7 +265,7 @@ def _tokenize_line( line )
   tokens = []
   errors = []   ## keep a list of errors - why? why not?

-  puts ">#{line}<"    if debug?
+  puts "line: >#{line}<"    if debug?

   pos = 0
   ## track last offsets - to report error on no match
@@ -275,10 +280,10 @@ def _tokenize_line( line )


   while m = @re.match( line, pos )
-    if debug?
-      pp m
-      puts "pos: #{pos}"
-    end
+#    if debug?
+#      pp m
+#      puts "pos: #{pos}"
+#    end
     offsets = [m.begin(0), m.end(0)]

     if offsets[0] != pos
@@ -298,7 +303,7 @@ def _tokenize_line( line )

     pos = offsets[1]

-    pp offsets   if debug?
+#    pp offsets   if debug?

     ##
     ## note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -306,12 +311,8 @@ def _tokenize_line( line )


   t = if @re == PROP_RE
-        if m[:space]
-             ## skip space
-             nil
-        elsif m[:spaces]
-             ## skip spaces
-             nil
+        if m[:space] || m[:spaces]
+             nil    ## skip space(s)
        elsif m[:prop_name]
              if m[:name] == 'Y'
                [:YELLOW_CARD, m[:name]]
@@ -339,11 +340,11 @@ def _tokenize_line( line )
           when '(' then [:'(']
           when ')' then [:')']
           when '-' then [:'-']
-
-
-
-
-
+          # when '.' then
+          #     ## switch back to top-level mode!!
+          #     puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
+          #     @re = RE
+          #     [:'.']
           else
            nil  ## ignore others (e.g. brackets [])
           end
@@ -353,12 +354,8 @@ def _tokenize_line( line )
            nil
        end
     else  ## assume TOP_LEVEL (a.k.a. RE) machinery
-       if m[:space]
-          ## skip space
-          nil
-       elsif m[:spaces]
-          ## skip spaces
-          nil
+       if m[:space] || m[:spaces]
+          nil   ## skip space(s)
       elsif m[:prop_key]
          ##  switch context  to PROP_RE
         @re = PROP_RE
@@ -397,6 +394,7 @@ def _tokenize_line( line )
 ## map month names
 ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
            date[:y] = m[:year].to_i(10)  if m[:year]
+           date[:m] = m[:month].to_i(10)  if m[:month]
            date[:m] = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
            date[:d]  = m[:day].to_i(10)   if m[:day]
            date[:wday] = DAY_MAP[ m[:day_name].downcase ]   if m[:day_name]
@@ -417,6 +415,8 @@ def _tokenize_line( line )
            duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ]   if m[:day_name2]
            ## note - for debugging keep (pass along) "literal" duration
            [:DURATION, [m[:duration], duration]]
+       elsif m[:wday]    ## standalone weekday e.g. Mo/Tu/We/etc.
+            [:WDAY, [m[:wday], { wday: DAY_MAP[ m[:day_name].downcase ] } ]]
        elsif m[:num]   ## fix - change to ord (for ordinal number!!!)
             ## note -  strip enclosing () and convert to integer
            [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
@@ -454,6 +454,7 @@ def _tokenize_line( line )
         case sym
         when ',' then [:',']
         when ';' then [:';']
+        when '/' then [:'/']
         when '@' then [:'@']
         when '|' then [:'|']
         when '[' then [:'[']
@@ -472,11 +473,11 @@ def _tokenize_line( line )

     tokens << t    if t

-    if debug?
-      print ">"
-      print "*" * pos
-      puts "#{line[pos..-1]}<"
-    end
+#    if debug?
+#      print ">"
+#      print "*" * pos
+#      puts "#{line[pos..-1]}<"
+#    end
   end

   ## check if no match in end of string
@@ -489,6 +490,24 @@ def _tokenize_line( line )
   end


+   ##
+   ## if in prop mode continue if   last token is [,-]
+   ##        otherwise change back to "standard" mode
+   if @re == PROP_RE
+     if [:',', :'-'].include?( tokens[-1][0] )
+        ## continue/stay in PROP_RE mode
+        ##  todo/check - auto-add PROP_CONT token or such
+        ##                to help parser with possible NEWLINE
+        ##                  conflicts  - why? why not?
+     else
+        ## switch back to top-level mode!!
+        puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
+        @re = RE
+        ## note - auto-add PROP_END (<PROP_END>)
+        tokens << [:PROP_END, "<PROP_END>"]
+     end
+   end
+
   [tokens,errors]
 end
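The headline change in the lexer is the end-of-line handling for prop mode (the last hunk above): a line tokenized with `PROP_RE` now stays in prop mode when its last token is `,` or `-`, and otherwise switches back to the top-level `RE` and auto-appends a `PROP_END` marker token. Below is a minimal stand-alone sketch of that state machine; only the `PROP_RE`/`RE` mode switch, the `[:TOKEN, value]` pair convention, and the debug message are taken from the diff, while the `LexerSketch` class, its method names, and the placeholder regexps are hypothetical:

```ruby
## Minimal sketch (not the gem's actual class layout) of the new
## end-of-line prop-mode logic from the last hunk above.
class LexerSketch
  RE      = /\S+/   ## placeholder for the real top-level regexp
  PROP_RE = /\S+/   ## placeholder for the real prop-mode regexp

  def initialize( debug: false )
    @re    = RE     ## start in top-level mode
    @debug = debug
  end

  def debug?() @debug; end

  ## called once per line, after the line's tokens are collected
  def finish_line( tokens )
    return tokens unless @re == PROP_RE  ## nothing to do in top-level mode

    if tokens.any? && [:',', :'-'].include?( tokens[-1][0] )
      ## trailing , or - => the prop list continues on the next line;
      ## stay in PROP_RE mode
    else
      ## otherwise close the prop block - switch back to top-level mode
      ## and auto-append an explicit PROP_END marker for the parser
      puts "  LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"  if debug?
      @re = RE
      tokens << [:PROP_END, "<PROP_END>"]
    end
    tokens
  end
end
```

In effect, a prop list that ends a line with a separator continues across the newline, while any other ending closes the list; per the diff's own todo notes, the explicit `PROP_END` token spares the parser from inferring the end of a prop block from a `NEWLINE`.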