RubyGems - sportdb-parser - Versions diffs - 0.0.1 → 0.2.0 - Mend

sportdb-parser 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +2 -0
data/Manifest.txt +2 -0
data/Rakefile +6 -2
data/bin/fbt +14 -64
data/lib/sportdb/parser/lang.rb +51 -19
data/lib/sportdb/parser/linter.rb +4 -8
data/lib/sportdb/parser/opts.rb +70 -0
data/lib/sportdb/parser/outline_reader.rb +2 -6
data/lib/sportdb/parser/parser.rb +45 -27
data/lib/sportdb/parser/token-date.rb +19 -17
data/lib/sportdb/parser/token.rb +64 -51
data/lib/sportdb/parser/version.rb +24 -0
data/lib/sportdb/parser.rb +8 -4
metadata +32 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1466b82654b4a4f0f823a96709488dedb595d08731a55abc128691e0ffe2a80b
-  data.tar.gz: 14995e94dc079ab61e77d056d15c9a5830dc573129661ca453b2892d087c2061
+  metadata.gz: c94dcd42fc13a7043f6b926ca1d947df3199877693b22e53e4f50b5aa522bf5d
+  data.tar.gz: 33eb689dcfb2bab0728c19b7d706da1556ddefafbfbcc6e424ac5bcbe3bccef6
 SHA512:
-  metadata.gz: 75c2b4f455e8bb1b5e471c39f8fa3b5069bd0bb2a808ad8b246c0f2b060c5416f9f56a3619ad7db7ac5f21a6177c762aa28ae8e9c939b03a2569cf27d34f9b81
-  data.tar.gz: 9c4f9095a61410499ae7628b1eb3295d8f456e62feae45a4c254d9157904326abf6571f3c4a04c078551b6364cd09252509f709bfeef46a569dbe202f4058460
+  metadata.gz: 97ef8d76ffa26312d66359f364588af3d7c76a3b0cebd3644b1f1ae775463defa9cb9552b267f26677c2c6f4e9b7b9fe62479dd34a7211fd1a4a3c1b5e9af830
+  data.tar.gz: ca9b56c6c02c132f3924fb40c293e90379812b830a2899e2be02c1d6469a278456c6d68db7f73d5f5fd69b372c958953e3fefd829ac1120cf56b0944176a2b87

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,5 @@
+### 0.2.0
 ### 0.0.1 / 2024-07-12
 * Everything is new. First release.

data/Manifest.txt CHANGED Viewed

@@ -6,9 +6,11 @@ bin/fbt
 lib/sportdb/parser.rb
 lib/sportdb/parser/lang.rb
 lib/sportdb/parser/linter.rb
+lib/sportdb/parser/opts.rb
 lib/sportdb/parser/outline_reader.rb
 lib/sportdb/parser/parser.rb
 lib/sportdb/parser/token-date.rb
 lib/sportdb/parser/token-score.rb
 lib/sportdb/parser/token-text.rb
 lib/sportdb/parser/token.rb
+lib/sportdb/parser/version.rb

data/Rakefile CHANGED Viewed

@@ -1,9 +1,10 @@
 require 'hoe'
+require './lib/sportdb/parser/version.rb'
 Hoe.spec 'sportdb-parser' do
-  self.version = '0.0.1'
+  self.version = SportDb::Module::Parser::VERSION
   self.summary = "sportdb-parser - football.txt match parser (& tokenizer)"
   self.description = summary
@@ -19,7 +20,10 @@ Hoe.spec 'sportdb-parser' do
   self.licenses = ['Public Domain']
-  self.extra_deps = []
+  self.extra_deps = [
+       ['cocos', '>= 0.4.0'],
+       ['season-formats'],
+  ]
   self.spec_extras = {
     required_ruby_version: '>= 2.2.2'

data/bin/fbt CHANGED Viewed

@@ -3,52 +3,20 @@
 ## tip: to test run:
 ##   ruby -I ./lib bin/fbt
+## our own code
 require 'sportdb/parser'
 require 'optparse'
 ##
-## read textfile
+## read textfile
 ##   and dump tokens
 ##
 ##   fbt  ../openfootball/.../euro.txt
-  SEASON_RE = %r{ (?:
-                       \d{4}-\d{2}
-                     | \d{4}(--[a-z0-9_-]+)?
-                    )
-                  }x
-    SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?
-    ## note: if pattern includes directory add here
-    ##     (otherwise move to more "generic" datafile) - why? why not?
-    MATCH_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
-                       #{SEASON}
-                     /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
-                }x
-def find( path, pattern=MATCH_RE )
-    datafiles = []
-    ## check all txt files
-    ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
-    candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
-    ## pp candidates
-    candidates.each do |candidate|
-      datafiles << candidate    if pattern.match( candidate )
-    end
-    ## pp datafiles
-    datafiles
- end
  args = ARGV
@@ -64,7 +32,7 @@ def find( path, pattern=MATCH_RE )
   parser.on( "--verbose", "--debug",
-               "turn on verbose / debug output (default: #{opts[:debug]} )" ) do |debug|
+               "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
     opts[:debug] = debug
   end
@@ -85,28 +53,6 @@ p args
-def expand_args( args )
-    paths = []
-    args.each do |arg|
-        ## check if directory
-        if Dir.exist?( arg )
-            datafiles = find( arg )
-            puts
-            puts "  found #{datafiles.size} match txt datafiles in #{arg}"
-            pp datafiles
-            paths += datafiles
-        else
-              ## assume it's a file
-            paths << arg
-        end
-    end
-    paths
-end
 paths =  if args.empty?
             [
               '../../../openfootball/euro/2020--europe/euro.txt',
@@ -115,8 +61,8 @@ paths =  if args.empty?
          else
             ## check for directories
             ##   and auto-expand
-            expand_args( args )
+            SportDb::Parser::Opts.expand_args( args )
          end
@@ -125,18 +71,22 @@ SportDb::Parser::Linter.debug = true    if opts[:debug]
 linter = SportDb::Parser::Linter.new
+errors = []
 paths.each_with_index do |path,i|
     puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
     linter.read( path, parse: !opts[:metal] )
+    errors += linter.errors    if linter.errors?
 end
-if linter.errors?
+if errors.size > 0
     puts
-    pp linter.errors
-    puts "!!   #{linter.errors.size} parse error(s) in #{paths.size} datafiles(s)"
+    pp errors
+    puts
+    puts "!!   #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
 else
+    puts
     puts "OK   no parse errors found in #{paths.size} datafile(s)"
 end

data/lib/sportdb/parser/lang.rb CHANGED Viewed

@@ -15,7 +15,7 @@ class Parser
 GROUP_RE = %r{^
                 Group [ ]
-                   (?<key>[a-z0-9]+)
+                   (?<key>[a-z0-9]+)
               $}ix
 def is_group?( text )
    ## use regex for match
@@ -28,42 +28,68 @@ end
 ROUND_RE = %r{^(
    # round  - note - requiers number e.g. round 1,2, etc.
+   #   note - use 1-9 regex (cannot start with 0) - why? why not?
+   #             make week 01 or round 01 or matchday 01 possible?
       (?: (?: Round |
               Matchday |
               Week
            )
-           [ ] [0-9]+
+           [ ] [1-9][0-9]*
       )
        |
+   ##  starting with qual(ification)
+   ## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
+     (?:  Qual \. [ ]
+          Round
+           [ ] [1-9][0-9]*
+      )
+       |
+   ## 1. Round / 2. Round / 3. Round / etc.
+   ##  Play-off Round
+      (?:
+           (?: [1-9][0-9]* \.
+                |
+                Play-?off
+           )
+             [ ] Round
+       )
+       |
+  ## starting with preliminary
+     (?:  Preliminary  [ ]
+           (?:  Semi-?finals |
+                Final
+           )
+     )
+     |
    # more (kockout) rounds
    # playoffs  - playoff, play-off, play-offs
-        (?: Play-?offs?
+        (?: Play-?offs?
            (?: [ ]for[ ]quarter-?finals )?
         )
-        |
+        |
    # round32
-        (?: Round[ ]of[ ]32 |
+        (?: Round[ ]of[ ]32 |
             Last[ ]32 )
           |
-   # round16
+   # round16
         (?: Round[ ]of[ ]16 |
-            Last[ ]16 |
+            Last[ ]16 |
             8th[ ]finals )
            |
    # fifthplace
          (?:
-             (?: (Fifth|5th)[ -]place
+             (?: (Fifth|5th)[ -]place
                   (?: [ ] (?: match|play-?off|final ))?
               ) |
              (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
          )
           |
    # thirdplace
-          (?:
-              (?: (Third|3rd)[ -]place
-                     (?: [ ] (?: match|play-?off|final ))?
+          (?:
+              (?: (Third|3rd)[ -]place
+                     (?: [ ] (?: match|play-?off|final ))?
                ) |
-              (?: Match[ ]for[ ](?: third|3rd )[ -]place )
+              (?: Match[ ]for[ ](?: third|3rd )[ -]place )
            )
            |
    # quarterfinals
@@ -72,18 +98,24 @@ ROUND_RE = %r{^(
               Quarters |
               Last[ ]8
           )
-          |
+          |
    # semifinals
-        (?:
+        (?:
              Semi-?finals? |
              Semis |
              Last[ ]4
         )
         |
    # final
-         Finals?
-        )$}ix
+         Finals?
+         |
+    ## add replays
+    ##  Final Replay
+     (?:
+        Final
+        [ ] Replay
+      )
+)$}ix
 def is_round?( text )
@@ -95,9 +127,9 @@ end
 ##
 LEG_RE = %r{^
   # leg1
-     (?: 1st|First)[ ]leg
+     (?: 1st|First)[ ]leg
      |
-  # leg2
+  # leg2
      (?: 2nd|Second)[ ]leg
 $}ix

data/lib/sportdb/parser/linter.rb CHANGED Viewed

@@ -10,12 +10,6 @@ def self.debug=(value) @@debug = value; end
 def self.debug?() @@debug ||= false; end  ## note: default is FALSE
 def debug?()  self.class.debug?; end
-## keep typed - why? why not?
-## - used anywhere?
-def self.typed=(value) @@typed = value; end
-def self.typed?() @@typed ||= true; end   ## note: default is TRUE
-def typed?()  self.class.typed?; end
 attr_reader :errors
@@ -58,6 +52,9 @@ def errors?() @errors.size > 0; end
 ## parse - false (default) - tokenize (only)
 ##       - true            - tokenize & parse
 def read( path, parse: false )
+  ## note: every (new) read call - resets errors list to empty
+  @errors = []
   nodes = OutlineReader.read( path )
   ##  process nodes
@@ -94,9 +91,8 @@ def read( path, parse: false )
         ## skip new (experimental attrib syntax)
-        m = nil
         if attrib_found == false &&
-            m=ATTRIB_RE.match( line )
+            ATTRIB_RE.match?( line )
           ## note: check attrib regex AFTER group def e.g.:
           ##         Group A:
           ##         Group B:  etc.

data/lib/sportdb/parser/opts.rb ADDED Viewed

@@ -0,0 +1,70 @@
+module SportDb
+class Parser
+### Opts - command-line helpers: find match datafiles in a directory & expand directory args into file paths
+## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
+class Opts
+    SEASON_RE = %r{ (?:
+                       \d{4}-\d{2}
+                     | \d{4}(--[a-z0-9_-]+)?
+                    )
+                  }x
+    SEASON = SEASON_RE.source    ## regex source (string) of SEASON_RE - gets interpolated into MATCH_RE below - keep? why? why not?
+    ## MATCH_RE - matches a path ending in <season>/<name>.txt
+    ##   note: if pattern includes directory add here (otherwise move to more "generic" datafile) - why? why not?
+    MATCH_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
+                       #{SEASON}
+                     /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
+                }x
+def self.find( path )     ## returns array of all txt datafile paths below path that match MATCH_RE
+    datafiles = []
+    ## note: path gets normalized to an absolute path via File.expand_path;
+    ##    the older backslash-to-slash replacement is kept (commented out) for reference
+    ## path = path.gsub( "\\", '/' )
+    path = File.expand_path( path )
+    ## check all txt files (in all subdirectories)
+    ## note: incl. files starting with dot (.) as candidates
+    ##     (normally excluded with just *)
+    candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
+    ## pp candidates
+    candidates.each do |candidate|
+      datafiles << candidate    if MATCH_RE.match( candidate )
+    end
+    ## pp datafiles
+    datafiles
+end
+def self.expand_args( args )     ## returns flat array of paths - directory args get replaced by the datafiles found inside
+    paths = []
+    args.each do |arg|
+        ## check if (existing) directory - if yes, auto-expand via find and print what was found
+        if Dir.exist?( arg )
+            datafiles = find( arg )
+            puts
+            puts "  found #{datafiles.size} match txt datafiles in #{arg}"
+            pp datafiles
+            paths += datafiles
+        else
+              ## assume it's a file - note: passed along as is (no check here if the file exists)
+            paths << arg
+        end
+    end
+    paths
+end
+end  # class Opts
+end   # class Parser
+end   # module SportDb

data/lib/sportdb/parser/outline_reader.rb CHANGED Viewed

@@ -1,8 +1,4 @@
-###
-## todo/fix -  move to sportdb-parser - why? why not? !!!!!!
-##
 module SportDb
@@ -10,7 +6,7 @@ class OutlineReader
   def self.debug=(value) @@debug = value; end
   def self.debug?() @@debug ||= false; end
-  def debug?()  self.class.debug?; end
+  def debug?()  self.class.debug?; end
@@ -73,7 +69,7 @@ class OutlineReader
            start_para = true
            heading_marker = m[:marker]
-           heading_level  = m[:marker].length   ## count number of = for heading level
+           heading_level  = heading_marker.length   ## count number of = for heading level
            heading        = m[:text].strip
            puts "heading #{heading_level} >#{heading}<"   if debug?

data/lib/sportdb/parser/parser.rb CHANGED Viewed

@@ -1,24 +1,24 @@
-module SportDb
+module SportDb
 class Parser
 ## transforms
 ##
 ##  Netherlands  1-2 (1-1)   England
-##   =>  text => team
-##       score|vs
+##   =>  text => team
+##       score|vs
 ##       text => team
 ## token iter/find better name
 ##  e.g. TokenBuffer/Scanner or such ??
-class Tokens
+class Tokens
     def initialize( tokens )
         @tokens = tokens
         @pos = 0
     end
-    def pos()  @pos; end
+    def pos()  @pos; end
     def eos?() @pos >= @tokens.size; end
@@ -47,17 +47,17 @@ class Tokens
     ## return token type  (e.g. :text, :num, etc.)
     def cur()           peek(0); end
     ## return content (assumed to be text)
-    def text(offset=0)
+    def text(offset=0)
         ## raise error - why? why not?
         ##   return nil?
         if peek( offset ) != :text
             raise ArgumentError, "text(#{offset}) - token not a text type"
         end
-        @tokens[@pos+offset][1]
+        @tokens[@pos+offset][1]
     end
-    def peek(offset=1)
+    def peek(offset=1)
         ## return nil if eos
         if @pos+offset >= @tokens.size
             nil
@@ -66,7 +66,7 @@ class Tokens
         end
     end
-    ## note - returns complete token
+    ## note - returns complete token
     def next
        # if @pos >= @tokens.size
        #     raise ArgumentError, "end of array - #{@pos} >= #{@tokens.size}"
@@ -81,7 +81,7 @@ class Tokens
     def collect( &blk )
         tokens = []
         loop do
-          break if eos?
+          break if eos?
           tokens <<  if block_given?
                         blk.call( self.next )
                      else
@@ -106,7 +106,7 @@ def parse_with_errors( line, debug: false )
     errors += token_errors
 #############
-## pass 1
+## pass 1
 ##   replace all texts with keyword matches (e.g. group, round, leg, etc.)
      tokens = tokens.map do |t|
                       if t[0] == :text
@@ -129,24 +129,40 @@ def parse_with_errors( line, debug: false )
     ## puts "tokens:"
     ## pp tokens
-## transform tokens into (parse tree/ast) nodes
+## transform tokens into (parse tree/ast) nodes
     nodes = []
     buf = Tokens.new( tokens )
     ## pp buf
-    loop do
-          if buf.pos == 0
-            ## check for
-            ##    group def or round def
-            if buf.match?( :round, :'|' )    ## assume round def (change round to round_def)
+    loop do
+          break if buf.eos?
+          ## simplify - remove separator for round + leg pair
+          ##     e.g.  Round of 16, 1st Leg
+          ##     allow Round of 16 - 1st Leg  too - why? why not?
+          if buf.match?( :round, [:',', :'|',
+                                    :'-',
+                                    :vs,   ### fix - change parser to issue :'-' only for (-) not :vs!!!
+                                    ], :leg )
+                    nodes << [:round, buf.next[1]]
+                    buf.next  ## swallow separator
+                    nodes << [:leg, buf.next[1]]
+                    next
+          end
+          if buf.pos == 0   ## MUST start line
+            ## check for
+            ##    group def or round def
+            if buf.match?( :round, :'|', [:date, :duration] )    ## assume round def (change round to round_def)
                       nodes << [:round_def, buf.next[1]]
                       buf.next ## swallow pipe
                       nodes += buf.collect
                       break
             end
-            if buf.match?( :group, :'|' )    ## assume group def (change group to group_def)
+            if buf.match?( :group, :'|', :text )    ## assume group def (change group to group_def)
                       nodes << [:group_def, buf.next[1]]
                       buf.next ## swallow pipe
                       ## change all text to team
@@ -154,11 +170,15 @@ def parse_with_errors( line, debug: false )
                                 t[0] == :text ? [:team, t[1]] : t
                                }
                       break
-            end
+            end
           end
-          if buf.match?( :text, [:score, :vs], :text )
+          if buf.match?( :text, :'-', :text )  ## hacky? convert "generic" :- to :vs
+             nodes << [:team, buf.next[1]]     ##    keep this rule/option - why? why not?
+             nodes << [:vs]
+             nodes << [:team, buf.next[1]]
+          elsif buf.match?( :text, [:score, :vs], :text )
              nodes << [:team, buf.next[1]]
              nodes << buf.next
              nodes << [:team, buf.next[1]]
@@ -170,14 +190,12 @@ def parse_with_errors( line, debug: false )
                ##   only change text to geo
               nodes += buf.collect  { |t|
                            t[0] == :text ? [:geo, t[1]] : t
-                            }
+                            }
               break
           else
              ## pass through
              nodes << buf.next
           end
-          break if buf.eos?
     end
     [nodes,errors]
@@ -192,5 +210,5 @@ end
 end #  class Parser
-end  # module SportDb
+end  # module SportDb

data/lib/sportdb/parser/token-date.rb CHANGED Viewed

@@ -37,22 +37,24 @@ def self.build_names( lines )
 end
+def self.build_map( lines, downcase: false )   ## lines - list of name lists (every line gets iterated with each)
+   ## note: names get downcased ONLY if downcase: true is passed in (default: keep names as is)
+  ## build a lookup map that maps the word to the index (line no) plus 1 e.g. (with downcase: true)
+  ##  {"january" => 1,  "jan" => 1,
+  ##   "february" => 2, "feb" => 2,
+  ##   "march" => 3,    "mar" => 3,
+  ##   "april" => 4,    "apr" => 4,
+  ##   "may" => 5,
+  ##   "june" => 6,     "jun" => 6, ...
+  lines.each_with_index.reduce( {} ) do |h,(line,i)|
+    line.each do |name|
+       h[ downcase ? name.downcase : name ] = i+1
+    end  ## note: start mapping with 1 (and NOT zero-based, that is, 0)
+    h    ## pass along hash (accumulator) to next reduce step; last one is the return value
+  end
+end
-## add normalize option (for downcase) - why? why not?
-def self.build_map( lines )
-    ## note: downcase name!!!
-   ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
-   ##  {"january" => 1,  "jan" => 1,
-   ##   "february" => 2, "feb" => 2,
-   ##   "march" => 3,    "mar" => 3,
-   ##   "april" => 4,    "apr" => 4,
-   ##   "may" => 5,
-   ##   "june" => 6,     "jun" => 6, ...
-   lines.each_with_index.reduce( {} ) do |h,(line,i)|
-     line.each { |name| h[ name.downcase ] = i+1 }  ## note: start mapping with 1 (and NOT zero-based, that is, 0)
-     h
-   end
- end
 MONTH_LINES = parse_names( <<TXT )
@@ -72,7 +74,7 @@ TXT
 MONTH_NAMES = build_names( MONTH_LINES )
 # pp MONTH_NAMES
-MONTH_MAP   = build_map( MONTH_LINES )
+MONTH_MAP   = build_map( MONTH_LINES, downcase: true )
 # pp MONTH_MAP
@@ -89,7 +91,7 @@ TXT
 DAY_NAMES = build_names( DAY_LINES )
 # pp DAY_NAMES
-DAY_MAP   = build_map( DAY_LINES )
+DAY_MAP   = build_map( DAY_LINES, downcase: true )
 # pp DAY_MAP

data/lib/sportdb/parser/token.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-module SportDb
+module SportDb
 class Parser
@@ -15,7 +15,7 @@ TIME_RE = %r{
                  (?: :|\.|h )
               (?<minute>\d{2})
               \b
-    )
+    )
 }ix
@@ -28,7 +28,7 @@ TIME_RE = %r{
 # (CEST/UTC+2)  - central european summer time  - daylight saving time (DST).
 # (EET/UTC+1)  - eastern european time
 # (EEST/UTC+2)  - eastern european summer time  - daylight saving time (DST).
-#
+#
 # UTC+3
 # UTC+4
 # UTC+0
@@ -45,7 +45,7 @@ TIME_RE = %r{
 TIMEZONE_RE = %r{
    ## e.g. (UTC-2) or (CEST/UTC-2) etc.
-   (?<timezone>
+   (?<timezone>
       \(
            ## optional "local" timezone name eg. BRT or CEST etc.
            (?:  [a-z]+
@@ -63,28 +63,28 @@ TIMEZONE_RE = %r{
 BASICS_RE = %r{
     ## e.g. (51) or (1) etc.  - limit digits of number???
-    (?<num> \(  (?<value>\d+) \) )
+    (?<num> \(  (?<value>\d+) \) )
        |
-    (?<vs>
-       (?<=[ ])	# Positive lookbehind for space
-       (?:
+    (?<vs>
+       (?<=[ ])	# Positive lookbehind for space
+       (?:
           vs\.?|   ## allow optional dot (eg. vs. v.)
           v\.?|
           -
        )   # not bigger match first e.g. vs than v etc.
        (?=[ ])   # positive lookahead for space
-    )
-       |
+    )
+       |
     (?<none>
-       (?<=[ \[]|^)	 # Positive lookbehind for space or [
+       (?<=[ \[]|^)	 # Positive lookbehind for space or [
            -
         (?=[ ]*;)   # positive lookahead for space
     )
        |
     (?<spaces> [ ]{2,}) |
-    (?<space>  [ ])
+    (?<space>  [ ])
         |
-    (?<sym>[;,@|\[\]])
+    (?<sym>[;,@|\[\]])
 }ix
@@ -94,13 +94,13 @@ MINUTE_RE = %r{
            (?<value>\d{1,3})      ## constrain numbers to 0 to 999!!!
         (?: \+
             (?<value2>\d{1,3})
-        )?
+        )?
         '     ## must have minute marker!!!!
      )
 }ix
-##  (match) status
+##  (match) status
 ##    note: english usage - cancelled (in UK), canceled (in US)
 ##
 ##  add more variants - why? why not?
@@ -115,30 +115,30 @@ STATUS_RE = %r{
                |
             postponed
                |
-            awarded|awd\.
+            awarded|awd\.
                |
-            replay
+            replay
          )
    (?=[ \]]|$)
      )}ix
 ## todo/check:  remove loakahead assertion here - why require space?
-## note: \b works only after non-alphanum
-##          to make it work with awd. (dot) "custom" lookahead neeeded
+## note: \b works only after non-alphanum
+##          to make it work with awd. (dot) "custom" lookahead neeeded
 ##   goal types
-# (pen.) or (pen) or (p.) or (p)
+# (pen.) or (pen) or (p.) or (p)
 ## (o.g.) or (og)
 GOAL_PEN_RE = %r{
-   (?<pen> \(
-           (?:pen|p)\.?
+   (?<pen> \(
+           (?:pen|p)\.?
            \)
     )
 }ix
 GOAL_OG_RE = %r{
-   (?<og> \(
-          (?:og|o\.g\.)
+   (?<og> \(
+          (?:og|o\.g\.)
           \)
    )
 }ix
@@ -158,11 +158,11 @@ RE = Regexp.union(   STATUS_RE,
 def log( msg )
-   ## append msg to ./logs.txt
+   ## append msg to ./logs.txt
    ##     use ./errors.txt - why? why not?
    File.open( './logs.txt', 'a:utf-8' ) do |f|
      f.write( msg )
-     f.write( "\n" )
+     f.write( "\n" )
    end
 end
@@ -176,7 +176,7 @@ def tokenize_with_errors( line, typed: false,
   puts ">#{line}<"    if debug
   pos = 0
-  ## track last offsets - to report error on no match
+  ## track last offsets - to report error on no match
   ##   or no match in end of string
   offsets = [0,0]
   m = nil
@@ -184,7 +184,7 @@ def tokenize_with_errors( line, typed: false,
   while m = RE.match( line, pos )
     if debug
       pp m
-      puts "pos: #{pos}"
+      puts "pos: #{pos}"
     end
     offsets = [m.begin(0), m.end(0)]
@@ -213,10 +213,10 @@ def tokenize_with_errors( line, typed: false,
         elsif m[:spaces]
            ## skip spaces
            nil
-        elsif m[:text]
+        elsif m[:text]
           [:text, m[:text]]   ## keep pos - why? why not?
         elsif m[:status]   ## (match) status e.g. cancelled, awarded, etc.
-          [:status, m[:status]]
+          [:status, m[:status]]
         elsif m[:time]
           if typed
               ## unify to iso-format
@@ -230,7 +230,7 @@ def tokenize_with_errors( line, typed: false,
               if (hour >= 0 && hour <= 24) &&
                  (minute >=0 && minute <= 59)
                ## note - for debugging keep (pass along) "literal" time
-               ##   might use/add support for am/pm later
+               ##   might use/add support for am/pm later
                [:time, m[:time], {h:hour,m:minute}]
               else
                  raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
@@ -241,54 +241,68 @@ def tokenize_with_errors( line, typed: false,
         elsif m[:date]
           if typed
             date = {}
-=begin
+=begin
             ((?<day_name>#{DAY_NAMES})
             [ ]
-       )?
+       )?
        (?<month_name>#{MONTH_NAMES})
            (?: \/|[ ] )
        (?<day>\d{1,2})
        ## optional year
        (  [ ]
           (?<year>\d{4})
-       )?
+       )?
 =end
  ## map month names
  ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
-            date[:y] = m[:year].to_i(10)  if m[:year]
+            date[:y] = m[:year].to_i(10)  if m[:year]
             date[:m] = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
             date[:d]  = m[:day].to_i(10)   if m[:day]
             date[:wday] = DAY_MAP[ m[:day_name].downcase ]   if m[:day_name]
-            ## note - for debugging keep (pass along) "literal" date
-            [:date, m[:date], date]
+            ## note - for debugging keep (pass along) "literal" date
+            [:date, m[:date], date]
           else
             [:date, m[:date]]
           end
         elsif m[:timezone]
           [:timezone, m[:timezone]]
         elsif m[:duration]
-          [:duration, m[:duration]]
+          if typed
+            duration = { start: {}, end: {}}
+            duration[:start][:y] = m[:year1].to_i(10)  if m[:year1]
+            duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ]   if m[:month_name1]
+            duration[:start][:d]  = m[:day1].to_i(10)   if m[:day1]
+            duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ]   if m[:day_name1]
+            duration[:end][:y] = m[:year2].to_i(10)  if m[:year2]
+            duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ]   if m[:month_name2]
+            duration[:end][:d]  = m[:day2].to_i(10)   if m[:day2]
+            duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ]   if m[:day_name2]
+            ## note - for debugging keep (pass along) "literal" duration
+            [:duration, m[:duration], duration]
+          else
+            [:duration, m[:duration]]
+          end
         elsif m[:num]
           if typed
               ## note -  strip enclosing () and convert to integer
              [:num, m[:value].to_i(10)]
-          else
+          else
              [:num, m[:num]]
           end
         elsif m[:score]
           if typed
               score = {}
               ## check for pen
-              score[:p] = [m[:p1].to_i(10),
+              score[:p] = [m[:p1].to_i(10),
                            m[:p2].to_i(10)]  if m[:p1] && m[:p2]
-              score[:et] = [m[:et1].to_i(10),
+              score[:et] = [m[:et1].to_i(10),
                             m[:et2].to_i(10)]  if m[:et1] && m[:et2]
-              score[:ft] = [m[:ft1].to_i(10),
+              score[:ft] = [m[:ft1].to_i(10),
                             m[:ft2].to_i(10)]  if m[:ft1] && m[:ft2]
-              score[:ht] = [m[:ht1].to_i(10),
+              score[:ht] = [m[:ht1].to_i(10),
                             m[:ht2].to_i(10)]  if m[:ht1] && m[:ht2]
-            ## note - for debugging keep (pass along) "literal" score
+            ## note - for debugging keep (pass along) "literal" score
             [:score, m[:score], score]
           else
             [:score, m[:score]]
@@ -298,7 +312,7 @@ def tokenize_with_errors( line, typed: false,
               minute = {}
               minute[:m]      = m[:value].to_i(10)
               minute[:offset] = m[:value2].to_i(10)   if m[:value2]
-             ## note - for debugging keep (pass along) "literal" minute
+             ## note - for debugging keep (pass along) "literal" minute
              [:minute, m[:minute], minute]
           else
              [:minute, m[:minute]]
@@ -318,16 +332,16 @@ def tokenize_with_errors( line, typed: false,
           when ',' then [:',']
           when ';' then [:';']
           when '@' then [:'@']
-          when '|' then [:'|']
+          when '|' then [:'|']
           else
             nil  ## ignore others (e.g. brackets [])
           end
         else
-          ## report error
+          ## report error
           nil
         end
-    tokens << t    if t
+    tokens << t    if t
     if debug
       print ">"
@@ -346,7 +360,7 @@ def tokenize_with_errors( line, typed: false,
   end
-  [tokens,errors]
+  [tokens,errors]
 end
@@ -360,5 +374,4 @@ end
 end  # class Parser
-end # module SportDb
+end # module SportDb

data/lib/sportdb/parser/version.rb ADDED Viewed

@@ -0,0 +1,24 @@
+module SportDb
+  module Module
+    module Parser
+  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
+  MINOR = 2
+  PATCH = 0
+  VERSION = [MAJOR,MINOR,PATCH].join('.')    ## version string built from the three parts above e.g. "0.2.0"
+  def self.version    ## returns the version string (same as VERSION)
+    VERSION
+  end
+  def self.banner     ## returns one-line banner: gem name/version, ruby version, release date, platform & root dir
+    "sportdb-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
+  end
+  def self.root       ## returns absolute path of the directory four levels up from this file
+    File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
+  end
+    end   # module Parser
+  end     # module Module
+end       # module SportDb

data/lib/sportdb/parser.rb CHANGED Viewed

@@ -1,3 +1,7 @@
+## pulls in
+require 'cocos'
+require 'season/formats'  # e.g. Season() support machinery
 ####
@@ -11,7 +15,7 @@
 ##  text - change text to name - why? why not?
+require_relative 'parser/version'
 require_relative 'parser/token-score'
 require_relative 'parser/token-date'
 require_relative 'parser/token-text'
@@ -23,6 +27,7 @@ require_relative 'parser/parser'
 ## more
 require_relative 'parser/outline_reader'
 require_relative 'parser/linter'
+require_relative 'parser/opts'
 ###
@@ -31,7 +36,7 @@ require_relative 'parser/linter'
 =begin
 module SportDb
    def self.parser() @@parser ||= Parser.new; end
-   def self.parse( ... )
+   def self.parse( ... )
    end
    def self.tokenize( ... )
    end
@@ -39,6 +44,5 @@ end  # module SportDb
 =end
+puts SportDb::Module::Parser.banner    # say hello

metadata CHANGED Viewed

@@ -1,15 +1,43 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.2.0
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-07-14 00:00:00.000000000 Z
+date: 2024-08-22 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: cocos
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.4.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.4.0
+- !ruby/object:Gem::Dependency
+  name: season-formats
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rdoc
   requirement: !ruby/object:Gem::Requirement
@@ -62,12 +90,14 @@ files:
 - lib/sportdb/parser.rb
 - lib/sportdb/parser/lang.rb
 - lib/sportdb/parser/linter.rb
+- lib/sportdb/parser/opts.rb
 - lib/sportdb/parser/outline_reader.rb
 - lib/sportdb/parser/parser.rb
 - lib/sportdb/parser/token-date.rb
 - lib/sportdb/parser/token-score.rb
 - lib/sportdb/parser/token-text.rb
 - lib/sportdb/parser/token.rb
+- lib/sportdb/parser/version.rb
 homepage: https://github.com/sportdb/sport.db
 licenses:
 - Public Domain