RubyGems - sportdb-parser - Versions diffs - 0.2.0 → 0.2.2 - Mend

sportdb-parser 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +0 -4
data/Rakefile +1 -1
data/lib/sportdb/parser/lang.rb +24 -7
data/lib/sportdb/parser/token-date.rb +128 -21
data/lib/sportdb/parser/version.rb +1 -1
data/lib/sportdb/parser.rb +0 -5
metadata +4 -9
data/bin/fbt +0 -94
data/lib/sportdb/parser/linter.rb +0 -149
data/lib/sportdb/parser/opts.rb +0 -70
data/lib/sportdb/parser/outline_reader.rb +0 -97

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c94dcd42fc13a7043f6b926ca1d947df3199877693b22e53e4f50b5aa522bf5d
-  data.tar.gz: 33eb689dcfb2bab0728c19b7d706da1556ddefafbfbcc6e424ac5bcbe3bccef6
+  metadata.gz: 0c9225b21f400b9f9cced2052c3062f41a091ed81d3d4239164c9652f53ebc6e
+  data.tar.gz: f7250eaa21324962df27e7cdd397857afa570c610f00c80c31e5105e40964002
 SHA512:
-  metadata.gz: 97ef8d76ffa26312d66359f364588af3d7c76a3b0cebd3644b1f1ae775463defa9cb9552b267f26677c2c6f4e9b7b9fe62479dd34a7211fd1a4a3c1b5e9af830
-  data.tar.gz: ca9b56c6c02c132f3924fb40c293e90379812b830a2899e2be02c1d6469a278456c6d68db7f73d5f5fd69b372c958953e3fefd829ac1120cf56b0944176a2b87
+  metadata.gz: 471c938c233d8f81d7a0fd5e4470a27a52486906764816b6c35ea3d88e19650c81302fd5ff9ee30b85d3a8e9f81ada8eef20b49bd3de924c7238acb106ba6082
+  data.tar.gz: 24d1cf3846404859ad7e751895325b256321d43e2881413fda6325c744ca0c31b52ef2032a9dfc8e56e67d7a06df54a6d2780a297982440b8e40b7055fe06c26

data/CHANGELOG.md CHANGED Viewed

@@ -1,4 +1,4 @@
-### 0.2.0
+### 0.2.2
 ### 0.0.1 / 2024-07-12

data/Manifest.txt CHANGED Viewed

@@ -2,12 +2,8 @@ CHANGELOG.md
 Manifest.txt
 README.md
 Rakefile
-bin/fbt
 lib/sportdb/parser.rb
 lib/sportdb/parser/lang.rb
-lib/sportdb/parser/linter.rb
-lib/sportdb/parser/opts.rb
-lib/sportdb/parser/outline_reader.rb
 lib/sportdb/parser/parser.rb
 lib/sportdb/parser/token-date.rb
 lib/sportdb/parser/token-score.rb

data/Rakefile CHANGED Viewed

@@ -26,6 +26,6 @@ Hoe.spec 'sportdb-parser' do
   ]
   self.spec_extras = {
-    required_ruby_version: '>= 2.2.2'
+    required_ruby_version: '>= 3.1.0'
   }
 end

data/lib/sportdb/parser/lang.rb CHANGED Viewed

@@ -27,6 +27,12 @@ end
 ROUND_RE = %r{^(
+   ## add special case for group play-off rounds!
+   ##  group 2 play-off   (e.g. worldcup 1954, 1958)
+     (?:   Group [ ] [a-z0-9]+ [ ]
+           Play-?offs?
+     )
+        |
    # round  - note - requires number e.g. round 1,2, etc.
    #   note - use 1-9 regex (cannot start with 0) - why? why not?
    #             make week 01 or round 01 or matchday 01 possible?
@@ -46,17 +52,23 @@ ROUND_RE = %r{^(
        |
    ## 1. Round / 2. Round / 3. Round / etc.
    ##  Play-off Round
+   ##  First Round
+   ##  Final Round   (e.g. Worldcup 1950)
       (?:
-           (?: [1-9][0-9]* \.
-                |
-                Play-?off
+           (?: [1-9][0-9]* \.  |
+                Play-?off   |
+                1st | First   |
+                2nd | Second  |
+                Final
            )
              [ ] Round
        )
        |
   ## starting with preliminary
+  #   e.g.  Preliminary round
      (?:  Preliminary  [ ]
-           (?:  Semi-?finals |
+           (?:  Round |
+                Semi-?finals |
                 Final
            )
      )
@@ -110,10 +122,15 @@ ROUND_RE = %r{^(
          Finals?
          |
     ## add replays
-    ##  Final Replay
+    ##  e.g. Final Replay
+    ##       Quarter-finals replays
+    ##       First round replays
      (?:
-        Final
-        [ ] Replay
+        (?: First [ ] Round |
+            Quarter-?finals? |
+            Finals?
+         )
+        [ ] Replays?
       )
 )$}ix

data/lib/sportdb/parser/token-date.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-module SportDb
+module SportDb
 class Parser
 def self.parse_names( txt )
@@ -47,8 +47,8 @@ def self.build_map( lines, downcase: false )
   ##   "may" => 5,
   ##   "june" => 6,     "jun" => 6, ...
   lines.each_with_index.reduce( {} ) do |h,(line,i)|
-    line.each do |name|
-       h[ downcase ? name.downcase : name ] = i+1
+    line.each do |name|
+       h[ downcase ? name.downcase : name ] = i+1
     end  ## note: start mapping with 1 (and NOT zero-based, that is, 0)
     h
   end
@@ -109,28 +109,85 @@ DAY_MAP   = build_map( DAY_LINES, downcase: true )
 ## todo - add more date variants !!!!
 # e.g. Fri Aug/9  or Fri Aug 9
-DATE_RE = %r{
+DATE_I_RE = %r{
 (?<date>
   \b
      ## optional day name
      ((?<day_name>#{DAY_NAMES})
           [ ]
-     )?
+     )?
      (?<month_name>#{MONTH_NAMES})
          (?: \/|[ ] )
      (?<day>\d{1,2})
      ## optional year
      (  [ ]
         (?<year>\d{4})
-     )?
-  \b
+     )?
+  \b
 )}ix
+# e.g. 3 June  or 10 June
+DATE_II_RE = %r{
+(?<date>
+  \b
+     ## optional day name
+     ((?<day_name>#{DAY_NAMES})
+          [ ]
+     )?
+     (?<day>\d{1,2})
+         [ ]
+     (?<month_name>#{MONTH_NAMES})
+     ## optional year
+     (  [ ]
+        (?<year>\d{4})
+     )?
+  \b
+)}ix
+#############################################
+# map tables
+#  note: order matters; first come-first matched/served
+DATE_RE = Regexp.union(
+   DATE_I_RE,
+   DATE_II_RE
+)
+##
+##  add a date parser helper
+def self.parse_date( str, start: )
+    if m=DATE_RE.match( str )
+      year    = m[:year].to_i(10)  if m[:year]
+      month   = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
+      day     = m[:day].to_i(10)   if m[:day]
+      wday    = DAY_MAP[ m[:day_name].downcase ]   if m[:day_name]
+      if year.nil?   ## try to calculate year
+        year =  if  month > start.month ||
+                   (month == start.month && day >= start.day)
+                  # assume same year as start_at event (e.g. 2013 for 2013/14 season)
+                  start.year
+                else
+                  # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
+                  start.year+1
+                end
+      end
+      Date.new( year,month,day )
+    else
+      puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
+      exit 1
+    end
+end
 ###
-#  date duration
+#  date duration
 #   use - or + as separator
-#    in theory plus( +) only if dates
+#    in theory plus( +) only if dates
 #     are two days next to each other
 #
 #   otherwise  define new dates type in the future? why? why not?
@@ -147,7 +204,7 @@ DATE_RE = %r{
 #  Jun/25 .. 26        - why? why not???
 #  Jun/25 to 26        - why? why not???
 #  Jun/25 + 26        - add - why? why not???
-#  Sun-Wed Jun/23-26  -  add - why? why not???
+#  Sun-Wed Jun/23-26  -  add - why? why not???
 #  Wed+Thu Jun/26+27 2024  -  add - why? why not???
 #
 #  maybe use comma and plus for list of dates
@@ -157,39 +214,89 @@ DATE_RE = %r{
 #   add back optional comma (before) year - why? why not?
-DURATION_RE =  %r{
+##
+#   todo add plus later on - why? why not?
+DURATION_I_RE =  %r{
 (?<duration>
     \b
    ## optional day name
    ((?<day_name1>#{DAY_NAMES})
       [ ]
-   )?
+   )?
    (?<month_name1>#{MONTH_NAMES})
       (?: \/|[ ] )
    (?<day1>\d{1,2})
    ## optional year
    ( [ ]
       (?<year1>\d{4})
-   )?
+   )?
    ## support + and -  (add .. or such - why??)
-   [ ]*[+-][ ]*
+   [ ]*[-][ ]*
    ## optional day name
    ((?<day_name2>#{DAY_NAMES})
       [ ]
-   )?
+   )?
    (?<month_name2>#{MONTH_NAMES})
       (?: \/|[ ] )
    (?<day2>\d{1,2})
    ## optional year
    ( [ ]
       (?<year2>\d{4})
-   )?
-   \b
+   )?
+   \b
+)}ix
+###
+#   variant ii
+# e.g. 26 July - 27 July
+DURATION_II_RE =  %r{
+(?<duration>
+    \b
+   ## optional day name
+   ((?<day_name1>#{DAY_NAMES})
+      [ ]
+   )?
+   (?<day1>\d{1,2})
+      [ ]
+   (?<month_name1>#{MONTH_NAMES})
+   ## optional year
+   ( [ ]
+      (?<year1>\d{4})
+   )?
+   ## support + and -  (add .. or such - why??)
+   [ ]*[-][ ]*
+   ## optional day name
+   ((?<day_name2>#{DAY_NAMES})
+      [ ]
+   )?
+   (?<day2>\d{1,2})
+      [ ]
+   (?<month_name2>#{MONTH_NAMES})
+   ## optional year
+   ( [ ]
+      (?<year2>\d{4})
+   )?
+   \b
 )}ix
+#############################################
+# map tables
+#  note: order matters; first come-first matched/served
+DURATION_RE = Regexp.union(
+   DURATION_I_RE,
+   DURATION_II_RE
+)
 end  #   class Parser
-end  # module SportDb
+end  # module SportDb

data/lib/sportdb/parser/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module SportDb
     module Parser
   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
   MINOR = 2
-  PATCH = 0
+  PATCH = 2
   VERSION = [MAJOR,MINOR,PATCH].join('.')
   def self.version

data/lib/sportdb/parser.rb CHANGED Viewed

@@ -24,11 +24,6 @@ require_relative 'parser/lang'
 require_relative 'parser/parser'
-## more
-require_relative 'parser/outline_reader'
-require_relative 'parser/linter'
-require_relative 'parser/opts'
 ###
 #  make parser api (easily) available - why? why not?

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.2
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-08-22 00:00:00.000000000 Z
+date: 2024-08-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cocos
@@ -74,8 +74,7 @@ dependencies:
         version: '4.1'
 description: sportdb-parser - football.txt match parser (& tokenizer)
 email: gerald.bauer@gmail.com
-executables:
-- fbt
+executables: []
 extensions: []
 extra_rdoc_files:
 - CHANGELOG.md
@@ -86,12 +85,8 @@ files:
 - Manifest.txt
 - README.md
 - Rakefile
-- bin/fbt
 - lib/sportdb/parser.rb
 - lib/sportdb/parser/lang.rb
-- lib/sportdb/parser/linter.rb
-- lib/sportdb/parser/opts.rb
-- lib/sportdb/parser/outline_reader.rb
 - lib/sportdb/parser/parser.rb
 - lib/sportdb/parser/token-date.rb
 - lib/sportdb/parser/token-score.rb
@@ -112,7 +107,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.2.2
+      version: 3.1.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="

data/bin/fbt DELETED Viewed

@@ -1,94 +0,0 @@
-#!/usr/bin/env ruby
-## tip: to test run:
-##   ruby -I ./lib bin/fbt
-## our own code
-require 'sportdb/parser'
-require 'optparse'
-##
-## read textfile
-##   and dump tokens
-##
-##   fbt  ../openfootball/.../euro.txt
- args = ARGV
- opts = { debug: false,
-          metal: false }
- parser = OptionParser.new do |parser|
-  parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
-##
-## check if git has a offline option?? (use same)
-##             check for other tools - why? why not?
-  parser.on( "--verbose", "--debug",
-               "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
-    opts[:debug] = debug
-  end
-  parser.on( "--metal",
-                 "turn off typed parse tree; show to the metal tokens"+
-                   " (default: #{opts[:metal]})" ) do |metal|
-    opts[:metal] = metal
-  end
-end
-parser.parse!( args )
-puts "OPTS:"
-p opts
-puts "ARGV:"
-p args
-paths =  if args.empty?
-            [
-              '../../../openfootball/euro/2020--europe/euro.txt',
-              '../../../openfootball/euro/2024--germany/euro.txt',
-            ]
-         else
-            ## check for directories
-            ##   and auto-expand
-            SportDb::Parser::Opts.expand_args( args )
-         end
-SportDb::Parser::Linter.debug = true    if opts[:debug]
-linter = SportDb::Parser::Linter.new
-errors = []
-paths.each_with_index do |path,i|
-    puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
-    linter.read( path, parse: !opts[:metal] )
-    errors += linter.errors    if linter.errors?
-end
-if errors.size > 0
-    puts
-    pp errors
-    puts
-    puts "!!   #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
-else
-    puts
-    puts "OK   no parse errors found in #{paths.size} datafile(s)"
-end
-puts "bye"

data/lib/sportdb/parser/linter.rb DELETED Viewed

@@ -1,149 +0,0 @@
-module SportDb
-class Parser
-###
-## note - Linter for now nested inside Parser - keep? why? why not?
-class Linter
-def self.debug=(value) @@debug = value; end
-def self.debug?() @@debug ||= false; end  ## note: default is FALSE
-def debug?()  self.class.debug?; end
-attr_reader :errors
-def initialize
-  @errors = []
-  @parser = Parser.new   ## use own parser instance (not shared) - why? why not?
-end
-def errors?() @errors.size > 0; end
-  ## note:  colon (:) MUST be followed by one (or more) spaces
-  ##      make sure mon feb 12 18:10 will not match
-  ##        allow 1. FC Köln etc.
-  ##               Mainz 05:
-  ##           limit to 30 chars max
-  ##          only allow  chars incl. intl but (NOT ()[]/;)
-  ##
-  ##   Group A:
-  ##   Group B:   - remove colon
-  ##    or lookup first
-  ATTRIB_RE = %r{^
-                   [ ]*?     # slurp leading spaces
-                (?<key>[^:|\]\[()\/; -]
-                       [^:|\]\[()\/;]{0,30}
-                 )
-                   [ ]*?     # slurp trailing spaces
-                   :[ ]+
-                (?<value>.+)
-                    [ ]*?   # slurp trailing spaces
-                   $
-                }ix
-#########
-## parse - false (default) - tokenize (only)
-##       - true            - tokenize & parse
-def read( path, parse: false )
-  ## note: every (new) read call - resets errors list to empty
-  @errors = []
-  nodes = OutlineReader.read( path )
-  ##  process nodes
-  h1 = nil
-  orphans = 0    ## track paragraphs's with no heading
-  attrib_found = false
-  nodes.each do |node|
-    type = node[0]
-    if type == :h1
-        h1 = node[1]  ## get heading text
-        puts
-        puts "  = Heading 1 >#{node[1]}<"
-    elsif type == :p
-       if h1.nil?
-         orphans += 1    ## only warn once
-         puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
-         next
-       end
-       lines = node[1]
-       tree = []
-       lines.each_with_index do |line,i|
-        if debug?
-         puts
-         puts "line >#{line}<"
-        end
-        ## skip new (experimental attrib syntax)
-        if attrib_found == false &&
-            ATTRIB_RE.match?( line )
-          ## note: check attrib regex AFTER group def e.g.:
-          ##         Group A:
-          ##         Group B:  etc.
-          ##     todo/fix - change Group A: to Group A etc.
-          ##                       Group B: to Group B
-           attrib_found = true
-           ## logger.debug "skipping key/value line - >#{line}<"
-           next
-        end
-        if attrib_found
-          ## check if line ends with dot
-          ##  if not slurp up lines to the next dot!!!
-          ## logger.debug "skipping key/value line - >#{line}<"
-          attrib_found = false   if line.end_with?( '.' )
-              # logger.debug "skipping key/value line (cont.) - >#{line}<"
-              next
-        end
-        t, error_messages  =  if parse
-                                  @parser.parse_with_errors( line )
-                              else
-                                  @parser.tokenize_with_errors( line )
-                              end
-         if error_messages.size > 0
-            ## add to "global" error list
-            ##   make a triplet tuple (file / msg / line text)
-            error_messages.each do |msg|
-                @errors << [ path,
-                             msg,
-                             line
-                           ]
-            end
-         end
-         pp t   if debug?
-         tree << t
-       end
-       ## pp tree
-    else
-        pp node
-        raise ArgumentError, "unsupported (node) type >#{type}<"
-    end
-  end  # each node
-end  # read
-end  # class Linter
-end   # class Parser
-end   # module SportDb

data/lib/sportdb/parser/opts.rb DELETED Viewed

@@ -1,70 +0,0 @@
-module SportDb
-class Parser
-###
-## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
-class Opts
-    SEASON_RE = %r{ (?:
-                       \d{4}-\d{2}
-                     | \d{4}(--[a-z0-9_-]+)?
-                    )
-                  }x
-    SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?
-    ## note: if pattern includes directory add here
-    ##     (otherwise move to more "generic" datafile) - why? why not?
-    MATCH_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
-                       #{SEASON}
-                     /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
-                }x
-def self.find( path )
-    datafiles = []
-    ## note: normalize path - use File.expand_path ??
-    ##    change all backslash to slash for now
-    ## path = path.gsub( "\\", '/' )
-    path = File.expand_path( path )
-    ## check all txt files
-    ## note: incl. files starting with dot (.)) as candidates
-    ##     (normally excluded with just *)
-    candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
-    ## pp candidates
-    candidates.each do |candidate|
-      datafiles << candidate    if MATCH_RE.match( candidate )
-    end
-    ## pp datafiles
-    datafiles
-end
-def self.expand_args( args )
-    paths = []
-    args.each do |arg|
-        ## check if directory
-        if Dir.exist?( arg )
-            datafiles = find( arg )
-            puts
-            puts "  found #{datafiles.size} match txt datafiles in #{arg}"
-            pp datafiles
-            paths += datafiles
-        else
-              ## assume it's a file
-            paths << arg
-        end
-    end
-    paths
-end
-end  # class Opts
-end   # class Parser
-end   # module SportDb

data/lib/sportdb/parser/outline_reader.rb DELETED Viewed

@@ -1,97 +0,0 @@
-module SportDb
-class OutlineReader
-  def self.debug=(value) @@debug = value; end
-  def self.debug?() @@debug ||= false; end
-  def debug?()  self.class.debug?; end
-  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
-    txt = File.open( path, 'r:utf-8' ) {|f| f.read }
-    parse( txt )
-  end
-  def self.parse( txt )
-    new( txt ).parse
-  end
-  def initialize( txt )
-    @txt = txt
-  end
-  ## note: skip "decorative" only heading e.g. ========
-  ##  todo/check:  find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
-  HEADING_BLANK_RE = %r{\A
-                        ={1,}
-                        \z}x
-  ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
-  HEADING_RE = %r{\A
-                  (?<marker>={1,})       ## 1. leading ======
-                    [ ]*
-                  (?<text>[^=]+)         ## 2. text   (note: for now no "inline" = allowed)
-                    [ ]*
-                    =*                   ## 3. (optional) trailing ====
-                  \z}x
-  def parse
-    outline=[]   ## outline structure
-    start_para = true      ## start new para(graph) on new text line?
-    @txt.each_line do |line|
-        line = line.strip      ## todo/fix: keep leading and trailing spaces - why? why not?
-        if line.empty?    ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
-          start_para = true
-          next
-        end
-        break if line == '__END__'
-        next if line.start_with?( '#' )   ## skip comments too
-        ## strip inline (until end-of-line) comments too
-        ##  e.g Eupen | KAS Eupen ## [de]
-        ##   => Eupen | KAS Eupen
-        ##  e.g bq   Bonaire,  BOE        # CONCACAF
-        ##   => bq   Bonaire,  BOE
-        line = line.sub( /#.*/, '' ).strip
-        pp line    if debug?
-        ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
-        next if HEADING_BLANK_RE.match( line )  # skip "decorative" only heading e.g. ========
-         ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
-        if m=HEADING_RE.match( line )
-           start_para = true
-           heading_marker = m[:marker]
-           heading_level  = heading_marker.length   ## count number of = for heading level
-           heading        = m[:text].strip
-           puts "heading #{heading_level} >#{heading}<"   if debug?
-           outline << [:"h#{heading_level}", heading]
-        else    ## assume it's a (plain/regular) text line
-           if start_para
-             outline << [:p, [line]]
-             start_para = false
-           else
-             node = outline[-1]    ## get last entry
-             if node[0] == :p      ##  assert it's a p(aragraph) node!!!
-                node[1] << line    ## add line to p(aragraph)
-             else
-               puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
-               pp node
-               exit 1
-             end
-           end
-        end
-    end
-    outline
-  end # method read
-end # class OutlineReader
-end # module SportDb