RubyGems - sportdb-parser - Versions diffs - 0.2.1 → 0.2.2 - Mend

sportdb-parser 0.2.1 → 0.2.2

Files changed (11) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +1 -1
data/Manifest.txt +0 -4
data/lib/sportdb/parser/token-date.rb +29 -0
data/lib/sportdb/parser/version.rb +1 -1
data/lib/sportdb/parser.rb +0 -5
metadata +3 -8
data/bin/fbt +0 -94
data/lib/sportdb/parser/linter.rb +0 -149
data/lib/sportdb/parser/opts.rb +0 -70
data/lib/sportdb/parser/outline_reader.rb +0 -97

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3657cedc5125ee2515efa8be4a1838d05b7290523dd893f7eba5b87024e71238
-  data.tar.gz: caf6d7e909e17fa0dcabf659ab8f5046ca1940d8f7c1c6f5312e485dc0089384
+  metadata.gz: 0c9225b21f400b9f9cced2052c3062f41a091ed81d3d4239164c9652f53ebc6e
+  data.tar.gz: f7250eaa21324962df27e7cdd397857afa570c610f00c80c31e5105e40964002
 SHA512:
-  metadata.gz: 4063565aada304a1eb96009b6fe542392f41a55d4ad4d21b5de156004bd69a055c5f86b076bed1defbe50423c8c891dd538931ea6ca9b8ec41e237c23e699219
-  data.tar.gz: 91f6476810cb6617dfcd703ada57592cd38b87f3b4b9fc6fd4468a9457ff0e6ae6337a4e4f5c782e1b80f5f6b6015d5ce26ed6330915cd67a5fb6606f665017f
+  metadata.gz: 471c938c233d8f81d7a0fd5e4470a27a52486906764816b6c35ea3d88e19650c81302fd5ff9ee30b85d3a8e9f81ada8eef20b49bd3de924c7238acb106ba6082
+  data.tar.gz: 24d1cf3846404859ad7e751895325b256321d43e2881413fda6325c744ca0c31b52ef2032a9dfc8e56e67d7a06df54a6d2780a297982440b8e40b7055fe06c26

data/CHANGELOG.md CHANGED Viewed

@@ -1,4 +1,4 @@
-### 0.2.1
+### 0.2.2
 ### 0.0.1 / 2024-07-12

data/Manifest.txt CHANGED Viewed

@@ -2,12 +2,8 @@ CHANGELOG.md
 Manifest.txt
 README.md
 Rakefile
-bin/fbt
 lib/sportdb/parser.rb
 lib/sportdb/parser/lang.rb
-lib/sportdb/parser/linter.rb
-lib/sportdb/parser/opts.rb
-lib/sportdb/parser/outline_reader.rb
 lib/sportdb/parser/parser.rb
 lib/sportdb/parser/token-date.rb
 lib/sportdb/parser/token-score.rb

data/lib/sportdb/parser/token-date.rb CHANGED Viewed

@@ -155,6 +155,35 @@ DATE_RE = Regexp.union(
 )
+##
+##  date parser helper
+##    match str against DATE_RE and build a Date from the
+##    named captures (year, month_name, day)
+##
+##  str    - text to match against DATE_RE
+##  start: - date-like object (must respond to year, month, day);
+##             used to work out the year if str has none
+##
+##  returns a Date; prints an error message and exits (status 1)
+##    if str does not match or has no usable month / day
+def self.parse_date( str, start: )
+    if m=DATE_RE.match( str )
+      year    = m[:year].to_i(10)  if m[:year]
+      month   = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
+      day     = m[:day].to_i(10)   if m[:day]
+      ## note: the day name (if matched) is not needed to build the date
+      ## month & day are required for the year calculation and Date.new;
+      ##   report a missing / unknown value the same way as any other
+      ##   unexpected format (instead of a NoMethodError / TypeError on nil)
+      if month.nil? || day.nil?
+        puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
+        exit 1
+      end
+      if year.nil?   ## try to calculate year
+        year =  if  month > start.month ||
+                   (month == start.month && day >= start.day)
+                  # assume same year as start_at event (e.g. 2013 for 2013/14 season)
+                  start.year
+                else
+                  # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
+                  start.year+1
+                end
+      end
+      Date.new( year,month,day )
+    else
+      puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
+      exit 1
+    end
+end
 ###
 #  date duration
 #   use - or + as separator

data/lib/sportdb/parser/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module SportDb
     module Parser
   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
   MINOR = 2
-  PATCH = 1
+  PATCH = 2
   VERSION = [MAJOR,MINOR,PATCH].join('.')
   def self.version

data/lib/sportdb/parser.rb CHANGED Viewed

@@ -24,11 +24,6 @@ require_relative 'parser/lang'
 require_relative 'parser/parser'
-## more
-require_relative 'parser/outline_reader'
-require_relative 'parser/linter'
-require_relative 'parser/opts'
 ###
 #  make parser api (easily) available - why? why not?

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sportdb-parser
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.2.2
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-08-24 00:00:00.000000000 Z
+date: 2024-08-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cocos
@@ -74,8 +74,7 @@ dependencies:
         version: '4.1'
 description: sportdb-parser - football.txt match parser (& tokenizer)
 email: gerald.bauer@gmail.com
-executables:
-- fbt
+executables: []
 extensions: []
 extra_rdoc_files:
 - CHANGELOG.md
@@ -86,12 +85,8 @@ files:
 - Manifest.txt
 - README.md
 - Rakefile
-- bin/fbt
 - lib/sportdb/parser.rb
 - lib/sportdb/parser/lang.rb
-- lib/sportdb/parser/linter.rb
-- lib/sportdb/parser/opts.rb
-- lib/sportdb/parser/outline_reader.rb
 - lib/sportdb/parser/parser.rb
 - lib/sportdb/parser/token-date.rb
 - lib/sportdb/parser/token-score.rb

data/bin/fbt DELETED Viewed

@@ -1,94 +0,0 @@
-#!/usr/bin/env ruby
-## tip: to test run:
-##   ruby -I ./lib bin/fbt
-## our own code
-require 'sportdb/parser'
-require 'optparse'
-##
-## read textfile(s), run the linter on every file
-##   and report the collected parse errors (tokens get dumped with --debug)
-##
-##   fbt  ../openfootball/.../euro.txt
- args = ARGV
- opts = { debug: false,
-          metal: false }
- parser = OptionParser.new do |parser|
-  parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
-##
-## todo: check if git has an offline option?? (use same)
-##             check for other tools - why? why not?
-  parser.on( "--verbose", "--debug",
-               "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
-    opts[:debug] = debug
-  end
-  parser.on( "--metal",
-                 "turn off typed parse tree; show to the metal tokens"+
-                   " (default: #{opts[:metal]})" ) do |metal|
-    opts[:metal] = metal
-  end
-end
-parser.parse!( args )   ## note: parse! removes the parsed options from args
-puts "OPTS:"
-p opts
-puts "ARGV:"
-p args
-paths =  if args.empty?   ## no paths passed in? use the built-in sample datafiles
-            [
-              '../../../openfootball/euro/2021--europe/euro.txt',
-              '../../../openfootball/euro/2024--germany/euro.txt',
-            ]
-         else
-            ## check for directories
-            ##   and auto-expand (to the matching txt datafiles inside)
-            SportDb::Parser::Opts.expand_args( args )
-         end
-SportDb::Parser::Linter.debug = true    if opts[:debug]
-linter = SportDb::Parser::Linter.new
-errors = []   ## errors from all datafiles (linter.errors gets reset on every read)
-paths.each_with_index do |path,i|
-    puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
-    linter.read( path, parse: !opts[:metal] )   ## --metal => tokenize only (no parse)
-    errors += linter.errors    if linter.errors?
-end
-if errors.size > 0
-    puts
-    pp errors
-    puts
-    puts "!!   #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
-else
-    puts
-    puts "OK   no parse errors found in #{paths.size} datafile(s)"
-end
-puts "bye"

data/lib/sportdb/parser/linter.rb DELETED Viewed

@@ -1,149 +0,0 @@
-module SportDb
-class Parser
-###  Linter - read a datafile outline and tokenize (or parse) every text line; collect all error messages
-## note - Linter for now nested inside Parser - keep? why? why not?
-class Linter
-def self.debug=(value) @@debug = value; end
-def self.debug?() @@debug ||= false; end  ## note: default is FALSE
-def debug?()  self.class.debug?; end
-attr_reader :errors   ## list of [path, msg, line] triplets from the last read call
-def initialize
-  @errors = []
-  @parser = Parser.new   ## use own parser instance (not shared) - why? why not?
-end
-def errors?() @errors.size > 0; end
-  ## note:  colon (:) MUST be followed by one (or more) spaces
-  ##      make sure mon feb 12 18:10 will not match
-  ##        allow 1. FC Köln etc.
-  ##               Mainz 05:
-  ##           limit key to 31 chars max (first char + up to 30 more)
-  ##          only allow  chars incl. intl but (NOT :|()[]/;)
-  ##
-  ##   Group A:
-  ##   Group B:   - remove colon
-  ##    or lookup first
-  ATTRIB_RE = %r{^
-                   [ ]*?     # slurp leading spaces
-                (?<key>[^:|\]\[()\/; -]
-                       [^:|\]\[()\/;]{0,30}
-                 )
-                   [ ]*?     # slurp trailing spaces
-                   :[ ]+
-                (?<value>.+)
-                    [ ]*?   # slurp trailing spaces
-                   $
-                }ix
-#########
-## read the datafile in path; parse - false (default) - tokenize (only)
-##                                  - true            - tokenize & parse
-def read( path, parse: false )
-  ## note: every (new) read call - resets errors list to empty
-  @errors = []
-  nodes = OutlineReader.read( path )
-  ##  process nodes
-  h1 = nil
-  orphans = 0    ## track paragraphs with no heading
-  attrib_found = false   ## note: set outside the nodes loop - carries over to the next paragraph
-  nodes.each do |node|
-    type = node[0]
-    if type == :h1
-        h1 = node[1]  ## get heading text
-        puts
-        puts "  = Heading 1 >#{node[1]}<"
-    elsif type == :p
-       if h1.nil?
-         orphans += 1    ## note: warns for every paragraph with no heading (with running count)
-         puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
-         next
-       end
-       lines = node[1]
-       tree = []   ## collects the result per line (only used for the commented out pp below)
-       lines.each_with_index do |line,i|
-        if debug?
-         puts
-         puts "line >#{line}<"
-        end
-        ## skip new (experimental) attrib syntax
-        if attrib_found == false &&
-            ATTRIB_RE.match?( line )
-          ## note: check attrib regex AFTER group def e.g.:
-          ##         Group A:
-          ##         Group B:  etc.
-          ##     todo/fix - change Group A: to Group A etc.
-          ##                       Group B: to Group B
-           attrib_found = true
-           ## logger.debug "skipping key/value line - >#{line}<"
-           next
-        end
-        if attrib_found
-          ## check if line ends with dot
-          ##  if not slurp up lines to the next dot!!!
-          ## logger.debug "skipping key/value line - >#{line}<"
-          attrib_found = false   if line.end_with?( '.' )
-              # logger.debug "skipping key/value line (cont.) - >#{line}<"
-              next   ## note: unconditional - the line ending with a dot gets skipped too
-        end
-        t, error_messages  =  if parse
-                                  @parser.parse_with_errors( line )
-                              else
-                                  @parser.tokenize_with_errors( line )
-                              end
-         if error_messages.size > 0
-            ## add to "global" error list
-            ##   make a triplet tuple (file / msg / line text)
-            error_messages.each do |msg|
-                @errors << [ path,
-                             msg,
-                             line
-                           ]
-            end
-         end
-         pp t   if debug?
-         tree << t
-       end
-       ## pp tree
-    else   ## note: only :h1 and :p nodes are supported; anything else raises
-        pp node
-        raise ArgumentError, "unsupported (node) type >#{type}<"
-    end
-  end  # each node
-end  # read
-end  # class Linter
-end   # class Parser
-end   # module SportDb

data/lib/sportdb/parser/opts.rb DELETED Viewed

@@ -1,70 +0,0 @@
-module SportDb
-class Parser
-###  Opts - helpers to expand command-line path args into a list of datafiles
-## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
-class Opts
-    SEASON_RE = %r{ (?:
-                       \d{4}-\d{2}
-                     | \d{4}(--[a-z0-9_-]+)?
-                    )
-                  }x   ## season directory e.g. 2024-25 or 2024 or 2024--germany
-    SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?
-    ## note: if pattern includes directory add here
-    ##     (otherwise move to more "generic" datafile) - why? why not?
-    MATCH_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
-                       #{SEASON}
-                     /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
-                }x
-def self.find( path )   ## returns all txt datafiles below path matching MATCH_RE
-    datafiles = []
-    ## note: path gets normalized via File.expand_path
-    ##    (before: change all backslash to slash)
-    ## path = path.gsub( "\\", '/' )
-    path = File.expand_path( path )
-    ## check all txt files (in all subdirectories)
-    ## note: incl. files starting with dot (.) as candidates
-    ##     (normally excluded with just *)
-    candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
-    ## pp candidates
-    candidates.each do |candidate|
-      datafiles << candidate    if MATCH_RE.match( candidate )
-    end
-    ## pp datafiles
-    datafiles
-end
-def self.expand_args( args )   ## returns paths with every directory arg replaced by its datafiles
-    paths = []
-    args.each do |arg|
-        ## check if directory (and if yes, add all matching datafiles found inside)
-        if Dir.exist?( arg )
-            datafiles = find( arg )
-            puts
-            puts "  found #{datafiles.size} match txt datafiles in #{arg}"
-            pp datafiles
-            paths += datafiles
-        else
-              ## assume it's a file (note: passed along as is - no check)
-            paths << arg
-        end
-    end
-    paths
-end
-end  # class Opts
-end   # class Parser
-end   # module SportDb

data/lib/sportdb/parser/outline_reader.rb DELETED Viewed

@@ -1,97 +0,0 @@
-module SportDb
-class OutlineReader   ## turns text into a flat list of heading and paragraph nodes
-  def self.debug=(value) @@debug = value; end
-  def self.debug?() @@debug ||= false; end
-  def debug?()  self.class.debug?; end
-  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
-    txt = File.open( path, 'r:utf-8' ) {|f| f.read }
-    parse( txt )
-  end
-  def self.parse( txt )
-    new( txt ).parse
-  end
-  def initialize( txt )
-    @txt = txt
-  end
-  ## note: skip "decorative" only heading e.g. ========
-  ##  todo/check:  find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
-  HEADING_BLANK_RE = %r{\A
-                        ={1,}
-                        \z}x
-  ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
-  HEADING_RE = %r{\A
-                  (?<marker>={1,})       ## 1. leading ======
-                    [ ]*
-                  (?<text>[^=]+)         ## 2. text   (note: for now no "inline" = allowed)
-                    [ ]*
-                    =*                   ## 3. (optional) trailing ====
-                  \z}x
-  def parse   ## returns the outline - a list of [:h1, text] (h2, h3, ..) and [:p, [line, ...]] nodes
-    outline=[]   ## outline structure
-    start_para = true      ## start new para(graph) on new text line?
-    @txt.each_line do |line|
-        line = line.strip      ## todo/fix: keep leading and trailing spaces - why? why not?
-        if line.empty?    ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
-          start_para = true
-          next
-        end
-        break if line == '__END__'   ## stop reading at the __END__ marker
-        next if line.start_with?( '#' )   ## skip comments too
-        ## strip inline (until end-of-line) comments too
-        ##  e.g Eupen | KAS Eupen ## [de]
-        ##   => Eupen | KAS Eupen
-        ##  e.g bq   Bonaire,  BOE        # CONCACAF
-        ##   => bq   Bonaire,  BOE
-        line = line.sub( /#.*/, '' ).strip
-        pp line    if debug?
-        ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
-        next if HEADING_BLANK_RE.match( line )  # skip "decorative" only heading e.g. ========
-         ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
-        if m=HEADING_RE.match( line )
-           start_para = true
-           heading_marker = m[:marker]
-           heading_level  = heading_marker.length   ## count number of = for heading level
-           heading        = m[:text].strip
-           puts "heading #{heading_level} >#{heading}<"   if debug?
-           outline << [:"h#{heading_level}", heading]
-        else    ## assume it's a (plain/regular) text line
-           if start_para
-             outline << [:p, [line]]
-             start_para = false
-           else
-             node = outline[-1]    ## get last entry
-             if node[0] == :p      ##  assert it's a p(aragraph) node!!!
-                node[1] << line    ## add line to p(aragraph)
-             else
-               puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
-               pp node
-               exit 1
-             end
-           end
-        end
-    end
-    outline
-  end # method parse
-end # class OutlineReader
-end # module SportDb