RubyGems - sportdb-formats - Versions diffs - 0.4.0 → 1.0.0 - Mend

sportdb-formats 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

checksums.yaml +4 -4
data/Manifest.txt +24 -4
data/Rakefile +3 -3
data/lib/sportdb/formats.rb +25 -2
data/lib/sportdb/formats/config.rb +40 -0
data/lib/sportdb/formats/datafile.rb +42 -62
data/lib/sportdb/formats/datafile_package.rb +160 -0
data/lib/sportdb/formats/match/conf_parser.rb +120 -0
data/lib/sportdb/formats/match/mapper.rb +319 -0
data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
data/lib/sportdb/formats/match/match_parser.rb +659 -0
data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
data/lib/sportdb/formats/name_helper.rb +84 -0
data/lib/sportdb/formats/outline_reader.rb +53 -15
data/lib/sportdb/formats/package.rb +172 -160
data/lib/sportdb/formats/parser_helper.rb +81 -0
data/lib/sportdb/formats/score/score_formats.rb +180 -0
data/lib/sportdb/formats/score/score_parser.rb +196 -0
data/lib/sportdb/formats/structs/country.rb +1 -43
data/lib/sportdb/formats/structs/group.rb +25 -0
data/lib/sportdb/formats/structs/league.rb +7 -26
data/lib/sportdb/formats/structs/match.rb +72 -51
data/lib/sportdb/formats/structs/round.rb +14 -4
data/lib/sportdb/formats/structs/season.rb +3 -0
data/lib/sportdb/formats/structs/team.rb +144 -0
data/lib/sportdb/formats/version.rb +2 -2
data/test/helper.rb +83 -1
data/test/test_clubs.rb +3 -3
data/test/test_conf.rb +65 -0
data/test/test_datafile.rb +21 -30
data/test/test_match.rb +0 -6
data/test/test_match_auto.rb +72 -0
data/test/test_match_auto_champs.rb +45 -0
data/test/test_match_auto_euro.rb +37 -0
data/test/test_match_auto_worldcup.rb +61 -0
data/test/test_match_champs.rb +27 -0
data/test/test_match_eng.rb +26 -0
data/test/test_match_euro.rb +27 -0
data/test/test_match_worldcup.rb +27 -0
data/test/test_name_helper.rb +67 -0
data/test/test_outline_reader.rb +3 -3
data/test/test_package.rb +21 -2
data/test/test_package_match.rb +78 -0
data/test/test_scores.rb +67 -51
metadata +32 -12
data/lib/sportdb/formats/scores.rb +0 -253
data/lib/sportdb/formats/structs/club.rb +0 -213
data/test/test_club_helpers.rb +0 -63
data/test/test_datafile_match.rb +0 -65

data/lib/sportdb/formats/match/match_parser_auto_conf.rb ADDED

@@ -0,0 +1,202 @@
+# encoding: utf-8
+module SportDb
+class AutoConfParser     ## todo/check: rename/change to MatchAutoConfParser - why? why not?
+  def self.parse( lines, start: )
+    ##  todo/fix: add support for txt and lines
+    ##    check if lines_or_txt is an array or just a string
+    parser = new( lines, start )
+    parser.parse
+  end
+  include Logging         ## e.g. logger#debug, logger#info, etc.
+  include ParserHelper    ## e.g. read_lines, etc.
+  def initialize( lines, start )
+    # for convenience split string into lines
+    ##    note: removes/strips empty lines
+    ## todo/check: change to text instead of array of lines - why? why not?
+    @lines        = lines.is_a?( String ) ? read_lines( lines ) : lines
+    @start        = start
+  end
+  def parse
+    ## try to  find all teams in match schedule
+    @last_round   = nil
+    @last_group   = nil
+    ## definitions/defs
+    @round_defs = Hash.new(0)
+    @group_defs = Hash.new(0)
+    ## usage/refs
+    @rounds       = {}           ## track usage counter and match (two teams) counter
+    @groups       = {}           ##  -"-
+    @teams        = Hash.new(0)   ## keep track of usage counter
+    @warns        = []    ## track list of warnings (unmatched lines)  too - why? why not?
+    @lines.each do |line|
+      if is_goals?( line )
+        logger.debug "skipping matched goals line: >#{line}<"
+      elsif is_round_def?( line )
+        ## todo/fix:  add round definition (w begin n end date)
+        ## todo: do not patch rounds with definition (already assume begin/end date is good)
+        ##  -- how to deal with matches that get rescheduled/postponed?
+        logger.debug "skipping matched round def line: >#{line}<"
+        @round_defs[ line ] += 1
+      elsif is_round?( line )
+        logger.debug "skipping matched round line: >#{line}<"
+        round = @rounds[ line ] ||= {count: 0, match_count: 0}   ## usage counter, match counter
+        round[:count] +=1
+        @last_round = round
+      elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
+        ### todo: add pipe (|) marker (required)
+        logger.debug "skipping matched group def line: >#{line}<"
+        @group_defs[ line ] += 1
+      elsif is_group?( line )
+        ##  -- lets you set group  e.g. Group A etc.
+        logger.debug "skipping matched group line: >#{line}<"
+        group = @groups[ line ] ||= {count: 0, match_count: 0}
+        group[:count] +=1
+        @last_group = group
+        ## todo/fix:  parse group line!!!
+      elsif try_parse_game( line )
+        # do nothing here
+      else
+        logger.warn "skipping line (no match found): >#{line}<"
+        @warns << line
+      end
+    end # lines.each
+    [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
+  end
+  def try_parse_game( line )
+    # note: clone line; for possible test do NOT modify in place for now
+    # note: returns true if parsed, false if no match
+    parse_game( line.dup )
+  end
+  def parse_game( line )
+    logger.debug "parsing game (fixture) line: >#{line}<"
+    ## remove all protected text runs e.g. []
+    ##   fix: add [ to end-of-line too
+    ##  todo/fix: move remove protected text runs AFTER find date!! - why? why not?
+    line = line.gsub( /\[
+                        [^\]]+?
+                       \]/x, '' ).strip
+    return true if line.empty?    ## note: return true (for valid line with no match/teams)
+    ## split by geo (@) - remove for now
+    values = line.split( '@' )
+    line = values[0]
+    ## try find date
+    date = find_date!( line, start: @start )
+    if date   ## if found remove tagged run too; note using singular sub (NOT global gsub)
+      line = line.sub( /\[
+                          [^\]]+?
+                         \]/x, '' ).strip
+    else
+      ##  check for leading hours only e.g.  20.30 or 20:30 or 20h30 or 20H30 or 09h00
+      ##   todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
+      line = line.sub(  %r{^           ## MUST be anchored to beginning of line
+                            [012]?[0-9]
+                            [.:hH]
+                            [0-9][0-9]
+                           (?=[ ])    ## must be followed by space for now (add end of line too - why? why not?)
+                          }x, '' ).strip
+    end
+    return true if line.empty?    ## note: return true (for valid line with no match/teams)
+    score = find_score!( line )
+    logger.debug "  line: >#{line}<"
+    line = line.sub( /\[
+                        [^\]]+?
+                       \]/x, '$$' )  # note: replace first score tag with $$
+    line = line.gsub( /\[
+                    [^\]]+?
+                   \]/x, '' )    # note: replace/remove all other score tags with nothing
+     ##  clean-up  remove all text run inside () or empty () too
+     line = line.gsub( /\(
+                     [^)]*?
+                    \)/x, '' )
+     ## check for more match separators e.g. - or vs for now
+     line = line.sub( / \s+
+                          (   -
+                            | v
+                            | vs\.?    # note: allow optional dot eg. vs.
+                          )
+                        \s+
+                       /ix, '$$' )
+     values = line.split( '$$' )
+     values = values.map { |value| value.strip }        ## strip spaces
+     values = values.select { |value| !value.empty? }   ## remove empty strings
+     return true    if values.size == 0  ## note: return true (for valid line with no match/teams)
+     if values.size == 1
+       puts "(auto config) try matching teams separated by spaces (2+):"
+       pp values
+       values = values[0].split( /[ ]{2,}/ )
+       pp values
+     end
+     return false   if values.size != 2
+     puts "(auto config) try matching teams:"
+     pp values
+     @teams[ values[0] ] += 1    ## update usage counters
+     @teams[ values[1] ] += 1
+     @last_round[ :match_count ] += 1    if @last_round
+     @last_group[ :match_count ] += 1    if @last_group
+     true
+  end
+  def find_score!( line )
+    # note: always call after find_dates !!!
+    #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
+    #   -- note: score might have two digits too
+    ScoreFormats.find!( line )
+  end
+  def find_date!( line, start: )
+    ## NB: lets us pass in start_at/end_at date (for event)
+    #   for auto-complete year
+    # extract date from line
+    # and return it
+    # NB: side effect - removes date from line string
+    DateFormats.find!( line, start: start )
+  end
+end # class AutoConfParser
+end # module SportDb

data/lib/sportdb/formats/name_helper.rb ADDED

@@ -0,0 +1,84 @@
+module SportDb
+  module NameHelper
+  ## note: allow placeholder years to e.g. (-___) or (-????)
+  ##    for marking missing (to be filled in) years
+  ##  e.g. (1887-1911), (-2013),
+  ##      (1946-2001, 2013-) etc.
+  ##  todo/check: make more strict  e.g. only accept 4-digit years? - why? why not?
+  YEAR_RE =  %r{\(
+                  [0-9, ?_-]+?     # note: non-greedy (minimum/first) match
+              \)}x
+  def strip_year( name )
+    ## check for year(s) e.g. (1887-1911), (-2013),
+    ##                        (1946-2001, 2013-) etc.
+    ##  todo/check: only sub once (not global) - why? why not?
+    name.gsub( YEAR_RE, '' ).strip
+  end
+  def has_year?( name ) name =~ YEAR_RE; end
+  LANG_RE =  %r{\[
+                [a-z]{1,2}    # note also allow single-letter [a] or [d] or [e] - why? why not?
+                \]}x
+  def strip_lang( name )
+     name.gsub( LANG_RE, '' ).strip
+  end
+  def has_lang?( name ) name =~ LANG_RE; end
+  def sanitize( name )
+    ## check for year(s) e.g. (1887-1911), (-2013),
+    ##                        (1946-2001,2013-) etc.
+    name = strip_year( name )
+    ## check lang codes e.g. [en], [fr], etc.
+    name = strip_lang( name )
+    name
+  end
+  ## note: also add (),’,−  etc. e.g.
+  ##   Estudiantes (LP) => Estudiantes LP
+  ##   Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
+  ##   Myllykosken Pallo −47 => Myllykosken Pallo 47
+  NORM_RE =  %r{
+                    [.'’º/()_−-]
+                  }x   # note: in [] dash (-) if last doesn't need to get escaped
+  ## note: remove all dots (.), dash (-), ', º, /, etc.
+  #   .  U+002E (46) - FULL STOP
+  #   '  U+0027 (39) - APOSTROPHE
+  #   ’  U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
+  #   º  U+00BA (186) - MASCULINE ORDINAL INDICATOR
+  #   /  U+002F (47) - SOLIDUS
+  #   (  U+0028 (40) - LEFT PARENTHESIS
+  #   )  U+0029 (41) - RIGHT PARENTHESIS
+  #   −  U+2212 (8722) - MINUS SIGN
+  #   -  U+002D (45) - HYPHEN-MINUS
+  ##         for norm(alizing) names
+  def strip_norm( name )
+    name.gsub( NORM_RE, '' )
+  end
+  def normalize( name )
+    # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
+    name = strip_norm( name )
+    name = name.gsub( ' ', '' )  # note: also remove all spaces!!!
+    ## todo/check: use our own downcase - why? why not?
+    name = downcase_i18n( name )     ## do NOT care about upper and lowercase for now
+    name
+  end
+  def variants( name )  Variant.find( name ); end
+  end  # module NameHelper
+end   # module SportDb

data/lib/sportdb/formats/outline_reader.rb CHANGED

@@ -5,17 +5,45 @@ module SportDb
 class OutlineReader
   def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
-    txt = File.open( path, 'r:utf-8' ).read
+    txt = File.open( path, 'r:utf-8' ) {|f| f.read }
     parse( txt )
   end
   def self.parse( txt )
+    new( txt ).parse
+  end
+  def initialize( txt )
+    @txt = txt
+  end
+  ## note: skip "decorative" only heading e.g. ========
+  ##  todo/check:  find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
+  HEADING_BLANK_RE = %r{\A
+                        ={1,}
+                        \z}x
+  ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
+  HEADING_RE = %r{\A
+                  (?<marker>={1,})       ## 1. leading ======
+                    [ ]*
+                  (?<text>[^=]+)         ## 2. text   (note: for now no "inline" = allowed)
+                    [ ]*
+                    =*                   ## 3. (optional) trailing ====
+                  \z}x
+  def parse
     outline=[]   ## outline structure
+    start_para = true      ## start new para(graph) on new text line?
-    txt.each_line do |line|
+    @txt.each_line do |line|
         line = line.strip      ## todo/fix: keep leading and trailing spaces - why? why not?
-        next if line.empty?    ## todo/fix: keep blank line nodes e.g. just remove comments and process headings?! - why? why not?
+        if line.empty?    ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
+          start_para = true
+          next
+        end
         break if line == '__END__'
         next if line.start_with?( '#' )   ## skip comments too
@@ -27,23 +55,33 @@ class OutlineReader
         line = line.sub( /#.*/, '' ).strip
         pp line
-        next if line =~ /^={1,}$/          ## skip "decorative" only heading e.g. ========
+        ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
+        next if HEADING_BLANK_RE.match( line )  # skip "decorative" only heading e.g. ========
          ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
-         ##  todo/check:  allow ===  Text  =-=-=-=-=-=   too - why? why not?
-        if line =~ /^(={1,})       ## leading ======
-                     ([^=]+?)      ##  text   (note: for now no "inline" = allowed)
-                     =*            ## (optional) trailing ====
-                     $/x
-           heading_marker = $1
-           heading_level  = $1.length   ## count number of = for heading level
-           heading        = $2.strip
+        if m=HEADING_RE.match( line )
+           start_para = true
+           heading_marker = m[:marker]
+           heading_level  = m[:marker].length   ## count number of = for heading level
+           heading        = m[:text].strip
            puts "heading #{heading_level} >#{heading}<"
            outline << [:"h#{heading_level}", heading]
-        else
-           ## assume it's a (plain/regular) text line
-           outline << [:l, line]
+        else    ## assume it's a (plain/regular) text line
+           if start_para
+             outline << [:p, [line]]
+             start_para = false
+           else
+             node = outline[-1]    ## get last entry
+             if node[0] == :p      ##  assert it's a p(aragraph) node!!!
+                node[1] << line    ## add line to p(aragraph)
+             else
+               puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
+               pp node
+               exit 1
+             end
+           end
         end
     end
     outline

data/lib/sportdb/formats/package.rb CHANGED

@@ -1,160 +1,172 @@
-module Datafile      # note: keep Datafile in its own top-level module/namespace for now - why? why not?
-  ZIP_RE = %r{ \.zip$
-            }x
-  def self.match_zip( path, pattern: ZIP_RE ) pattern.match( path ); end
-  ## exclude pattern
-  ##  for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
-  ##  todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
-  EXCLUDE_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
-                        \.[a-zA-Z0-9_-]+  ## (almost) any name BUT must start with dot e.g.  .git, .build, etc.
-                        /
-                     }x
-  def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
-class Package; end    ## use a shared base class for DirPackage, ZipPackage, etc.
-class DirPackage < Package    ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
-class Entry
-  def initialize( pack, path )
-    @pack = pack  ## parent package
-    @path = path
-    ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
-    @name =  path
-  end
-  def name()  @name; end
-  def read()  File.open( @path, 'r:utf-8' ).read; end
-end  # class DirPackage::Entry
-  attr_reader :name, :path
-  def initialize( path )
-    ## todo/fix:  expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
-    @path = path   ## rename to root_path or base_path or somehting - why? why not?
-    basename = File.basename( path )   ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
-    @name = basename
-  end
-  def each( pattern:,  extension: 'txt' )    ## todo/check: rename to glob or something - why? why not?
-    ##   use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
-    ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
-    Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
-      ## todo/fix: (auto) skip and check for directories
-      if EXCLUDE_RE.match( path )
-        ## note: skip dot dirs (e.g. .build/, .git/, etc.)
-      elsif pattern.match( path )
-        yield( Entry.new( self, path ))
-      else
-        ## puts "  skipping >#{path}<"
-      end
-    end
-  end
-  def find( name )
-    Entry.new( self, "#{@path}/#{name}" )
-  end
-end  # class DirPackage
-## helper wrapper for datafiles in zips
-class ZipPackage < Package
-class Entry
-  def initialize( pack, entry )
-    @pack  = pack
-    @entry = entry
-  end
-  def name()  @entry.name; end
-  def read
-    txt = @entry.get_input_stream.read
-    ##  puts "** encoding: #{txt.encoding}"  #=> encoding: ASCII-8BIT
-    txt = txt.force_encoding( Encoding::UTF_8 )
-    txt
-  end
-end # class ZipPackage::Entry
-  attr_reader :name, :path
-  def initialize( path )
-    @path = path
-    extname  = File.extname( path )    ## todo/check: double check if extension is .zip - why? why not?
-    basename = File.basename( path, extname )
-    @name = basename
-  end
-  def each( pattern: )
-    Zip::File.open( @path ) do |zipfile|
-      zipfile.each do |entry|
-        if entry.directory?
-          next ## skip
-        elsif entry.file?
-          if EXCLUDE_RE.match( entry.name )
-            ## note: skip dot dirs (e.g. .build/, .git/, etc.)
-          elsif pattern.match( entry.name )
-            yield( Entry.new( self, entry ) )   # wrap entry in uniform access interface / api
-          else
-            ## puts "  skipping >#{entry.name}<"
-          end
-        else
-          puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
-          exit 1
-        end
-      end
-    end
-  end
-  def find( name )
-     entries = match_entry( name )
-     if entries.empty?
-       puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
-       exit 1
-     elsif entries.size > 1
-       puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
-       pp entries
-       exit 1
-     else
-       Entry.new( self, entries[0] )    # wrap entry in uniform access interface / api
-     end
-  end
-private
-  def match_entry( name )
-    ## todo/fix:  use Zip::File.glob or find_entry or something better/faster?  why? why not?
-    pattern = %r{ #{Regexp.escape( name )}    ## match string if ends with name
-                   $
-                }x
-    entries = []
-    Zip::File.open( @path ) do |zipfile|
-      zipfile.each do |entry|
-        if entry.directory?
-          next ## skip
-        elsif entry.file?
-          if EXCLUDE_RE.match( entry.name )
-            ## note: skip dot dirs (e.g. .build/, .git/, etc.)
-          elsif pattern.match( entry.name )
-            entries << entry
-          else
-            ## no match; skip too
-          end
-        else
-          puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
-          exit 1
-        end
-      end
-    end
-    entries
-  end
-end  # class ZipPackage
-end  # module Datafile
+module SportDb
+  class Package
+    CONF_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
+        \.conf\.txt$
+    }x
+    LEAGUES_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
+       (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.wiki.txt
+        leagues\.txt$
+    }x
+    CLUBS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
+       (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.txt
+        clubs\.txt$
+    }x
+    CLUBS_WIKI_RE = %r{  (?:^|/)               # beginning (^) or beginning of path (/)
+        (?:[a-z]{1,4}\.)?   # optional country code/key e.g. eng.clubs.wiki.txt
+       clubs\.wiki\.txt$
+    }x
+    CLUB_PROPS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
+      (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.props.txt
+        clubs\.props\.txt$
+    }x
+    ## note: if pattern includes directory add here
+    ##     (otherwise move to more "generic" datafile) - why? why not?
+    MATCH_RE = %r{ /(?: \d{4}-\d{2}        ## season folder e.g. /2019-20
+                      | \d{4}(--[^/]+)?    ## season year-only folder e.g. /2019 or /2016--france
+                    )
+                   /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
+                }x
+    ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
+    def self.find( path, pattern )
+      datafiles = []
+      ## check all txt files
+      ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
+      candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
+      pp candidates
+      candidates.each do |candidate|
+        datafiles << candidate    if pattern.match( candidate )
+      end
+      pp datafiles
+      datafiles
+   end
+   def self.find_clubs( path, pattern: CLUBS_RE )            find( path, pattern ); end
+   def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )  find( path, pattern ); end
+   def self.match_clubs( path )       CLUBS_RE.match( path ); end
+   def self.match_clubs_wiki( path )  CLUBS_WIKI_RE.match( path ); end
+   def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
+   def self.find_leagues( path, pattern: LEAGUES_RE )  find( path, pattern ); end
+   def self.match_leagues( path )  LEAGUES_RE.match( path ); end
+   def self.find_conf( path, pattern: CONF_RE )  find( path, pattern ); end
+   def self.match_conf( path )  CONF_RE.match( path ); end
+   class << self
+     alias_method :match_clubs?, :match_clubs
+     alias_method :clubs?,       :match_clubs
+     alias_method :match_clubs_wiki?, :match_clubs_wiki
+     alias_method :clubs_wiki?,       :match_clubs_wiki
+     alias_method :match_club_props?, :match_club_props
+     alias_method :club_props?,       :match_club_props
+     alias_method :match_leagues?, :match_leagues
+     alias_method :leagues?,       :match_leagues
+     alias_method :match_conf?, :match_conf
+     alias_method :conf?,       :match_conf
+   end
+    ## attr_reader :pack     ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
+    attr_accessor :include, :exclude
+    ## private helpers - like select returns true for keeping and false for skipping entry
+    def filter_clause( filter, entry )
+      if filter.is_a?( String )
+        entry.name.index( filter ) ? true : false
+      elsif filter.is_a?( Regexp )
+        filter.match( entry.name )  ? true : false
+      else  ## assume
+        ## todo/check: pass in entry (and NOT entry.name) - why? why not?
+        filter.call( entry )
+      end
+    end
+    def filter( entry )
+      if @include
+        if filter_clause( @include, entry )   ## todo/check: is include a reserved keyword????
+          true  ## todo/check: check for exclude here too - why? why not?
+        else
+          false
+        end
+      else
+        if @exclude && filter_clause( @exclude, entry )
+          false
+        else
+          true
+        end
+      end
+    end
+    def initialize( path_or_pack )
+      @include = nil
+      @exclude = nil
+      if path_or_pack.is_a?( Datafile::Package )
+        @pack = path_or_pack
+      else   ## assume it's a (string) path
+        path = path_or_pack
+        if !File.exist?( path )  ## file or directory
+          puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
+          exit 1
+        end
+        if File.directory?( path )
+          @pack = Datafile::DirPackage.new( path )     ## delegate to "generic" package
+        elsif File.file?( path ) && File.extname( path ) == '.zip'  # note: includes dot (.) eg .zip
+          @pack = Datafile::ZipPackage.new( path )
+        else
+          puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
+          exit 1
+        end
+      end
+    end
+    def each( pattern:, &blk )
+      @pack.each( pattern: pattern ) do |entry|
+        next unless filter( entry )   ## lets you use include/exclude filters
+        blk.call( entry )
+      end
+    end
+    def each_conf( &blk )       each( pattern: CONF_RE, &blk ); end
+    def each_match( &blk )      each( pattern: MATCH_RE, &blk ); end
+    def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
+    def each_leagues( &blk )    each( pattern: LEAGUES_RE, &blk ); end
+    def each_clubs( &blk )      each( pattern: CLUBS_RE, &blk ); end
+    def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
+    ## return all match datafile entries
+    def match()  ary=[]; each_match {|entry| ary << entry  }; ary; end
+    alias_method :matches, :match
+  end   # class Package
+  class DirPackage < Package
+    def initialize( path )   super( Datafile::DirPackage.new( path ) ); end
+  end
+  class ZipPackage < Package
+    def initialize( path )   super( Datafile::ZipPackage.new( path ) ); end
+  end
+end   # module SportDb