sportdb-formats 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 421d04b4b20f49a37bb1a31161bb4ab7b5df3113
4
- data.tar.gz: e533ef2bb8dbb3c1053416c98ede4c9d20c42546
3
+ metadata.gz: 7888347efffbc31760344406232e8f930d8777a0
4
+ data.tar.gz: 8cba378a58d47f5b4ddc34ee933df6bb19e74cc7
5
5
  SHA512:
6
- metadata.gz: 6256ca94b85f87865a44b4f892273cb4c2c9f17e7847e765cd3c73f408a005907181e202ec201498fd3214a603aed6af33b4ecbcf4e2af43cca851f7b1ac9a09
7
- data.tar.gz: 750122be629a4b902356d845658cfcccbb0403e75e3f713e1ff4bd0da4aefb6b2355bc828aecb8e21d20addb5bfb9c1924df58830eb85e4743aa340c560924df
6
+ metadata.gz: 54cf5468032c75452e8a595fcf2efb204edec7796478aeef21fdd571c0503eaf1e412c0b8e51e927cb5fab0e7ea363d286885bdd6e7c62f5b422f5f2a7c806b4
7
+ data.tar.gz: c56db501666da9225f930ed16d9bbdd16f81d968b5d25eea5f04b764bf5c3bed62a33ad6843595c2316102356d89071c324b13db7a3bb8a8e9171f3abf77a534
data/Manifest.txt CHANGED
@@ -6,6 +6,7 @@ lib/sportdb/formats.rb
6
6
  lib/sportdb/formats/datafile.rb
7
7
  lib/sportdb/formats/goals.rb
8
8
  lib/sportdb/formats/outline_reader.rb
9
+ lib/sportdb/formats/package.rb
9
10
  lib/sportdb/formats/scores.rb
10
11
  lib/sportdb/formats/season_utils.rb
11
12
  lib/sportdb/formats/version.rb
@@ -15,5 +16,6 @@ test/test_datafile.rb
15
16
  test/test_datafile_match.rb
16
17
  test/test_goals.rb
17
18
  test/test_outline_reader.rb
19
+ test/test_package.rb
18
20
  test/test_scores.rb
19
21
  test/test_season_utils.rb
data/Rakefile CHANGED
@@ -24,9 +24,11 @@ Hoe.spec 'sportdb-formats' do
24
24
  ['date-formats', '>= 0.2.4'],
25
25
  ['csvreader', '>= 1.2.4'],
26
26
  ['sportdb-langs', '>= 0.0.1'],
27
+
28
+ ['rubyzip', '>= 1.2.4' ],
27
29
  ]
28
30
 
29
31
  self.spec_extras = {
30
- required_ruby_version: '>= 2.2.2'
32
+ required_ruby_version: '>= 2.2.2'
31
33
  }
32
34
  end
@@ -6,6 +6,10 @@ require 'alphabets' # downcase_i18n, unaccent, variants, ...
6
6
  require 'date/formats' # DateFormats.parse, find!, ...
7
7
  require 'csvreader'
8
8
 
9
+ require 'zip' ## todo/check: if zip is already included in a required module
10
+
11
+
12
+
9
13
  def read_csv( path )
10
14
  CsvHash.read( path, :header_converters => :symbol )
11
15
  end
@@ -24,6 +28,7 @@ require 'sportdb/langs'
24
28
  require 'sportdb/formats/version' # let version always go first
25
29
  require 'sportdb/formats/outline_reader'
26
30
  require 'sportdb/formats/datafile'
31
+ require 'sportdb/formats/package'
27
32
  require 'sportdb/formats/season_utils'
28
33
 
29
34
 
@@ -19,40 +19,44 @@ module Datafile # note: keep Datafile in its own top-level module/namespace
19
19
  end
20
20
 
21
21
 
22
-
23
- CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
24
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.txt
22
+ CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
23
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
25
24
  clubs\.txt$
26
25
  }x
27
26
 
28
- CLUBS_WIKI_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
27
+ CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
29
28
  (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
30
29
  clubs\.wiki\.txt$
31
30
  }x
32
31
 
33
- def self.find_clubs( path, pattern: CLUBS_REGEX ) find( path, pattern ); end
34
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_REGEX ) find( path, pattern ); end
32
+ CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
33
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
34
+ clubs\.props\.txt$
35
+ }x
35
36
 
36
- def self.match_clubs( path ) CLUBS_REGEX.match( path ); end
37
- def self.match_clubs_wiki( path ) CLUBS_WIKI_REGEX.match( path ); end
37
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
38
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
38
39
 
40
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
41
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
42
+ def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
39
43
 
40
44
 
41
- LEAGUES_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
45
+ LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
46
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
43
47
  leagues\.txt$
44
48
  }x
45
49
 
46
- def self.find_leagues( path, pattern: LEAGUES_REGEX ) find( path, pattern ); end
47
- def self.match_leagues( path ) LEAGUES_REGEX.match( path ); end
50
+ def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
51
+ def self.match_leagues( path ) LEAGUES_RE.match( path ); end
48
52
 
49
53
 
50
- CONF_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
54
+ CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
51
55
  \.conf\.txt$
52
56
  }x
53
57
 
54
- def self.find_conf( path, pattern: CONF_REGEX ) find( path, pattern ); end
55
- def self.match_conf( path ) CONF_REGEX.match( path ); end
58
+ def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
59
+ def self.match_conf( path ) CONF_RE.match( path ); end
56
60
 
57
61
 
58
62
 
@@ -0,0 +1,160 @@
1
+
2
+ module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
3
+
4
+
5
+ ZIP_RE = %r{ \.zip$
6
+ }x
7
+ def self.match_zip( path, pattern: ZIP_RE ) pattern.match( path ); end
8
+
9
+
10
+
11
+ ## exclude pattern
12
+ ## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
13
+ ## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
14
+ EXCLUDE_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
15
+ \.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
16
+ /
17
+ }x
18
+ def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
19
+
20
+
21
+
22
+ class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
23
+
24
+ class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
25
+ class Entry
26
+ def initialize( pack, path )
27
+ @pack = pack ## parent package
28
+ @path = path
29
+ ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
30
+ @name = path
31
+ end
32
+ def name() @name; end
33
+ def read() File.open( @path, 'r:utf-8' ).read; end
34
+ end # class DirPackage::Entry
35
+
36
+
37
+ attr_reader :name, :path
38
+
39
+ def initialize( path )
40
+ ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
+ @path = path ## rename to root_path or base_path or something - why? why not?
42
+
43
+ basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
+ @name = basename
45
+ end
46
+
47
+ def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
+ ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
+ ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
50
+ Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
+ ## todo/fix: (auto) skip and check for directories
52
+ if EXCLUDE_RE.match( path )
53
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
+ elsif pattern.match( path )
55
+ yield( Entry.new( self, path ))
56
+ else
57
+ ## puts " skipping >#{path}<"
58
+ end
59
+ end
60
+ end
61
+
62
+ def find( name )
63
+ Entry.new( self, "#{@path}/#{name}" )
64
+ end
65
+ end # class DirPackage
66
+
67
+
68
+ ## helper wrapper for datafiles in zips
69
+ class ZipPackage < Package
70
+ class Entry
71
+ def initialize( pack, entry )
72
+ @pack = pack
73
+ @entry = entry
74
+ end
75
+
76
+ def name() @entry.name; end
77
+ def read
78
+ txt = @entry.get_input_stream.read
79
+ ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
80
+ txt = txt.force_encoding( Encoding::UTF_8 )
81
+ txt
82
+ end
83
+ end # class ZipPackage::Entry
84
+
85
+ attr_reader :name, :path
86
+
87
+ def initialize( path )
88
+ @path = path
89
+
90
+ extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
91
+ basename = File.basename( path, extname )
92
+ @name = basename
93
+ end
94
+
95
+ def each( pattern: )
96
+ Zip::File.open( @path ) do |zipfile|
97
+ zipfile.each do |entry|
98
+ if entry.directory?
99
+ next ## skip
100
+ elsif entry.file?
101
+ if EXCLUDE_RE.match( entry.name )
102
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
103
+ elsif pattern.match( entry.name )
104
+ yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
105
+ else
106
+ ## puts " skipping >#{entry.name}<"
107
+ end
108
+ else
109
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
110
+ exit 1
111
+ end
112
+ end
113
+ end
114
+ end
115
+
116
+ def find( name )
117
+ entries = match_entry( name )
118
+ if entries.empty?
119
+ puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
120
+ exit 1
121
+ elsif entries.size > 1
122
+ puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
123
+ pp entries
124
+ exit 1
125
+ else
126
+ Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
127
+ end
128
+ end
129
+
130
+ private
131
+ def match_entry( name )
132
+ ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
133
+
134
+ pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
135
+ $
136
+ }x
137
+
138
+ entries = []
139
+ Zip::File.open( @path ) do |zipfile|
140
+ zipfile.each do |entry|
141
+ if entry.directory?
142
+ next ## skip
143
+ elsif entry.file?
144
+ if EXCLUDE_RE.match( entry.name )
145
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
146
+ elsif pattern.match( entry.name )
147
+ entries << entry
148
+ else
149
+ ## no match; skip too
150
+ end
151
+ else
152
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
153
+ exit 1
154
+ end
155
+ end
156
+ end
157
+ entries
158
+ end
159
+ end # class ZipPackage
160
+ end # module Datafile
@@ -5,8 +5,8 @@ module SportDb
5
5
  module Formats
6
6
 
7
7
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
- MINOR = 1
9
- PATCH = 7
8
+ MINOR = 2
9
+ PATCH = 0
10
10
  VERSION = [MAJOR,MINOR,PATCH].join('.')
11
11
 
12
12
  def self.version
@@ -10,11 +10,26 @@ require 'helper'
10
10
 
11
11
  class TestDatafile < MiniTest::Test
12
12
 
13
+ def test_exclude
14
+ assert Datafile.match_exclude( '.build/' )
15
+ assert Datafile.match_exclude( '.git/' )
16
+
17
+ assert Datafile.match_exclude( '/.build/' )
18
+ assert Datafile.match_exclude( '/.git/' )
19
+
20
+ assert Datafile.match_exclude( '.build/leagues.txt' )
21
+ assert Datafile.match_exclude( '.git/leagues.txt' )
22
+
23
+ assert Datafile.match_exclude( '/.build/leagues.txt' )
24
+ assert Datafile.match_exclude( '/.git/leagues.txt' )
25
+ end
26
+
27
+
28
+
13
29
  CLUBS_DIR = '../../../openfootball/clubs' ## source repo directory path
14
30
  LEAGUES_DIR = '../../../openfootball/leagues'
15
31
  AUSTRIA_DIR = '../../../openfootball/austria'
16
32
 
17
-
18
33
  def test_find
19
34
  datafiles = Datafile.find_clubs( CLUBS_DIR )
20
35
  pp datafiles
@@ -29,6 +44,7 @@ class TestDatafile < MiniTest::Test
29
44
  pp datafiles
30
45
  end
31
46
 
47
+
32
48
  def test_bundle
33
49
  datafiles = Datafile.find_clubs( CLUBS_DIR )
34
50
  pp datafiles
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_package.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestPackage < MiniTest::Test
12
+
13
+ def test_read
14
+ [Datafile::DirPackage.new( '../../../openfootball/england' ),
15
+ Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
16
+ assert eng.find( '2015-16/.conf.txt' ).read.start_with?( '= English Premier League 2015/16' )
17
+ assert eng.find( '2017-18/.conf.txt' ).read.start_with?( '= English Premier League 2017/18' )
18
+ assert eng.find( '2015-16/1-premierleague-i.txt' ).read.start_with?( '= English Premier League 2015/16' )
19
+ end
20
+
21
+ [Datafile::DirPackage.new( '../../../openfootball/austria' ),
22
+ Datafile::ZipPackage.new( 'tmp/austria-master.zip' )].each do |at|
23
+ assert at.find( '2018-19/.conf.txt' ).read.start_with?( '= Österr. Bundesliga 2018/19' )
24
+ end
25
+ end # method test_read
26
+
27
+ end # class TestPackage
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-formats
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.0.1
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubyzip
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: 1.2.4
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.2.4
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rdoc
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -111,6 +125,7 @@ files:
111
125
  - lib/sportdb/formats/datafile.rb
112
126
  - lib/sportdb/formats/goals.rb
113
127
  - lib/sportdb/formats/outline_reader.rb
128
+ - lib/sportdb/formats/package.rb
114
129
  - lib/sportdb/formats/scores.rb
115
130
  - lib/sportdb/formats/season_utils.rb
116
131
  - lib/sportdb/formats/version.rb
@@ -120,6 +135,7 @@ files:
120
135
  - test/test_datafile_match.rb
121
136
  - test/test_goals.rb
122
137
  - test/test_outline_reader.rb
138
+ - test/test_package.rb
123
139
  - test/test_scores.rb
124
140
  - test/test_season_utils.rb
125
141
  homepage: https://github.com/sportdb/sport.db