sportdb-formats 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 421d04b4b20f49a37bb1a31161bb4ab7b5df3113
4
- data.tar.gz: e533ef2bb8dbb3c1053416c98ede4c9d20c42546
3
+ metadata.gz: 7888347efffbc31760344406232e8f930d8777a0
4
+ data.tar.gz: 8cba378a58d47f5b4ddc34ee933df6bb19e74cc7
5
5
  SHA512:
6
- metadata.gz: 6256ca94b85f87865a44b4f892273cb4c2c9f17e7847e765cd3c73f408a005907181e202ec201498fd3214a603aed6af33b4ecbcf4e2af43cca851f7b1ac9a09
7
- data.tar.gz: 750122be629a4b902356d845658cfcccbb0403e75e3f713e1ff4bd0da4aefb6b2355bc828aecb8e21d20addb5bfb9c1924df58830eb85e4743aa340c560924df
6
+ metadata.gz: 54cf5468032c75452e8a595fcf2efb204edec7796478aeef21fdd571c0503eaf1e412c0b8e51e927cb5fab0e7ea363d286885bdd6e7c62f5b422f5f2a7c806b4
7
+ data.tar.gz: c56db501666da9225f930ed16d9bbdd16f81d968b5d25eea5f04b764bf5c3bed62a33ad6843595c2316102356d89071c324b13db7a3bb8a8e9171f3abf77a534
data/Manifest.txt CHANGED
@@ -6,6 +6,7 @@ lib/sportdb/formats.rb
6
6
  lib/sportdb/formats/datafile.rb
7
7
  lib/sportdb/formats/goals.rb
8
8
  lib/sportdb/formats/outline_reader.rb
9
+ lib/sportdb/formats/package.rb
9
10
  lib/sportdb/formats/scores.rb
10
11
  lib/sportdb/formats/season_utils.rb
11
12
  lib/sportdb/formats/version.rb
@@ -15,5 +16,6 @@ test/test_datafile.rb
15
16
  test/test_datafile_match.rb
16
17
  test/test_goals.rb
17
18
  test/test_outline_reader.rb
19
+ test/test_package.rb
18
20
  test/test_scores.rb
19
21
  test/test_season_utils.rb
data/Rakefile CHANGED
@@ -24,9 +24,11 @@ Hoe.spec 'sportdb-formats' do
24
24
  ['date-formats', '>= 0.2.4'],
25
25
  ['csvreader', '>= 1.2.4'],
26
26
  ['sportdb-langs', '>= 0.0.1'],
27
+
28
+ ['rubyzip', '>= 1.2.4' ],
27
29
  ]
28
30
 
29
31
  self.spec_extras = {
30
- required_ruby_version: '>= 2.2.2'
32
+ required_ruby_version: '>= 2.2.2'
31
33
  }
32
34
  end
@@ -6,6 +6,10 @@ require 'alphabets' # downcase_i18n, unaccent, variants, ...
6
6
  require 'date/formats' # DateFormats.parse, find!, ...
7
7
  require 'csvreader'
8
8
 
9
+ require 'zip' ## todo/check: if zip is already included in a required module
10
+
11
+
12
+
9
13
  def read_csv( path )
10
14
  CsvHash.read( path, :header_converters => :symbol )
11
15
  end
@@ -24,6 +28,7 @@ require 'sportdb/langs'
24
28
  require 'sportdb/formats/version' # let version always go first
25
29
  require 'sportdb/formats/outline_reader'
26
30
  require 'sportdb/formats/datafile'
31
+ require 'sportdb/formats/package'
27
32
  require 'sportdb/formats/season_utils'
28
33
 
29
34
 
@@ -19,40 +19,44 @@ module Datafile # note: keep Datafile in its own top-level module/namespace
19
19
  end
20
20
 
21
21
 
22
-
23
- CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
24
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.txt
22
+ CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
23
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
25
24
  clubs\.txt$
26
25
  }x
27
26
 
28
- CLUBS_WIKI_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
27
+ CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
29
28
  (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
30
29
  clubs\.wiki\.txt$
31
30
  }x
32
31
 
33
- def self.find_clubs( path, pattern: CLUBS_REGEX ) find( path, pattern ); end
34
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_REGEX ) find( path, pattern ); end
32
+ CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
33
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
34
+ clubs\.props\.txt$
35
+ }x
35
36
 
36
- def self.match_clubs( path ) CLUBS_REGEX.match( path ); end
37
- def self.match_clubs_wiki( path ) CLUBS_WIKI_REGEX.match( path ); end
37
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
38
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
38
39
 
40
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
41
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
42
+ def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
39
43
 
40
44
 
41
- LEAGUES_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
45
+ LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
46
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
43
47
  leagues\.txt$
44
48
  }x
45
49
 
46
- def self.find_leagues( path, pattern: LEAGUES_REGEX ) find( path, pattern ); end
47
- def self.match_leagues( path ) LEAGUES_REGEX.match( path ); end
50
+ def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
51
+ def self.match_leagues( path ) LEAGUES_RE.match( path ); end
48
52
 
49
53
 
50
- CONF_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
54
+ CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
51
55
  \.conf\.txt$
52
56
  }x
53
57
 
54
- def self.find_conf( path, pattern: CONF_REGEX ) find( path, pattern ); end
55
- def self.match_conf( path ) CONF_REGEX.match( path ); end
58
+ def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
59
+ def self.match_conf( path ) CONF_RE.match( path ); end
56
60
 
57
61
 
58
62
 
@@ -0,0 +1,160 @@
1
+
2
+ module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
3
+
4
+
5
+ ZIP_RE = %r{ \.zip$
6
+ }x
7
+ def self.match_zip( path, pattern: ZIP_RE ) pattern.match( path ); end
8
+
9
+
10
+
11
+ ## exclude pattern
12
+ ## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
13
+ ## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
14
+ EXCLUDE_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
15
+ \.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
16
+ /
17
+ }x
18
+ def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
19
+
20
+
21
+
22
+ class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
23
+
24
+ class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
25
+ class Entry
26
+ def initialize( pack, path )
27
+ @pack = pack ## parent package
28
+ @path = path
29
+ ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
30
+ @name = path
31
+ end
32
+ def name() @name; end
33
+ def read() File.open( @path, 'r:utf-8' ).read; end
34
+ end # class DirPackage::Entry
35
+
36
+
37
+ attr_reader :name, :path
38
+
39
+ def initialize( path )
40
+ ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
+ @path = path ## rename to root_path or base_path or something - why? why not?
42
+
43
+ basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
+ @name = basename
45
+ end
46
+
47
+ def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
+ ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
+ ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
50
+ Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
+ ## todo/fix: (auto) skip and check for directories
52
+ if EXCLUDE_RE.match( path )
53
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
+ elsif pattern.match( path )
55
+ yield( Entry.new( self, path ))
56
+ else
57
+ ## puts " skipping >#{path}<"
58
+ end
59
+ end
60
+ end
61
+
62
+ def find( name )
63
+ Entry.new( self, "#{@path}/#{name}" )
64
+ end
65
+ end # class DirPackage
66
+
67
+
68
+ ## helper wrapper for datafiles in zips
69
+ class ZipPackage < Package
70
+ class Entry
71
+ def initialize( pack, entry )
72
+ @pack = pack
73
+ @entry = entry
74
+ end
75
+
76
+ def name() @entry.name; end
77
+ def read
78
+ txt = @entry.get_input_stream.read
79
+ ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
80
+ txt = txt.force_encoding( Encoding::UTF_8 )
81
+ txt
82
+ end
83
+ end # class ZipPackage::Entry
84
+
85
+ attr_reader :name, :path
86
+
87
+ def initialize( path )
88
+ @path = path
89
+
90
+ extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
91
+ basename = File.basename( path, extname )
92
+ @name = basename
93
+ end
94
+
95
+ def each( pattern: )
96
+ Zip::File.open( @path ) do |zipfile|
97
+ zipfile.each do |entry|
98
+ if entry.directory?
99
+ next ## skip
100
+ elsif entry.file?
101
+ if EXCLUDE_RE.match( entry.name )
102
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
103
+ elsif pattern.match( entry.name )
104
+ yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
105
+ else
106
+ ## puts " skipping >#{entry.name}<"
107
+ end
108
+ else
109
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
110
+ exit 1
111
+ end
112
+ end
113
+ end
114
+ end
115
+
116
+ def find( name )
117
+ entries = match_entry( name )
118
+ if entries.empty?
119
+ puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
120
+ exit 1
121
+ elsif entries.size > 1
122
+ puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
123
+ pp entries
124
+ exit 1
125
+ else
126
+ Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
127
+ end
128
+ end
129
+
130
+ private
131
+ def match_entry( name )
132
+ ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
133
+
134
+ pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
135
+ $
136
+ }x
137
+
138
+ entries = []
139
+ Zip::File.open( @path ) do |zipfile|
140
+ zipfile.each do |entry|
141
+ if entry.directory?
142
+ next ## skip
143
+ elsif entry.file?
144
+ if EXCLUDE_RE.match( entry.name )
145
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
146
+ elsif pattern.match( entry.name )
147
+ entries << entry
148
+ else
149
+ ## no match; skip too
150
+ end
151
+ else
152
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
153
+ exit 1
154
+ end
155
+ end
156
+ end
157
+ entries
158
+ end
159
+ end # class ZipPackage
160
+ end # module Datafile
@@ -5,8 +5,8 @@ module SportDb
5
5
  module Formats
6
6
 
7
7
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
- MINOR = 1
9
- PATCH = 7
8
+ MINOR = 2
9
+ PATCH = 0
10
10
  VERSION = [MAJOR,MINOR,PATCH].join('.')
11
11
 
12
12
  def self.version
@@ -10,11 +10,26 @@ require 'helper'
10
10
 
11
11
  class TestDatafile < MiniTest::Test
12
12
 
13
+ def test_exclude
14
+ assert Datafile.match_exclude( '.build/' )
15
+ assert Datafile.match_exclude( '.git/' )
16
+
17
+ assert Datafile.match_exclude( '/.build/' )
18
+ assert Datafile.match_exclude( '/.git/' )
19
+
20
+ assert Datafile.match_exclude( '.build/leagues.txt' )
21
+ assert Datafile.match_exclude( '.git/leagues.txt' )
22
+
23
+ assert Datafile.match_exclude( '/.build/leagues.txt' )
24
+ assert Datafile.match_exclude( '/.git/leagues.txt' )
25
+ end
26
+
27
+
28
+
13
29
  CLUBS_DIR = '../../../openfootball/clubs' ## source repo directory path
14
30
  LEAGUES_DIR = '../../../openfootball/leagues'
15
31
  AUSTRIA_DIR = '../../../openfootball/austria'
16
32
 
17
-
18
33
  def test_find
19
34
  datafiles = Datafile.find_clubs( CLUBS_DIR )
20
35
  pp datafiles
@@ -29,6 +44,7 @@ class TestDatafile < MiniTest::Test
29
44
  pp datafiles
30
45
  end
31
46
 
47
+
32
48
  def test_bundle
33
49
  datafiles = Datafile.find_clubs( CLUBS_DIR )
34
50
  pp datafiles
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_package.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestPackage < MiniTest::Test
12
+
13
+ def test_read
14
+ [Datafile::DirPackage.new( '../../../openfootball/england' ),
15
+ Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
16
+ assert eng.find( '2015-16/.conf.txt' ).read.start_with?( '= English Premier League 2015/16' )
17
+ assert eng.find( '2017-18/.conf.txt' ).read.start_with?( '= English Premier League 2017/18' )
18
+ assert eng.find( '2015-16/1-premierleague-i.txt' ).read.start_with?( '= English Premier League 2015/16' )
19
+ end
20
+
21
+ [Datafile::DirPackage.new( '../../../openfootball/austria' ),
22
+ Datafile::ZipPackage.new( 'tmp/austria-master.zip' )].each do |at|
23
+ assert at.find( '2018-19/.conf.txt' ).read.start_with?( '= Österr. Bundesliga 2018/19' )
24
+ end
25
+ end # method test_read
26
+
27
+ end # class TestPackage
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-formats
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.0.1
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubyzip
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: 1.2.4
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.2.4
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rdoc
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -111,6 +125,7 @@ files:
111
125
  - lib/sportdb/formats/datafile.rb
112
126
  - lib/sportdb/formats/goals.rb
113
127
  - lib/sportdb/formats/outline_reader.rb
128
+ - lib/sportdb/formats/package.rb
114
129
  - lib/sportdb/formats/scores.rb
115
130
  - lib/sportdb/formats/season_utils.rb
116
131
  - lib/sportdb/formats/version.rb
@@ -120,6 +135,7 @@ files:
120
135
  - test/test_datafile_match.rb
121
136
  - test/test_goals.rb
122
137
  - test/test_outline_reader.rb
138
+ - test/test_package.rb
123
139
  - test/test_scores.rb
124
140
  - test/test_season_utils.rb
125
141
  homepage: https://github.com/sportdb/sport.db