sportdb-readers 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbf4efcbeb2e1af833862fa9654f7832552082d3
4
- data.tar.gz: 108ddbf3e0951307fc5ad1c3c36a9b11e00c16cb
3
+ metadata.gz: 2b15f6d3dbaef2a2d05eca174b86731092ba54b6
4
+ data.tar.gz: 265e5151c0e81d1beaf5b7cad751f4abfef484e5
5
5
  SHA512:
6
- metadata.gz: 720dcb7703b7687eeae81485ee95a702c9ac433d468ba9bba8ab04a3dc0417e97d9a72fc6b862fca4f5e9a29b228ab2310cc1f2a8cbddb215e168e06f6c1a818
7
- data.tar.gz: 193d88f88d0404499f5a2f18c5034fabec41d977a1864936edc46f26b3260552fa44028cd8134940acb4dc912a12a2f47aba8f84642c730ce2ebdd790cd04583
6
+ metadata.gz: b943c21d84d36738e562e68a73f2a5de58e7ec693361ad74a96aa04d9ed0f9b5201c876cea62fae0d8daa532f090a6556ade2f78abfac3d12282c987f6710ef5
7
+ data.tar.gz: 66ee60adbd4ed8bfc34718d3520b24b5840e11193ffeb9f0fab83bc1fec9e7947febc0bc09bfa06123df0b310d1435d74310f5e9a422d3b0a4d0c25f689a5da2
data/Manifest.txt CHANGED
@@ -11,6 +11,7 @@ lib/sportdb/readers/league_outline_reader.rb
11
11
  lib/sportdb/readers/match_linter.rb
12
12
  lib/sportdb/readers/match_parser.rb
13
13
  lib/sportdb/readers/match_reader.rb
14
+ lib/sportdb/readers/package.rb
14
15
  lib/sportdb/readers/version.rb
15
16
  test/helper.rb
16
17
  test/test_match_parser.rb
@@ -8,15 +8,13 @@ module Datafile
8
8
  CONF_RE = %r{ /\.conf\.txt$
9
9
  }x
10
10
 
11
- MATCH_RE = %r{ /\d{4}-\d{2} ## season folder e.g. /2019-20
12
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
13
- }x
14
11
 
15
- CLUBS_PROPS_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
12
+ CLUB_PROPS_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
16
13
  (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.props.txt
17
14
  clubs\.props\.txt$
18
15
  }x
19
- def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
16
+ def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
17
+
20
18
 
21
19
  ZIP_RE = %r{ \.zip$
22
20
  }x
@@ -24,124 +22,73 @@ module Datafile
24
22
 
25
23
 
26
24
 
27
- class PackageBase
28
-
29
- ## note: "abstract" methods - each and read required in derived class !!!!
30
-
31
- def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
32
- def each_match( &blk ) each( pattern: MATCH_RE, &blk ); end
33
-
34
-
35
-
36
- def read_clubs_props
37
- each_read( pattern: CLUBS_PROPS_RE ) do |name, txt|
38
- ## todo/fix: add/use SportDb.parse_club_props helper !!!!!!
39
- SportDb::Import::ClubPropsReader.parse( txt )
40
- end
41
- end
42
-
43
- def read_conf( *names,
44
- season: nil, sync: true )
45
- if names.empty? ## no (entry) names passed in; read in all
46
- each_read( pattern: CONF_RE ) do |name, txt|
47
- SportDb.parse_conf( txt, season: season, sync: sync )
48
- end
49
- else
50
- names.each do |name|
51
- txt = read_entry( name )
52
- SportDb.parse_conf( txt, season: season, sync: sync )
53
- end
54
- end
55
- end
56
-
57
- def read_match( *names,
58
- season: nil, sync: true )
59
- if names.empty? ## no (entry) names passed in; read in all
60
- each_read( pattern: MATCH_RE ) do |name, txt|
61
- SportDb.parse_match( txt, season: season, sync: sync )
62
- end
63
- else
64
- names.each do |name|
65
- txt = read_entry( name )
66
- SportDb.parse_match( txt, season: season, sync: sync )
67
- end
68
- end
69
- end
70
-
71
-
72
- def read( *names,
73
- season: nil, sync: true )
74
- if names.empty? ## read all datafiles
75
- read_clubs_props() if sync
76
- read_conf( season: season, sync: sync )
77
- read_match( season: season, sync: sync )
78
- else
79
- names.each do |name|
80
- txt = read_entry( name )
81
- ## fix/todo: add read_clubs_props too!!!
82
- if Datafile.match_conf( name ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
83
- SportDb.parse_conf( txt, season: season, sync: sync )
84
- else ## assume "regular" match datafile
85
- SportDb.parse_match( txt, season: season, sync: sync )
86
- end
87
- end
88
- end
25
+ class DirPackage ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
26
+ class Entry
27
+ def initialize( pack, path )
28
+ @pack = pack ## parent package
29
+ @path = path
30
+ ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
31
+ @name = path
89
32
  end
90
- end # class PackageBase
91
-
92
-
33
+ def name() @name; end
34
+ def read() File.open( @path, 'r:utf-8' ).read; end
35
+ end # class DirPackage::Entry
93
36
 
94
37
 
95
- class DirPackage < PackageBase ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
38
+ attr_reader :name, :path
96
39
 
97
40
  def initialize( path )
98
41
  @path = path ## rename to root_path or base_path or something - why? why not?
99
- end
100
42
 
43
+ basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
+ @name = basename
45
+ end
101
46
 
102
- def each_file( pattern: ) ## todo/check: rename to glob or something - why? why not?
47
+ def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
+ ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
103
49
  ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
104
- Dir.glob( "#{@path}/**/{*,.*}.txt" ).each do |path|
50
+ Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
105
51
  ## todo/fix: (auto) skip and check for directories
106
52
  if pattern.match( path )
107
- yield( path )
53
+ yield( Entry.new( self, path ))
108
54
  else
109
55
  ## puts " skipping >#{path}<"
110
56
  end
111
57
  end
112
58
  end
113
59
 
60
+ def find( name )
61
+ Entry.new( self, "#{@path}/#{name}" )
62
+ end
63
+ end # class DirPackage
114
64
 
115
- Entry = Struct.new( :name )
116
65
 
117
- def each( pattern: ) ## todo/check: rename to each_entry - why? why not?
118
- each_file( pattern: pattern ) do |path|
119
- ## fix: split path like a "virtual" zip like entry
120
- yield( Entry.new( path ) )
121
- end
66
+ ## helper wrapper for datafiles in zips
67
+ class ZipPackage
68
+ class Entry
69
+ def initialize( pack, entry )
70
+ @pack = pack
71
+ @entry = entry
122
72
  end
123
73
 
124
- def read_entry( name )
125
- txt = File.open( "#{@path}/#{name}", 'r:utf-8').read
74
+ def name() @entry.name; end
75
+ def read
76
+ txt = @entry.get_input_stream.read
77
+ ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
78
+ txt = txt.force_encoding( Encoding::UTF_8 )
126
79
  txt
127
80
  end
81
+ end # class ZipPackage::Entry
128
82
 
129
- def each_read( pattern: )
130
- each_file( pattern: pattern ) do |path|
131
- txt = File.open( path, 'r:utf-8').read
132
- yield( path, txt ) ## only pass along txt - why? why not? or pass along entry and not just entry.name?
133
- end
134
- end
135
- end # class DirPackage
136
-
137
-
83
+ attr_reader :name, :path
138
84
 
139
- ## helper wrapper for datafiles in zips
140
- class ZipPackage < PackageBase
141
85
  def initialize( path )
142
86
  @path = path
143
- end
144
87
 
88
+ extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
89
+ basename = File.basename( path, extname )
90
+ @name = basename
91
+ end
145
92
 
146
93
  def each( pattern: )
147
94
  Zip::File.open( @path ) do |zipfile|
@@ -150,21 +97,35 @@ class ZipPackage < PackageBase
150
97
  next ## skip
151
98
  elsif entry.file?
152
99
  if pattern.match( entry.name )
153
- yield( entry )
100
+ yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
154
101
  else
155
102
  ## puts " skipping >#{entry.name}<"
156
103
  end
157
104
  else
158
- puts "** !! ERROR !! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
105
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
159
106
  exit 1
160
107
  end
161
108
  end
162
109
  end
163
110
  end
164
111
 
112
+ def find( name )
113
+ entries = match_entry( name )
114
+ if entries.empty?
115
+ puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
116
+ exit 1
117
+ elsif entries.size > 1
118
+ puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
119
+ pp entries
120
+ exit 1
121
+ else
122
+ Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
123
+ end
124
+ end
165
125
 
126
+ private
166
127
  def match_entry( name )
167
- ## todo/fix: use Zip::File.glob or find_entry or ? why? why not?
128
+ ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
168
129
 
169
130
  pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
170
131
  $
@@ -180,39 +141,12 @@ class ZipPackage < PackageBase
180
141
  entries << entry
181
142
  end
182
143
  else
183
- puts "** !! ERROR !! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
144
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
184
145
  exit 1
185
146
  end
186
147
  end
187
148
  end
188
149
  entries
189
150
  end
190
-
191
- def read_entry( name )
192
- entries = match_entry( name )
193
- if entries.empty?
194
- puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
195
- exit 1
196
- elsif entries.size > 1
197
- puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
198
- pp entries
199
- exit 1
200
- else
201
- entry = entries[0]
202
- txt = entry.get_input_stream.read
203
- ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
204
- txt = txt.force_encoding( Encoding::UTF_8 )
205
- end
206
- end
207
-
208
-
209
- def each_read( pattern: )
210
- each( pattern: pattern ) do |entry|
211
- txt = entry.get_input_stream.read
212
- ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
213
- txt = txt.force_encoding( Encoding::UTF_8 )
214
- yield( "#{@path}!/#{entry.name}", txt ) ## only pass along txt - why? why not? or pass along entry and not just entry.name?
215
- end
216
- end
217
151
  end # class ZipPackage
218
152
  end # module Datafile
@@ -0,0 +1,90 @@
1
+
2
+ module SportDb
3
+ class Package
4
+
5
+ CONF_RE = Datafile::CONF_RE
6
+ CLUB_PROPS_RE = Datafile::CLUB_PROPS_RE
7
+
8
+ ## note: if pattern includes directory add here (otherwise move to more "generic" datafile) - why? why not?
9
+ MATCH_RE = %r{ /\d{4}-\d{2} ## season folder e.g. /2019-20
10
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
11
+ }x
12
+
13
+ def initialize( path )
14
+ if !File.exist?( path ) ## file or directory
15
+ puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
16
+ exit 1
17
+ end
18
+
19
+ if File.directory?( path )
20
+ @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
21
+ elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
22
+ @pack = Datafile::ZipPackage.new( path )
23
+ else
24
+ puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
25
+ exit 1
26
+ end
27
+ end
28
+
29
+ def each_conf( &blk ) @pack.each( pattern: CONF_RE, &blk ); end
30
+ def each_match( &blk ) @pack.each( pattern: MATCH_RE, &blk ); end
31
+ def each_club_props( &blk ) @pack.each( pattern: CLUB_PROPS_RE, &blk ); end
32
+
33
+
34
+ def read_club_props( sync: true )
35
+ each_club_props do |entry|
36
+ SportDb.parse_club_props( entry.read, sync: sync )
37
+ end
38
+ end
39
+
40
+ def read_conf( *names,
41
+ season: nil, sync: true )
42
+ if names.empty? ## no (entry) names passed in; read in all
43
+ each_conf do |entry|
44
+ SportDb.parse_conf( entry.read, season: season, sync: sync )
45
+ end
46
+ else
47
+ names.each do |name|
48
+ entry = @pack.find( name )
49
+ SportDb.parse_conf( entry.read, season: season, sync: sync )
50
+ end
51
+ end
52
+ end
53
+
54
+ def read_match( *names,
55
+ season: nil, sync: true )
56
+ if names.empty? ## no (entry) names passed in; read in all
57
+ each_match do |entry|
58
+ SportDb.parse_match( entry.read, season: season, sync: sync )
59
+ end
60
+ else
61
+ names.each do |name|
62
+ entry = @pack.find( name )
63
+ SportDb.parse_match( entry.read, season: season, sync: sync )
64
+ end
65
+ end
66
+ end
67
+
68
+
69
+ def read( *names,
70
+ season: nil, sync: true )
71
+ if names.empty? ## read all datafiles
72
+ read_club_props( sync: sync )
73
+ read_conf( season: season, sync: sync )
74
+ read_match( season: season, sync: sync )
75
+ else
76
+ names.each do |name|
77
+ entry = @pack.find( name )
78
+ ## note: club props datafiles are handled too - see the match_club_props branch below
79
+ if Datafile.match_conf( name ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
80
+ SportDb.parse_conf( entry.read, season: season, sync: sync )
81
+ elsif Datafile.match_club_props( name )
82
+ SportDb.parse_club_props( entry.read, sync: sync )
83
+ else ## assume "regular" match datafile
84
+ SportDb.parse_match( entry.read, season: season, sync: sync )
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end # class Package
90
+ end # module SportDb
@@ -6,7 +6,7 @@ module Readers
6
6
 
7
7
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
8
  MINOR = 3
9
- PATCH = 2
9
+ PATCH = 3
10
10
  VERSION = [MAJOR,MINOR,PATCH].join('.')
11
11
 
12
12
  def self.version
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'zip' ## todo/check: if zip is alreay included in a required module?
3
+ require 'zip' ## todo/check: if zip is already included in a required module - move into sportdb-formats and add datafile!!!
4
4
 
5
5
 
6
6
 
@@ -20,6 +20,8 @@ require 'sportdb/readers/match_reader'
20
20
  require 'sportdb/readers/match_linter'
21
21
  require 'sportdb/readers/club_props_reader'
22
22
  require 'sportdb/readers/datafile'
23
+ require 'sportdb/readers/package'
24
+
23
25
 
24
26
 
25
27
 
@@ -32,39 +34,46 @@ module SportDb
32
34
  sync ? ConfReaderV2.read( path, season: season )
33
35
  : ConfLinter.read( path, season: season )
34
36
  end
35
-
36
- def self.read_match( path, season: nil, sync: true ) ### todo/check: add alias read_matches - why? why not?
37
- sync ? MatchReaderV2.read( path, season: season )
38
- : MatchLinter.read( path, season: season )
39
- end
40
-
41
-
42
37
  def self.parse_conf( txt, season: nil, sync: true )
43
38
  sync ? ConfReaderV2.parse( txt, season: season )
44
39
  : ConfLinter.parse( txt, season: season )
45
40
  end
46
41
 
42
+ def self.read_match( path, season: nil, sync: true ) ### todo/check: add alias read_matches - why? why not?
43
+ sync ? MatchReaderV2.read( path, season: season )
44
+ : MatchLinter.read( path, season: season )
45
+ end
47
46
  def self.parse_match( txt, season: nil, sync: true ) ### todo/check: add alias read_matches - why? why not?
48
47
  sync ? MatchReaderV2.parse( txt, season: season )
49
48
  : MatchLinter.parse( txt, season: season )
50
49
  end
51
50
 
51
+ def self.read_club_props( path, sync: true )
52
+ ## note: for now run only if sync (e.g. run with db updates)
53
+ SportDb::Import::ClubPropsReader.read( path ) if sync
54
+ end
55
+ def self.parse_club_props( txt, sync: true )
56
+ ## note: for now run only if sync (e.g. run with db updates)
57
+ SportDb::Import::ClubPropsReader.parse( txt ) if sync
58
+ end
59
+
52
60
 
53
61
  def self.read( path, season: nil, sync: true )
54
62
  pack = if File.directory?( path ) ## if directory assume "unzipped" package
55
- Datafile::DirPackage.new( path )
56
- elsif Datafile.match_zip( path ) ## check if file is a .zip (archive) file
57
- Datafile::ZipPackage.new( path )
63
+ Package.new( path )
64
+ elsif File.file?( path ) && Datafile.match_zip( path ) ## check if file is a .zip (archive) file
65
+ Package.new( path )
58
66
  else ## no package; assume single (standalone) datafile
59
67
  nil
60
68
  end
61
69
 
62
70
  if pack
63
- pack.read_conf( season: season, sync: sync )
64
- pack.read_match( season: season, sync: sync )
71
+ pack.read( season: season, sync: sync )
65
72
  else
66
73
  if Datafile.match_conf( path ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
67
74
  read_conf( path, season: season, sync: sync )
75
+ elsif Datafile.match_club_props( path )
76
+ read_club_props( path, sync: sync )
68
77
  else ## assume "regular" match datafile
69
78
  read_match( path, season: season, sync: sync )
70
79
  end
@@ -72,6 +81,7 @@ module SportDb
72
81
  end # method read
73
82
 
74
83
 
84
+
75
85
  ## (more) convenience helpers for lint(ing)
76
86
  def self.lint( path, season: nil ) read( path, season: season, sync: false ); end
77
87
  def self.lint_conf( path, season: nil ) read_conf( path, season: season, sync: false ); end
data/test/test_package.rb CHANGED
@@ -11,15 +11,17 @@ require 'helper'
11
11
  class TestPackage < MiniTest::Test
12
12
 
13
13
  def test_read
14
- eng = Datafile::DirPackage.new( '../../../openfootball/england' )
15
- ## eng = Datafile::ZipPackage.new( 'tmp/england-master.zip' )
16
- assert eng.read_entry( '2015-16/.conf.txt' ).start_with?( '= English Premier League 2015/16' )
17
- assert eng.read_entry( '2017-18/.conf.txt' ).start_with?( '= English Premier League 2017/18' )
18
- assert eng.read_entry( '2015-16/1-premierleague-i.txt' ).start_with?( '= English Premier League 2015/16' )
19
-
20
- at = Datafile::DirPackage.new( '../../../openfootball/austria' )
21
- ## at = Datafile::ZipPackage.new( 'tmp/austria-master.zip' )
22
- assert at.read_entry( '2018-19/.conf.txt' ).start_with?( '= Österr. Bundesliga 2018/19' )
14
+ [Datafile::DirPackage.new( '../../../openfootball/england' ),
15
+ Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
16
+ assert eng.find( '2015-16/.conf.txt' ).read.start_with?( '= English Premier League 2015/16' )
17
+ assert eng.find( '2017-18/.conf.txt' ).read.start_with?( '= English Premier League 2017/18' )
18
+ assert eng.find( '2015-16/1-premierleague-i.txt' ).read.start_with?( '= English Premier League 2015/16' )
19
+ end
20
+
21
+ [Datafile::DirPackage.new( '../../../openfootball/austria' ),
22
+ Datafile::ZipPackage.new( 'tmp/austria-master.zip' )].each do |at|
23
+ assert at.find( '2018-19/.conf.txt' ).read.start_with?( '= Österr. Bundesliga 2018/19' )
24
+ end
23
25
  end # method test_read
24
26
 
25
27
  end # class TestPackage
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-readers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-17 00:00:00.000000000 Z
11
+ date: 2019-11-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-config
@@ -103,6 +103,7 @@ files:
103
103
  - lib/sportdb/readers/match_linter.rb
104
104
  - lib/sportdb/readers/match_parser.rb
105
105
  - lib/sportdb/readers/match_reader.rb
106
+ - lib/sportdb/readers/package.rb
106
107
  - lib/sportdb/readers/version.rb
107
108
  - test/helper.rb
108
109
  - test/test_match_parser.rb