sportdb-readers 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbf4efcbeb2e1af833862fa9654f7832552082d3
4
- data.tar.gz: 108ddbf3e0951307fc5ad1c3c36a9b11e00c16cb
3
+ metadata.gz: 2b15f6d3dbaef2a2d05eca174b86731092ba54b6
4
+ data.tar.gz: 265e5151c0e81d1beaf5b7cad751f4abfef484e5
5
5
  SHA512:
6
- metadata.gz: 720dcb7703b7687eeae81485ee95a702c9ac433d468ba9bba8ab04a3dc0417e97d9a72fc6b862fca4f5e9a29b228ab2310cc1f2a8cbddb215e168e06f6c1a818
7
- data.tar.gz: 193d88f88d0404499f5a2f18c5034fabec41d977a1864936edc46f26b3260552fa44028cd8134940acb4dc912a12a2f47aba8f84642c730ce2ebdd790cd04583
6
+ metadata.gz: b943c21d84d36738e562e68a73f2a5de58e7ec693361ad74a96aa04d9ed0f9b5201c876cea62fae0d8daa532f090a6556ade2f78abfac3d12282c987f6710ef5
7
+ data.tar.gz: 66ee60adbd4ed8bfc34718d3520b24b5840e11193ffeb9f0fab83bc1fec9e7947febc0bc09bfa06123df0b310d1435d74310f5e9a422d3b0a4d0c25f689a5da2
data/Manifest.txt CHANGED
@@ -11,6 +11,7 @@ lib/sportdb/readers/league_outline_reader.rb
11
11
  lib/sportdb/readers/match_linter.rb
12
12
  lib/sportdb/readers/match_parser.rb
13
13
  lib/sportdb/readers/match_reader.rb
14
+ lib/sportdb/readers/package.rb
14
15
  lib/sportdb/readers/version.rb
15
16
  test/helper.rb
16
17
  test/test_match_parser.rb
@@ -8,15 +8,13 @@ module Datafile
8
8
  CONF_RE = %r{ /\.conf\.txt$
9
9
  }x
10
10
 
11
- MATCH_RE = %r{ /\d{4}-\d{2} ## season folder e.g. /2019-20
12
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
13
- }x
14
11
 
15
- CLUBS_PROPS_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
12
+ CLUB_PROPS_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
16
13
  (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.props.txt
17
14
  clubs\.props\.txt$
18
15
  }x
19
- def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
16
+ def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
17
+
20
18
 
21
19
  ZIP_RE = %r{ \.zip$
22
20
  }x
@@ -24,124 +22,73 @@ module Datafile
24
22
 
25
23
 
26
24
 
27
- class PackageBase
28
-
29
- ## note: "abstract" methods - each and read required in derived class !!!!
30
-
31
- def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
32
- def each_match( &blk ) each( pattern: MATCH_RE, &blk ); end
33
-
34
-
35
-
36
- def read_clubs_props
37
- each_read( pattern: CLUBS_PROPS_RE ) do |name, txt|
38
- ## todo/fix: add/use SportDb.parse_club_props helper !!!!!!
39
- SportDb::Import::ClubPropsReader.parse( txt )
40
- end
41
- end
42
-
43
- def read_conf( *names,
44
- season: nil, sync: true )
45
- if names.empty? ## no (entry) names passed in; read in all
46
- each_read( pattern: CONF_RE ) do |name, txt|
47
- SportDb.parse_conf( txt, season: season, sync: sync )
48
- end
49
- else
50
- names.each do |name|
51
- txt = read_entry( name )
52
- SportDb.parse_conf( txt, season: season, sync: sync )
53
- end
54
- end
55
- end
56
-
57
- def read_match( *names,
58
- season: nil, sync: true )
59
- if names.empty? ## no (entry) names passed in; read in all
60
- each_read( pattern: MATCH_RE ) do |name, txt|
61
- SportDb.parse_match( txt, season: season, sync: sync )
62
- end
63
- else
64
- names.each do |name|
65
- txt = read_entry( name )
66
- SportDb.parse_match( txt, season: season, sync: sync )
67
- end
68
- end
69
- end
70
-
71
-
72
- def read( *names,
73
- season: nil, sync: true )
74
- if names.empty? ## read all datafiles
75
- read_clubs_props() if sync
76
- read_conf( season: season, sync: sync )
77
- read_match( season: season, sync: sync )
78
- else
79
- names.each do |name|
80
- txt = read_entry( name )
81
- ## fix/todo: add read_clubs_props too!!!
82
- if Datafile.match_conf( name ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
83
- SportDb.parse_conf( txt, season: season, sync: sync )
84
- else ## assume "regular" match datafile
85
- SportDb.parse_match( txt, season: season, sync: sync )
86
- end
87
- end
88
- end
25
+ class DirPackage ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
26
+ class Entry
27
+ def initialize( pack, path )
28
+ @pack = pack ## parent package
29
+ @path = path
30
+ ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
31
+ @name = path
89
32
  end
90
- end # class PackageBase
91
-
92
-
33
+ def name() @name; end
34
+ def read() File.open( @path, 'r:utf-8' ).read; end
35
+ end # class DirPackage::Entry
93
36
 
94
37
 
95
- class DirPackage < PackageBase ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
38
+ attr_reader :name, :path
96
39
 
97
40
  def initialize( path )
98
41
  @path = path ## rename to root_path or base_path or somehting - why? why not?
99
- end
100
42
 
43
+ basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
+ @name = basename
45
+ end
101
46
 
102
- def each_file( pattern: ) ## todo/check: rename to glob or something - why? why not?
47
+ def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
+ ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
103
49
  ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
104
- Dir.glob( "#{@path}/**/{*,.*}.txt" ).each do |path|
50
+ Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
105
51
  ## todo/fix: (auto) skip and check for directories
106
52
  if pattern.match( path )
107
- yield( path )
53
+ yield( Entry.new( self, path ))
108
54
  else
109
55
  ## puts " skipping >#{path}<"
110
56
  end
111
57
  end
112
58
  end
113
59
 
60
+ def find( name )
61
+ Entry.new( self, "#{@path}/#{name}" )
62
+ end
63
+ end # class DirPackage
114
64
 
115
- Entry = Struct.new( :name )
116
65
 
117
- def each( pattern: ) ## todo/check: rename to each_entry - why? why not?
118
- each_file( pattern: pattern ) do |path|
119
- ## fix: split path like a "virtual" zip like entry
120
- yield( Entry.new( path ) )
121
- end
66
+ ## helper wrapper for datafiles in zips
67
+ class ZipPackage
68
+ class Entry
69
+ def initialize( pack, entry )
70
+ @pack = pack
71
+ @entry = entry
122
72
  end
123
73
 
124
- def read_entry( name )
125
- txt = File.open( "#{@path}/#{name}", 'r:utf-8').read
74
+ def name() @entry.name; end
75
+ def read
76
+ txt = @entry.get_input_stream.read
77
+ ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
78
+ txt = txt.force_encoding( Encoding::UTF_8 )
126
79
  txt
127
80
  end
81
+ end # class ZipPackage::Entry
128
82
 
129
- def each_read( pattern: )
130
- each_file( pattern: pattern ) do |path|
131
- txt = File.open( path, 'r:utf-8').read
132
- yield( path, txt ) ## only pass along txt - why? why not? or pass along entry and not just entry.name?
133
- end
134
- end
135
- end # class DirPackage
136
-
137
-
83
+ attr_reader :name, :path
138
84
 
139
- ## helper wrapper for datafiles in zips
140
- class ZipPackage < PackageBase
141
85
  def initialize( path )
142
86
  @path = path
143
- end
144
87
 
88
+ extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
89
+ basename = File.basename( path, extname )
90
+ @name = basename
91
+ end
145
92
 
146
93
  def each( pattern: )
147
94
  Zip::File.open( @path ) do |zipfile|
@@ -150,21 +97,35 @@ class ZipPackage < PackageBase
150
97
  next ## skip
151
98
  elsif entry.file?
152
99
  if pattern.match( entry.name )
153
- yield( entry )
100
+ yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
154
101
  else
155
102
  ## puts " skipping >#{entry.name}<"
156
103
  end
157
104
  else
158
- puts "** !! ERROR !! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
105
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
159
106
  exit 1
160
107
  end
161
108
  end
162
109
  end
163
110
  end
164
111
 
112
+ def find( name )
113
+ entries = match_entry( name )
114
+ if entries.empty?
115
+ puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
116
+ exit 1
117
+ elsif entries.size > 1
118
+ puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
119
+ pp entries
120
+ exit 1
121
+ else
122
+ Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
123
+ end
124
+ end
165
125
 
126
+ private
166
127
  def match_entry( name )
167
- ## todo/fix: use Zip::File.glob or find_entry or ? why? why not?
128
+ ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
168
129
 
169
130
  pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
170
131
  $
@@ -180,39 +141,12 @@ class ZipPackage < PackageBase
180
141
  entries << entry
181
142
  end
182
143
  else
183
- puts "** !! ERROR !! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
144
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
184
145
  exit 1
185
146
  end
186
147
  end
187
148
  end
188
149
  entries
189
150
  end
190
-
191
- def read_entry( name )
192
- entries = match_entry( name )
193
- if entries.empty?
194
- puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
195
- exit 1
196
- elsif entries.size > 1
197
- puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
198
- pp entries
199
- exit 1
200
- else
201
- entry = entries[0]
202
- txt = entry.get_input_stream.read
203
- ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
204
- txt = txt.force_encoding( Encoding::UTF_8 )
205
- end
206
- end
207
-
208
-
209
- def each_read( pattern: )
210
- each( pattern: pattern ) do |entry|
211
- txt = entry.get_input_stream.read
212
- ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
213
- txt = txt.force_encoding( Encoding::UTF_8 )
214
- yield( "#{@path}!/#{entry.name}", txt ) ## only pass along txt - why? why not? or pass along entry and not just entry.name?
215
- end
216
- end
217
151
  end # class ZipPackage
218
152
  end # module Datafile
@@ -0,0 +1,90 @@
1
+
2
+ module SportDb
3
+ class Package
4
+
5
+ CONF_RE = Datafile::CONF_RE
6
+ CLUB_PROPS_RE = Datafile::CLUB_PROPS_RE
7
+
8
+ ## note: if pattern includes directory add here (otherwise move to more "generic" datafile) - why? why not?
9
+ MATCH_RE = %r{ /\d{4}-\d{2} ## season folder e.g. /2019-20
10
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
11
+ }x
12
+
13
+ def initialize( path )
14
+ if !File.exist?( path ) ## file or directory
15
+ puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
16
+ exit 1
17
+ end
18
+
19
+ if File.directory?( path )
20
+ @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
21
+ elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
22
+ @pack = Datafile::ZipPackage.new( path )
23
+ else
24
+ puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
25
+ exit 1
26
+ end
27
+ end
28
+
29
+ def each_conf( &blk ) @pack.each( pattern: CONF_RE, &blk ); end
30
+ def each_match( &blk ) @pack.each( pattern: MATCH_RE, &blk ); end
31
+ def each_club_props( &blk ) @pack.each( pattern: CLUB_PROPS_RE, &blk ); end
32
+
33
+
34
+ def read_club_props( sync: true )
35
+ each_club_props do |entry|
36
+ SportDb.parse_club_props( entry.read, sync: sync )
37
+ end
38
+ end
39
+
40
+ def read_conf( *names,
41
+ season: nil, sync: true )
42
+ if names.empty? ## no (entry) names passed in; read in all
43
+ each_conf do |entry|
44
+ SportDb.parse_conf( entry.read, season: season, sync: sync )
45
+ end
46
+ else
47
+ names.each do |name|
48
+ entry = @pack.find( name )
49
+ SportDb.parse_conf( entry.read, season: season, sync: sync )
50
+ end
51
+ end
52
+ end
53
+
54
+ def read_match( *names,
55
+ season: nil, sync: true )
56
+ if names.empty? ## no (entry) names passed in; read in all
57
+ each_match do |entry|
58
+ SportDb.parse_match( entry.read, season: season, sync: sync )
59
+ end
60
+ else
61
+ names.each do |name|
62
+ entry = @pack.find( name )
63
+ SportDb.parse_match( entry.read, season: season, sync: sync )
64
+ end
65
+ end
66
+ end
67
+
68
+
69
+ def read( *names,
70
+ season: nil, sync: true )
71
+ if names.empty? ## read all datafiles
72
+ read_club_props( sync: sync )
73
+ read_conf( season: season, sync: sync )
74
+ read_match( season: season, sync: sync )
75
+ else
76
+ names.each do |name|
77
+ entry = @pack.find( name )
78
+ ## fix/todo: add read_clubs_props too!!!
79
+ if Datafile.match_conf( name ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
80
+ SportDb.parse_conf( entry.read, season: season, sync: sync )
81
+ elsif Datafile.match_club_props( name )
82
+ SportDb.parse_club_props( entry.read, sync: sync )
83
+ else ## assume "regular" match datafile
84
+ SportDb.parse_match( entry.read, season: season, sync: sync )
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end # class Package
90
+ end # module SportDb
@@ -6,7 +6,7 @@ module Readers
6
6
 
7
7
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
8
  MINOR = 3
9
- PATCH = 2
9
+ PATCH = 3
10
10
  VERSION = [MAJOR,MINOR,PATCH].join('.')
11
11
 
12
12
  def self.version
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'zip' ## todo/check: if zip is alreay included in a required module?
3
+ require 'zip' ## todo/check: if zip is alreay included in a required module - move into sportdb-formats and add datafile!!!
4
4
 
5
5
 
6
6
 
@@ -20,6 +20,8 @@ require 'sportdb/readers/match_reader'
20
20
  require 'sportdb/readers/match_linter'
21
21
  require 'sportdb/readers/club_props_reader'
22
22
  require 'sportdb/readers/datafile'
23
+ require 'sportdb/readers/package'
24
+
23
25
 
24
26
 
25
27
 
@@ -32,39 +34,46 @@ module SportDb
32
34
  sync ? ConfReaderV2.read( path, season: season )
33
35
  : ConfLinter.read( path, season: season )
34
36
  end
35
-
36
- def self.read_match( path, season: nil, sync: true ) ### todo/check: add alias read_matches - why? why not?
37
- sync ? MatchReaderV2.read( path, season: season )
38
- : MatchLinter.read( path, season: season )
39
- end
40
-
41
-
42
37
  def self.parse_conf( txt, season: nil, sync: true )
43
38
  sync ? ConfReaderV2.parse( txt, season: season )
44
39
  : ConfLinter.parse( txt, season: season )
45
40
  end
46
41
 
42
+ def self.read_match( path, season: nil, sync: true ) ### todo/check: add alias read_matches - why? why not?
43
+ sync ? MatchReaderV2.read( path, season: season )
44
+ : MatchLinter.read( path, season: season )
45
+ end
47
46
  def self.parse_match( txt, season: nil, sync: true ) ### todo/check: add alias read_matches - why? why not?
48
47
  sync ? MatchReaderV2.parse( txt, season: season )
49
48
  : MatchLinter.parse( txt, season: season )
50
49
  end
51
50
 
51
+ def self.read_club_props( path, sync: true )
52
+ ## note: for now run only if sync (e.g. run with db updates)
53
+ SportDb::Import::ClubPropsReader.read( path ) if sync
54
+ end
55
+ def self.parse_club_props( txt, sync: true )
56
+ ## note: for now run only if sync (e.g. run with db updates)
57
+ SportDb::Import::ClubPropsReader.parse( txt ) if sync
58
+ end
59
+
52
60
 
53
61
  def self.read( path, season: nil, sync: true )
54
62
  pack = if File.directory?( path ) ## if directory assume "unzipped" package
55
- Datafile::DirPackage.new( path )
56
- elsif Datafile.match_zip( path ) ## check if file is a .zip (archive) file
57
- Datafile::ZipPackage.new( path )
63
+ Package.new( path )
64
+ elsif File.file?( path ) && Datafile.match_zip( path ) ## check if file is a .zip (archive) file
65
+ Package.new( path )
58
66
  else ## no package; assume single (standalone) datafile
59
67
  nil
60
68
  end
61
69
 
62
70
  if pack
63
- pack.read_conf( season: season, sync: sync )
64
- pack.read_match( season: season, sync: sync )
71
+ pack.read( season: season, sync: sync )
65
72
  else
66
73
  if Datafile.match_conf( path ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
67
74
  read_conf( path, season: season, sync: sync )
75
+ elsif Datafile.match_club_props( path )
76
+ read_club_props( path, sync: sync )
68
77
  else ## assume "regular" match datafile
69
78
  read_match( path, season: season, sync: sync )
70
79
  end
@@ -72,6 +81,7 @@ module SportDb
72
81
  end # method read
73
82
 
74
83
 
84
+
75
85
  ## (more) convenience helpers for lint(ing)
76
86
  def self.lint( path, season: nil ) read( path, season: season, sync: false ); end
77
87
  def self.lint_conf( path, season: nil ) read_conf( path, season: season, sync: false ); end
data/test/test_package.rb CHANGED
@@ -11,15 +11,17 @@ require 'helper'
11
11
  class TestPackage < MiniTest::Test
12
12
 
13
13
  def test_read
14
- eng = Datafile::DirPackage.new( '../../../openfootball/england' )
15
- ## eng = Datafile::ZipPackage.new( 'tmp/england-master.zip' )
16
- assert eng.read_entry( '2015-16/.conf.txt' ).start_with?( '= English Premier League 2015/16' )
17
- assert eng.read_entry( '2017-18/.conf.txt' ).start_with?( '= English Premier League 2017/18' )
18
- assert eng.read_entry( '2015-16/1-premierleague-i.txt' ).start_with?( '= English Premier League 2015/16' )
19
-
20
- at = Datafile::DirPackage.new( '../../../openfootball/austria' )
21
- ## at = Datafile::ZipPackage.new( 'tmp/austria-master.zip' )
22
- assert at.read_entry( '2018-19/.conf.txt' ).start_with?( '= Österr. Bundesliga 2018/19' )
14
+ [Datafile::DirPackage.new( '../../../openfootball/england' ),
15
+ Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
16
+ assert eng.find( '2015-16/.conf.txt' ).read.start_with?( '= English Premier League 2015/16' )
17
+ assert eng.find( '2017-18/.conf.txt' ).read.start_with?( '= English Premier League 2017/18' )
18
+ assert eng.find( '2015-16/1-premierleague-i.txt' ).read.start_with?( '= English Premier League 2015/16' )
19
+ end
20
+
21
+ [Datafile::DirPackage.new( '../../../openfootball/austria' ),
22
+ Datafile::ZipPackage.new( 'tmp/austria-master.zip' )].each do |at|
23
+ assert at.find( '2018-19/.conf.txt' ).read.start_with?( '= Österr. Bundesliga 2018/19' )
24
+ end
23
25
  end # method test_read
24
26
 
25
27
  end # class TestPackage
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-readers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-17 00:00:00.000000000 Z
11
+ date: 2019-11-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-config
@@ -103,6 +103,7 @@ files:
103
103
  - lib/sportdb/readers/match_linter.rb
104
104
  - lib/sportdb/readers/match_parser.rb
105
105
  - lib/sportdb/readers/match_reader.rb
106
+ - lib/sportdb/readers/package.rb
106
107
  - lib/sportdb/readers/version.rb
107
108
  - test/helper.rb
108
109
  - test/test_match_parser.rb