sportdb-formats 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/Rakefile +3 -1
- data/lib/sportdb/formats.rb +5 -0
- data/lib/sportdb/formats/datafile.rb +19 -15
- data/lib/sportdb/formats/package.rb +160 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/test_datafile.rb +17 -1
- data/test/test_package.rb +27 -0
- metadata +17 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7888347efffbc31760344406232e8f930d8777a0
|
4
|
+
data.tar.gz: 8cba378a58d47f5b4ddc34ee933df6bb19e74cc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54cf5468032c75452e8a595fcf2efb204edec7796478aeef21fdd571c0503eaf1e412c0b8e51e927cb5fab0e7ea363d286885bdd6e7c62f5b422f5f2a7c806b4
|
7
|
+
data.tar.gz: c56db501666da9225f930ed16d9bbdd16f81d968b5d25eea5f04b764bf5c3bed62a33ad6843595c2316102356d89071c324b13db7a3bb8a8e9171f3abf77a534
|
data/Manifest.txt
CHANGED
@@ -6,6 +6,7 @@ lib/sportdb/formats.rb
|
|
6
6
|
lib/sportdb/formats/datafile.rb
|
7
7
|
lib/sportdb/formats/goals.rb
|
8
8
|
lib/sportdb/formats/outline_reader.rb
|
9
|
+
lib/sportdb/formats/package.rb
|
9
10
|
lib/sportdb/formats/scores.rb
|
10
11
|
lib/sportdb/formats/season_utils.rb
|
11
12
|
lib/sportdb/formats/version.rb
|
@@ -15,5 +16,6 @@ test/test_datafile.rb
|
|
15
16
|
test/test_datafile_match.rb
|
16
17
|
test/test_goals.rb
|
17
18
|
test/test_outline_reader.rb
|
19
|
+
test/test_package.rb
|
18
20
|
test/test_scores.rb
|
19
21
|
test/test_season_utils.rb
|
data/Rakefile
CHANGED
@@ -24,9 +24,11 @@ Hoe.spec 'sportdb-formats' do
|
|
24
24
|
['date-formats', '>= 0.2.4'],
|
25
25
|
['csvreader', '>= 1.2.4'],
|
26
26
|
['sportdb-langs', '>= 0.0.1'],
|
27
|
+
|
28
|
+
['rubyzip', '>= 1.2.4' ],
|
27
29
|
]
|
28
30
|
|
29
31
|
self.spec_extras = {
|
30
|
-
|
32
|
+
required_ruby_version: '>= 2.2.2'
|
31
33
|
}
|
32
34
|
end
|
data/lib/sportdb/formats.rb
CHANGED
@@ -6,6 +6,10 @@ require 'alphabets' # downcase_i18n, unaccent, variants, ...
|
|
6
6
|
require 'date/formats' # DateFormats.parse, find!, ...
|
7
7
|
require 'csvreader'
|
8
8
|
|
9
|
+
require 'zip' ## todo/check: if zip is already included in a required module
|
10
|
+
|
11
|
+
|
12
|
+
|
9
13
|
def read_csv( path )
|
10
14
|
CsvHash.read( path, :header_converters => :symbol )
|
11
15
|
end
|
@@ -24,6 +28,7 @@ require 'sportdb/langs'
|
|
24
28
|
require 'sportdb/formats/version' # let version always go first
|
25
29
|
require 'sportdb/formats/outline_reader'
|
26
30
|
require 'sportdb/formats/datafile'
|
31
|
+
require 'sportdb/formats/package'
|
27
32
|
require 'sportdb/formats/season_utils'
|
28
33
|
|
29
34
|
|
@@ -19,40 +19,44 @@ module Datafile # note: keep Datafile in its own top-level module/namespace
|
|
19
19
|
end
|
20
20
|
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.txt
|
22
|
+
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
23
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
|
25
24
|
clubs\.txt$
|
26
25
|
}x
|
27
26
|
|
28
|
-
|
27
|
+
CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
29
28
|
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
30
29
|
clubs\.wiki\.txt$
|
31
30
|
}x
|
32
31
|
|
33
|
-
|
34
|
-
|
32
|
+
CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
33
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
34
|
+
clubs\.props\.txt$
|
35
|
+
}x
|
35
36
|
|
36
|
-
def self.
|
37
|
-
def self.
|
37
|
+
def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
|
38
|
+
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
|
38
39
|
|
40
|
+
def self.match_clubs( path ) CLUBS_RE.match( path ); end
|
41
|
+
def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
|
42
|
+
def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
|
39
43
|
|
40
44
|
|
41
|
-
|
42
|
-
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
45
|
+
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
46
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
|
43
47
|
leagues\.txt$
|
44
48
|
}x
|
45
49
|
|
46
|
-
def self.find_leagues( path, pattern:
|
47
|
-
def self.match_leagues( path )
|
50
|
+
def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
|
51
|
+
def self.match_leagues( path ) LEAGUES_RE.match( path ); end
|
48
52
|
|
49
53
|
|
50
|
-
|
54
|
+
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
51
55
|
\.conf\.txt$
|
52
56
|
}x
|
53
57
|
|
54
|
-
def self.find_conf( path, pattern:
|
55
|
-
def self.match_conf( path )
|
58
|
+
def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
|
59
|
+
def self.match_conf( path ) CONF_RE.match( path ); end
|
56
60
|
|
57
61
|
|
58
62
|
|
@@ -0,0 +1,160 @@
|
|
1
|
+
|
2
|
+
module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
|
3
|
+
|
4
|
+
|
5
|
+
ZIP_RE = %r{ \.zip$
|
6
|
+
}x
|
7
|
+
def self.match_zip( path, pattern: ZIP_RE ) pattern.match( path ); end
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
## exclude pattern
|
12
|
+
## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
|
13
|
+
## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
|
14
|
+
EXCLUDE_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
15
|
+
\.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
|
16
|
+
/
|
17
|
+
}x
|
18
|
+
def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
|
23
|
+
|
24
|
+
class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
|
25
|
+
class Entry
|
26
|
+
def initialize( pack, path )
|
27
|
+
@pack = pack ## parent package
|
28
|
+
@path = path
|
29
|
+
## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
|
30
|
+
@name = path
|
31
|
+
end
|
32
|
+
def name() @name; end
|
33
|
+
## Returns the complete content of the file at @path as a UTF-8 string.
## note: uses the block form of File.open so the file handle gets closed right after reading
##       (the non-block form keeps the handle open until it gets garbage collected)
def read() File.open( @path, 'r:utf-8' ) { |f| f.read }; end
|
34
|
+
end # class DirPackage::Entry
|
35
|
+
|
36
|
+
|
37
|
+
attr_reader :name, :path
|
38
|
+
|
39
|
+
def initialize( path )
|
40
|
+
## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
|
41
|
+
@path = path ## rename to root_path or base_path or something - why? why not?
|
42
|
+
|
43
|
+
basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
|
44
|
+
@name = basename
|
45
|
+
end
|
46
|
+
|
47
|
+
def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
|
48
|
+
## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
|
49
|
+
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
50
|
+
Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
|
51
|
+
## todo/fix: (auto) skip and check for directories
|
52
|
+
if EXCLUDE_RE.match( path )
|
53
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
54
|
+
elsif pattern.match( path )
|
55
|
+
yield( Entry.new( self, path ))
|
56
|
+
else
|
57
|
+
## puts " skipping >#{path}<"
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def find( name )
|
63
|
+
Entry.new( self, "#{@path}/#{name}" )
|
64
|
+
end
|
65
|
+
end # class DirPackage
|
66
|
+
|
67
|
+
|
68
|
+
## helper wrapper for datafiles in zips
|
69
|
+
class ZipPackage < Package
|
70
|
+
class Entry
|
71
|
+
def initialize( pack, entry )
|
72
|
+
@pack = pack
|
73
|
+
@entry = entry
|
74
|
+
end
|
75
|
+
|
76
|
+
def name() @entry.name; end
|
77
|
+
def read
|
78
|
+
txt = @entry.get_input_stream.read
|
79
|
+
## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
|
80
|
+
txt = txt.force_encoding( Encoding::UTF_8 )
|
81
|
+
txt
|
82
|
+
end
|
83
|
+
end # class ZipPackage::Entry
|
84
|
+
|
85
|
+
attr_reader :name, :path
|
86
|
+
|
87
|
+
def initialize( path )
|
88
|
+
@path = path
|
89
|
+
|
90
|
+
extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
|
91
|
+
basename = File.basename( path, extname )
|
92
|
+
@name = basename
|
93
|
+
end
|
94
|
+
|
95
|
+
def each( pattern: )
|
96
|
+
Zip::File.open( @path ) do |zipfile|
|
97
|
+
zipfile.each do |entry|
|
98
|
+
if entry.directory?
|
99
|
+
next ## skip
|
100
|
+
elsif entry.file?
|
101
|
+
if EXCLUDE_RE.match( entry.name )
|
102
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
103
|
+
elsif pattern.match( entry.name )
|
104
|
+
yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
|
105
|
+
else
|
106
|
+
## puts " skipping >#{entry.name}<"
|
107
|
+
end
|
108
|
+
else
|
109
|
+
puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
|
110
|
+
exit 1
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def find( name )
|
117
|
+
entries = match_entry( name )
|
118
|
+
if entries.empty?
|
119
|
+
puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
|
120
|
+
exit 1
|
121
|
+
elsif entries.size > 1
|
122
|
+
puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
|
123
|
+
pp entries
|
124
|
+
exit 1
|
125
|
+
else
|
126
|
+
Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
private
|
131
|
+
def match_entry( name )
|
132
|
+
## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
|
133
|
+
|
134
|
+
pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
|
135
|
+
$
|
136
|
+
}x
|
137
|
+
|
138
|
+
entries = []
|
139
|
+
Zip::File.open( @path ) do |zipfile|
|
140
|
+
zipfile.each do |entry|
|
141
|
+
if entry.directory?
|
142
|
+
next ## skip
|
143
|
+
elsif entry.file?
|
144
|
+
if EXCLUDE_RE.match( entry.name )
|
145
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
146
|
+
elsif pattern.match( entry.name )
|
147
|
+
entries << entry
|
148
|
+
else
|
149
|
+
## no match; skip too
|
150
|
+
end
|
151
|
+
else
|
152
|
+
puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
|
153
|
+
exit 1
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
entries
|
158
|
+
end
|
159
|
+
end # class ZipPackage
|
160
|
+
end # module Datafile
|
data/test/test_datafile.rb
CHANGED
@@ -10,11 +10,26 @@ require 'helper'
|
|
10
10
|
|
11
11
|
class TestDatafile < MiniTest::Test
|
12
12
|
|
13
|
+
def test_exclude
|
14
|
+
assert Datafile.match_exclude( '.build/' )
|
15
|
+
assert Datafile.match_exclude( '.git/' )
|
16
|
+
|
17
|
+
assert Datafile.match_exclude( '/.build/' )
|
18
|
+
assert Datafile.match_exclude( '/.git/' )
|
19
|
+
|
20
|
+
assert Datafile.match_exclude( '.build/leagues.txt' )
|
21
|
+
assert Datafile.match_exclude( '.git/leagues.txt' )
|
22
|
+
|
23
|
+
assert Datafile.match_exclude( '/.build/leagues.txt' )
|
24
|
+
assert Datafile.match_exclude( '/.git/leagues.txt' )
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
|
13
29
|
CLUBS_DIR = '../../../openfootball/clubs' ## source repo directory path
|
14
30
|
LEAGUES_DIR = '../../../openfootball/leagues'
|
15
31
|
AUSTRIA_DIR = '../../../openfootball/austria'
|
16
32
|
|
17
|
-
|
18
33
|
def test_find
|
19
34
|
datafiles = Datafile.find_clubs( CLUBS_DIR )
|
20
35
|
pp datafiles
|
@@ -29,6 +44,7 @@ class TestDatafile < MiniTest::Test
|
|
29
44
|
pp datafiles
|
30
45
|
end
|
31
46
|
|
47
|
+
|
32
48
|
def test_bundle
|
33
49
|
datafiles = Datafile.find_clubs( CLUBS_DIR )
|
34
50
|
pp datafiles
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_package.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestPackage < MiniTest::Test
|
12
|
+
|
13
|
+
def test_read
|
14
|
+
[Datafile::DirPackage.new( '../../../openfootball/england' ),
|
15
|
+
Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
|
16
|
+
assert eng.find( '2015-16/.conf.txt' ).read.start_with?( '= English Premier League 2015/16' )
|
17
|
+
assert eng.find( '2017-18/.conf.txt' ).read.start_with?( '= English Premier League 2017/18' )
|
18
|
+
assert eng.find( '2015-16/1-premierleague-i.txt' ).read.start_with?( '= English Premier League 2015/16' )
|
19
|
+
end
|
20
|
+
|
21
|
+
[Datafile::DirPackage.new( '../../../openfootball/austria' ),
|
22
|
+
Datafile::ZipPackage.new( 'tmp/austria-master.zip' )].each do |at|
|
23
|
+
assert at.find( '2018-19/.conf.txt' ).read.start_with?( '= Österr. Bundesliga 2018/19' )
|
24
|
+
end
|
25
|
+
end # method test_read
|
26
|
+
|
27
|
+
end # class TestPackage
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.0.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubyzip
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.2.4
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.2.4
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rdoc
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,6 +125,7 @@ files:
|
|
111
125
|
- lib/sportdb/formats/datafile.rb
|
112
126
|
- lib/sportdb/formats/goals.rb
|
113
127
|
- lib/sportdb/formats/outline_reader.rb
|
128
|
+
- lib/sportdb/formats/package.rb
|
114
129
|
- lib/sportdb/formats/scores.rb
|
115
130
|
- lib/sportdb/formats/season_utils.rb
|
116
131
|
- lib/sportdb/formats/version.rb
|
@@ -120,6 +135,7 @@ files:
|
|
120
135
|
- test/test_datafile_match.rb
|
121
136
|
- test/test_goals.rb
|
122
137
|
- test/test_outline_reader.rb
|
138
|
+
- test/test_package.rb
|
123
139
|
- test/test_scores.rb
|
124
140
|
- test/test_season_utils.rb
|
125
141
|
homepage: https://github.com/sportdb/sport.db
|