sportdb-formats 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/Rakefile +3 -1
- data/lib/sportdb/formats.rb +5 -0
- data/lib/sportdb/formats/datafile.rb +19 -15
- data/lib/sportdb/formats/package.rb +160 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/test_datafile.rb +17 -1
- data/test/test_package.rb +27 -0
- metadata +17 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7888347efffbc31760344406232e8f930d8777a0
|
4
|
+
data.tar.gz: 8cba378a58d47f5b4ddc34ee933df6bb19e74cc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54cf5468032c75452e8a595fcf2efb204edec7796478aeef21fdd571c0503eaf1e412c0b8e51e927cb5fab0e7ea363d286885bdd6e7c62f5b422f5f2a7c806b4
|
7
|
+
data.tar.gz: c56db501666da9225f930ed16d9bbdd16f81d968b5d25eea5f04b764bf5c3bed62a33ad6843595c2316102356d89071c324b13db7a3bb8a8e9171f3abf77a534
|
data/Manifest.txt
CHANGED
@@ -6,6 +6,7 @@ lib/sportdb/formats.rb
|
|
6
6
|
lib/sportdb/formats/datafile.rb
|
7
7
|
lib/sportdb/formats/goals.rb
|
8
8
|
lib/sportdb/formats/outline_reader.rb
|
9
|
+
lib/sportdb/formats/package.rb
|
9
10
|
lib/sportdb/formats/scores.rb
|
10
11
|
lib/sportdb/formats/season_utils.rb
|
11
12
|
lib/sportdb/formats/version.rb
|
@@ -15,5 +16,6 @@ test/test_datafile.rb
|
|
15
16
|
test/test_datafile_match.rb
|
16
17
|
test/test_goals.rb
|
17
18
|
test/test_outline_reader.rb
|
19
|
+
test/test_package.rb
|
18
20
|
test/test_scores.rb
|
19
21
|
test/test_season_utils.rb
|
data/Rakefile
CHANGED
@@ -24,9 +24,11 @@ Hoe.spec 'sportdb-formats' do
|
|
24
24
|
['date-formats', '>= 0.2.4'],
|
25
25
|
['csvreader', '>= 1.2.4'],
|
26
26
|
['sportdb-langs', '>= 0.0.1'],
|
27
|
+
|
28
|
+
['rubyzip', '>= 1.2.4' ],
|
27
29
|
]
|
28
30
|
|
29
31
|
self.spec_extras = {
|
30
|
-
|
32
|
+
required_ruby_version: '>= 2.2.2'
|
31
33
|
}
|
32
34
|
end
|
data/lib/sportdb/formats.rb
CHANGED
@@ -6,6 +6,10 @@ require 'alphabets' # downcase_i18n, unaccent, variants, ...
|
|
6
6
|
require 'date/formats' # DateFormats.parse, find!, ...
|
7
7
|
require 'csvreader'
|
8
8
|
|
9
|
+
require 'zip' ## todo/check: if zip is already included in a required module
|
10
|
+
|
11
|
+
|
12
|
+
|
9
13
|
def read_csv( path )
|
10
14
|
CsvHash.read( path, :header_converters => :symbol )
|
11
15
|
end
|
@@ -24,6 +28,7 @@ require 'sportdb/langs'
|
|
24
28
|
require 'sportdb/formats/version' # let version always go first
|
25
29
|
require 'sportdb/formats/outline_reader'
|
26
30
|
require 'sportdb/formats/datafile'
|
31
|
+
require 'sportdb/formats/package'
|
27
32
|
require 'sportdb/formats/season_utils'
|
28
33
|
|
29
34
|
|
@@ -19,40 +19,44 @@ module Datafile # note: keep Datafile in its own top-level module/namespace
|
|
19
19
|
end
|
20
20
|
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.txt
|
22
|
+
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
23
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
|
25
24
|
clubs\.txt$
|
26
25
|
}x
|
27
26
|
|
28
|
-
|
27
|
+
CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
29
28
|
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
30
29
|
clubs\.wiki\.txt$
|
31
30
|
}x
|
32
31
|
|
33
|
-
|
34
|
-
|
32
|
+
CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
33
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
34
|
+
clubs\.props\.txt$
|
35
|
+
}x
|
35
36
|
|
36
|
-
def self.
|
37
|
-
def self.
|
37
|
+
## Look up club datafiles below +path+.
## Simply hands +path+ and +pattern+ (defaults to CLUBS_RE) to find;
## the return value is whatever find returns.
def self.find_clubs( path, pattern: CLUBS_RE )
  find( path, pattern )
end
|
38
|
+
## Look up club wiki datafiles below +path+.
## Simply hands +path+ and +pattern+ (defaults to CLUBS_WIKI_RE) to find;
## the return value is whatever find returns.
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
  find( path, pattern )
end
|
38
39
|
|
40
|
+
## Match +path+ against the clubs datafile pattern; returns a MatchData or nil.
## note: pattern: can be overridden (defaults to CLUBS_RE) - same signature as match_club_props
def self.match_clubs( path, pattern: CLUBS_RE ) pattern.match( path ); end
|
41
|
+
## Match +path+ against the clubs wiki datafile pattern; returns a MatchData or nil.
## note: pattern: can be overridden (defaults to CLUBS_WIKI_RE) - same signature as match_club_props
def self.match_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) pattern.match( path ); end
|
42
|
+
## Check +path+ against the club props datafile pattern.
## Returns the MatchData on a hit and nil otherwise; pattern: defaults to CLUB_PROPS_RE.
def self.match_club_props( path, pattern: CLUB_PROPS_RE )
  pattern.match( path )
end
|
39
43
|
|
40
44
|
|
41
|
-
|
42
|
-
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
45
|
+
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
46
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
|
43
47
|
leagues\.txt$
|
44
48
|
}x
|
45
49
|
|
46
|
-
def self.find_leagues( path, pattern:
|
47
|
-
def self.match_leagues( path )
|
50
|
+
## Look up league datafiles below +path+.
## Simply hands +path+ and +pattern+ (defaults to LEAGUES_RE) to find;
## the return value is whatever find returns.
def self.find_leagues( path, pattern: LEAGUES_RE )
  find( path, pattern )
end
|
51
|
+
## Match +path+ against the leagues datafile pattern; returns a MatchData or nil.
## note: pattern: can be overridden (defaults to LEAGUES_RE) - same signature as match_club_props
def self.match_leagues( path, pattern: LEAGUES_RE ) pattern.match( path ); end
|
48
52
|
|
49
53
|
|
50
|
-
|
54
|
+
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
51
55
|
\.conf\.txt$
|
52
56
|
}x
|
53
57
|
|
54
|
-
def self.find_conf( path, pattern:
|
55
|
-
def self.match_conf( path )
|
58
|
+
## Look up .conf.txt datafiles below +path+.
## Simply hands +path+ and +pattern+ (defaults to CONF_RE) to find;
## the return value is whatever find returns.
def self.find_conf( path, pattern: CONF_RE )
  find( path, pattern )
end
|
59
|
+
## Match +path+ against the .conf.txt datafile pattern; returns a MatchData or nil.
## note: pattern: can be overridden (defaults to CONF_RE) - same signature as match_club_props
def self.match_conf( path, pattern: CONF_RE ) pattern.match( path ); end
|
56
60
|
|
57
61
|
|
58
62
|
|
@@ -0,0 +1,160 @@
|
|
1
|
+
|
2
|
+
module Datafile      # note: keep Datafile in its own top-level module/namespace for now - why? why not?

  ZIP_RE = %r{ \.zip$
           }x

  ## Match +path+ against the zip archive pattern; returns a MatchData or nil.
  def self.match_zip( path, pattern: ZIP_RE ) pattern.match( path ); end


  ## exclude pattern
  ##  for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
  ##  todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
  EXCLUDE_RE = %r{ (?: ^|/ )               # beginning (^) or beginning of path (/)
                    \.[a-zA-Z0-9_-]+      ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
                    /
                 }x

  ## Match +path+ against the exclude pattern; returns a MatchData or nil.
  def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end



  class Package; end    ## use a shared base class for DirPackage, ZipPackage, etc.


  ## Package backed by a plain directory; entries are files below the directory.
  class DirPackage < Package  ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?

    ## Single file of a DirPackage; offers name and read (same calls as ZipPackage::Entry).
    class Entry
      attr_reader :name

      def initialize( pack, path )
        @pack = pack   ## parent package
        @path = path
        ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
        @name = path
      end

      ## Read the complete file and return it as a utf-8 string.
      ##   note: the block form closes the file handle right after reading
      ##         (a plain File.open( ... ).read keeps the handle open until garbage collected)
      def read() File.open( @path, 'r:utf-8' ) { |file| file.read }; end
    end # class DirPackage::Entry


    attr_reader :name, :path

    def initialize( path )
      ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
      @path = path   ## rename to root_path or base_path or something - why? why not?

      ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
      @name = File.basename( path )
    end

    ## Yield an Entry for every file below the package path that
    ##  - ends with the given extension,
    ##  - is NOT inside a dot directory (see EXCLUDE_RE) and
    ##  - matches +pattern+ (anything that responds to match).
    def each( pattern:, extension: 'txt' )   ## todo/check: rename to glob or something - why? why not?
      ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
      ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
      Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
        next unless File.file?( path )       ## skip directories that happen to end with the extension
        next if EXCLUDE_RE.match( path )     ## note: skip dot dirs (e.g. .build/, .git/, etc.)

        yield( Entry.new( self, path ) )  if pattern.match( path )
      end
    end

    ## Wrap the file +name+ (relative to the package path) in an Entry.
    ##   note: does NOT check if the file exists
    def find( name )
      Entry.new( self, "#{@path}/#{name}" )
    end
  end  # class DirPackage


  ## helper wrapper for datafiles in zips
  class ZipPackage < Package

    ## Single file entry of a ZipPackage; offers name and read (same calls as DirPackage::Entry).
    class Entry
      def initialize( pack, entry )
        @pack  = pack
        @entry = entry
      end

      def name() @entry.name; end

      ## Read the complete entry and return it tagged as a utf-8 string.
      def read
        ## note: the block form closes the input stream once the block returns
        txt = @entry.get_input_stream { |stream| stream.read }
        ## puts "** encoding: #{txt.encoding}"  #=> encoding: ASCII-8BIT
        txt.force_encoding( Encoding::UTF_8 )
      end
    end # class ZipPackage::Entry


    attr_reader :name, :path

    def initialize( path )
      @path = path

      extname = File.extname( path )   ## todo/check: double check if extension is .zip - why? why not?
      @name   = File.basename( path, extname )
    end

    ## Yield an Entry for every file entry in the zip that is NOT inside a dot directory
    ##  and whose name matches +pattern+.
    def each( pattern: )
      each_file_entry do |entry|
        if pattern.match( entry.name )
          yield( Entry.new( self, entry ) )  # wrap entry in uniform access interface / api
        else
          ## puts "  skipping >#{entry.name}<"
        end
      end
    end

    ## Return the one entry whose name ends with +name+;
    ##  prints an error and exits if there is no match or more than one match.
    def find( name )
      entries = match_entry( name )
      if entries.empty?
        puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
        exit 1
      elsif entries.size > 1
        puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
        pp entries
        exit 1
      else
        Entry.new( self, entries[0] )    # wrap entry in uniform access interface / api
      end
    end

  private
    ## Collect all (raw) zip entries with a name ending in +name+.
    def match_entry( name )
      ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?

      pattern = %r{ #{Regexp.escape( name )}     ## match string if ends with name
                     $
                  }x

      entries = []
      each_file_entry do |entry|
        entries << entry  if pattern.match( entry.name )
      end
      entries
    end

    ## Open the zip and yield every (raw) file entry that is not excluded - shared by each and match_entry.
    ##   skips directory entries and entries inside dot dirs (e.g. .build/, .git/, etc.);
    ##   prints an error and exits on an entry that is neither a directory nor a file.
    def each_file_entry
      Zip::File.open( @path ) do |zipfile|
        zipfile.each do |entry|
          next if entry.directory?   ## skip

          unless entry.file?
            puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
            exit 1
          end

          next if EXCLUDE_RE.match( entry.name )  ## note: skip dot dirs (e.g. .build/, .git/, etc.)

          yield( entry )
        end
      end
    end
  end # class ZipPackage
end # module Datafile
|
data/test/test_datafile.rb
CHANGED
@@ -10,11 +10,26 @@ require 'helper'
|
|
10
10
|
|
11
11
|
class TestDatafile < MiniTest::Test
|
12
12
|
|
13
|
+
## every path pointing to (or into) a dot directory must be matched by the exclude pattern
def test_exclude
  dot_dir_paths = [
    '.build/',
    '.git/',

    '/.build/',
    '/.git/',

    '.build/leagues.txt',
    '.git/leagues.txt',

    '/.build/leagues.txt',
    '/.git/leagues.txt',
  ]

  dot_dir_paths.each do |path|
    assert Datafile.match_exclude( path )
  end
end
|
26
|
+
|
27
|
+
|
28
|
+
|
13
29
|
CLUBS_DIR = '../../../openfootball/clubs' ## source repo directory path
|
14
30
|
LEAGUES_DIR = '../../../openfootball/leagues'
|
15
31
|
AUSTRIA_DIR = '../../../openfootball/austria'
|
16
32
|
|
17
|
-
|
18
33
|
def test_find
|
19
34
|
datafiles = Datafile.find_clubs( CLUBS_DIR )
|
20
35
|
pp datafiles
|
@@ -29,6 +44,7 @@ class TestDatafile < MiniTest::Test
|
|
29
44
|
pp datafiles
|
30
45
|
end
|
31
46
|
|
47
|
+
|
32
48
|
def test_bundle
|
33
49
|
datafiles = Datafile.find_clubs( CLUBS_DIR )
|
34
50
|
pp datafiles
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_package.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestPackage < MiniTest::Test

  ## read the same datafiles via the directory and via the zip package;
  ##   both must return text starting with the expected heading
  def test_read
    england = [Datafile::DirPackage.new( '../../../openfootball/england' ),
               Datafile::ZipPackage.new( 'tmp/england-master.zip' )]
    england_headings = [
      ['2015-16/.conf.txt',            '= English Premier League 2015/16'],
      ['2017-18/.conf.txt',            '= English Premier League 2017/18'],
      ['2015-16/1-premierleague-i.txt', '= English Premier League 2015/16'],
    ]

    england.each do |eng|
      england_headings.each do |name, heading|
        assert eng.find( name ).read.start_with?( heading )
      end
    end

    austria = [Datafile::DirPackage.new( '../../../openfootball/austria' ),
               Datafile::ZipPackage.new( 'tmp/austria-master.zip' )]

    austria.each do |at|
      assert at.find( '2018-19/.conf.txt' ).read.start_with?( '= Österr. Bundesliga 2018/19' )
    end
  end  # method test_read

end # class TestPackage
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.0.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubyzip
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.2.4
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.2.4
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rdoc
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,6 +125,7 @@ files:
|
|
111
125
|
- lib/sportdb/formats/datafile.rb
|
112
126
|
- lib/sportdb/formats/goals.rb
|
113
127
|
- lib/sportdb/formats/outline_reader.rb
|
128
|
+
- lib/sportdb/formats/package.rb
|
114
129
|
- lib/sportdb/formats/scores.rb
|
115
130
|
- lib/sportdb/formats/season_utils.rb
|
116
131
|
- lib/sportdb/formats/version.rb
|
@@ -120,6 +135,7 @@ files:
|
|
120
135
|
- test/test_datafile_match.rb
|
121
136
|
- test/test_goals.rb
|
122
137
|
- test/test_outline_reader.rb
|
138
|
+
- test/test_package.rb
|
123
139
|
- test/test_scores.rb
|
124
140
|
- test/test_season_utils.rb
|
125
141
|
homepage: https://github.com/sportdb/sport.db
|