sportdb-readers 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/lib/sportdb/readers.rb +1 -0
- data/lib/sportdb/readers/conf_parser_auto.rb +129 -0
- data/lib/sportdb/readers/datafile.rb +26 -5
- data/lib/sportdb/readers/package.rb +1 -2
- data/lib/sportdb/readers/version.rb +1 -1
- data/test/test_conf_parser_auto.rb +129 -0
- data/test/test_package.rb +14 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b82ac613158881351d512a4ffc9e7d15f34b133
|
4
|
+
data.tar.gz: 52251959f919b175feec37cadcc51e56c70e5037
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32c7921845dd7db5c2771a5941ab7ca0b27839eb88c80d71703354f6f533ca1c2eb99b47671e2462cbddcddb443fab3d0e115a4991f3d6155185a081d500796e
|
7
|
+
data.tar.gz: 072a747031829fb495773946c13ec6a43627c7b6de4b21b752c40eb2d1962c1fb3a047982fe83f1257be0b9be3f9115d0eed0d798de1b3dae2f93518e2ac02d3
|
data/Manifest.txt
CHANGED
@@ -5,6 +5,7 @@ Rakefile
|
|
5
5
|
lib/sportdb/readers.rb
|
6
6
|
lib/sportdb/readers/club_props_reader.rb
|
7
7
|
lib/sportdb/readers/conf_linter.rb
|
8
|
+
lib/sportdb/readers/conf_parser_auto.rb
|
8
9
|
lib/sportdb/readers/conf_reader.rb
|
9
10
|
lib/sportdb/readers/datafile.rb
|
10
11
|
lib/sportdb/readers/league_outline_reader.rb
|
@@ -14,6 +15,7 @@ lib/sportdb/readers/match_reader.rb
|
|
14
15
|
lib/sportdb/readers/package.rb
|
15
16
|
lib/sportdb/readers/version.rb
|
16
17
|
test/helper.rb
|
18
|
+
test/test_conf_parser_auto.rb
|
17
19
|
test/test_match_parser.rb
|
18
20
|
test/test_package.rb
|
19
21
|
test/test_props.rb
|
data/lib/sportdb/readers.rb
CHANGED
@@ -13,6 +13,7 @@ require 'sportdb/sync'
|
|
13
13
|
# our own code
|
14
14
|
require 'sportdb/readers/version' # let version always go first
|
15
15
|
require 'sportdb/readers/league_outline_reader'
|
16
|
+
require 'sportdb/readers/conf_parser_auto'
|
16
17
|
require 'sportdb/readers/conf_reader'
|
17
18
|
require 'sportdb/readers/conf_linter'
|
18
19
|
require 'sportdb/readers/match_parser'
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
|
5
|
+
|
6
|
+
class AutoConfParser
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
def initialize( lines, start )
|
11
|
+
@lines = lines ## todo/check: change to text instead of array of lines - why? why not?
|
12
|
+
@start = start
|
13
|
+
end
|
14
|
+
|
15
|
+
def parse
|
16
|
+
## try to find all clubs in match schedule
|
17
|
+
@clubs = Hash.new(0) ## keep track of usage counter
|
18
|
+
|
19
|
+
@lines.each do |line|
|
20
|
+
if is_round?( line )
|
21
|
+
logger.info "skipping matched round line: >#{line}<"
|
22
|
+
elsif try_parse_game( line )
|
23
|
+
# do nothing here
|
24
|
+
else
|
25
|
+
logger.info "skipping line (no match found): >#{line}<"
|
26
|
+
end
|
27
|
+
end # lines.each
|
28
|
+
|
29
|
+
@clubs
|
30
|
+
end
|
31
|
+
|
32
|
+
def is_round?( line )
|
33
|
+
## note: =~ return nil if not match found, and 0,1, etc for match
|
34
|
+
(line =~ SportDb.lang.regex_round) != nil
|
35
|
+
end
|
36
|
+
|
37
|
+
def try_parse_game( line )
|
38
|
+
# note: clone line; for possible test do NOT modify in place for now
|
39
|
+
# note: returns true if parsed, false if no match
|
40
|
+
parse_game( line.dup )
|
41
|
+
end
|
42
|
+
|
43
|
+
def parse_game( line )
|
44
|
+
logger.debug "parsing game (fixture) line: >#{line}<"
|
45
|
+
|
46
|
+
## remove all protected text runs e.g. []
|
47
|
+
## fix: add [ to end-of-line too
|
48
|
+
|
49
|
+
line = line.gsub( /\[
|
50
|
+
[^\]]+?
|
51
|
+
\]/x, '' ).strip
|
52
|
+
return true if line.empty? ## note: return true (for valid line with no match/clubs)
|
53
|
+
|
54
|
+
## split by geo (@) - remove for now
|
55
|
+
values = line.split( '@' )
|
56
|
+
line = values[0]
|
57
|
+
|
58
|
+
|
59
|
+
## try find date
|
60
|
+
date = find_date!( line, start: @start )
|
61
|
+
if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
|
62
|
+
line = line.sub( /\[
|
63
|
+
[^\]]+?
|
64
|
+
\]/x, '' ).strip
|
65
|
+
|
66
|
+
return true if line.empty? ## note: return true (for valid line with no match/clubs)
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
scores = find_scores!( line )
|
71
|
+
|
72
|
+
logger.debug " line: >#{line}<"
|
73
|
+
|
74
|
+
line = line.sub( /\[
|
75
|
+
[^\]]+?
|
76
|
+
\]/x, '$$' ) # note: replace first score tag with $$
|
77
|
+
line = line.gsub( /\[
|
78
|
+
[^\]]+?
|
79
|
+
\]/x, '' ) # note: replace/remove all other score tags with nothing
|
80
|
+
|
81
|
+
## clean-up remove all text run inside () or empty () too
|
82
|
+
line = line.gsub( /\(
|
83
|
+
[^)]*?
|
84
|
+
\)/x, '' )
|
85
|
+
|
86
|
+
|
87
|
+
## check for more match separators e.g. - or vs for now
|
88
|
+
line = line.sub( / \s+
|
89
|
+
(-|vs|v)
|
90
|
+
\s+
|
91
|
+
/ix, '$$' )
|
92
|
+
|
93
|
+
values = line.split( '$$' )
|
94
|
+
values = values.map { |value| value.strip } ## strip spaces
|
95
|
+
values = values.select { |value| !value.empty? } ## remove empty strings
|
96
|
+
|
97
|
+
return true if values.size == 0 ## note: return true (for valid line with no match/clubs)
|
98
|
+
return false if values.size != 2
|
99
|
+
|
100
|
+
puts "(auto config) try matching clubs:"
|
101
|
+
pp values
|
102
|
+
@clubs[ values[0] ] += 1 ## update usage counters
|
103
|
+
@clubs[ values[1] ] += 1
|
104
|
+
|
105
|
+
true
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
def find_scores!( line, opts={} )
|
111
|
+
# note: always call after find_dates !!!
|
112
|
+
# scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
|
113
|
+
# -- note: score might have two digits too
|
114
|
+
|
115
|
+
finder = ScoresFinder.new
|
116
|
+
finder.find!( line, opts )
|
117
|
+
end
|
118
|
+
|
119
|
+
def find_date!( line, start: )
|
120
|
+
## NB: lets us pass in start_at/end_at date (for event)
|
121
|
+
# for auto-complete year
|
122
|
+
|
123
|
+
# extract date from line
|
124
|
+
# and return it
|
125
|
+
# NB: side effect - removes date from line string
|
126
|
+
DateFormats.find!( line, start: start )
|
127
|
+
end
|
128
|
+
end # class AutoConfParser
|
129
|
+
end # module SportDb
|
@@ -22,7 +22,19 @@ module Datafile
|
|
22
22
|
|
23
23
|
|
24
24
|
|
25
|
-
|
25
|
+
## exclude pattern
|
26
|
+
## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
|
27
|
+
## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
|
28
|
+
EXCLUDE_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
29
|
+
\.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
|
30
|
+
/
|
31
|
+
}x
|
32
|
+
def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
|
33
|
+
|
34
|
+
|
35
|
+
class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
|
36
|
+
|
37
|
+
class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
|
26
38
|
class Entry
|
27
39
|
def initialize( pack, path )
|
28
40
|
@pack = pack ## parent package
|
@@ -38,6 +50,7 @@ end # class DirPackage::Entry
|
|
38
50
|
attr_reader :name, :path
|
39
51
|
|
40
52
|
def initialize( path )
|
53
|
+
## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
|
41
54
|
@path = path ## rename to root_path or base_path or somehting - why? why not?
|
42
55
|
|
43
56
|
basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
|
@@ -49,7 +62,9 @@ end # class DirPackage::Entry
|
|
49
62
|
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
50
63
|
Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
|
51
64
|
## todo/fix: (auto) skip and check for directories
|
52
|
-
if pattern.match( path )
|
65
|
+
if EXCLUDE_RE.match( path )
|
66
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
67
|
+
elsif pattern.match( path )
|
53
68
|
yield( Entry.new( self, path ))
|
54
69
|
else
|
55
70
|
## puts " skipping >#{path}<"
|
@@ -64,7 +79,7 @@ end # class DirPackage
|
|
64
79
|
|
65
80
|
|
66
81
|
## helper wrapper for datafiles in zips
|
67
|
-
class ZipPackage
|
82
|
+
class ZipPackage < Package
|
68
83
|
class Entry
|
69
84
|
def initialize( pack, entry )
|
70
85
|
@pack = pack
|
@@ -96,7 +111,9 @@ end # class ZipPackage::Entry
|
|
96
111
|
if entry.directory?
|
97
112
|
next ## skip
|
98
113
|
elsif entry.file?
|
99
|
-
if pattern.match( entry.name )
|
114
|
+
if EXCLUDE_RE.match( entry.name )
|
115
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
116
|
+
elsif pattern.match( entry.name )
|
100
117
|
yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
|
101
118
|
else
|
102
119
|
## puts " skipping >#{entry.name}<"
|
@@ -137,8 +154,12 @@ private
|
|
137
154
|
if entry.directory?
|
138
155
|
next ## skip
|
139
156
|
elsif entry.file?
|
140
|
-
if pattern.match( entry.name )
|
157
|
+
if EXCLUDE_RE.match( entry.name )
|
158
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
159
|
+
elsif pattern.match( entry.name )
|
141
160
|
entries << entry
|
161
|
+
else
|
162
|
+
## no match; skip too
|
142
163
|
end
|
143
164
|
else
|
144
165
|
puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
|
@@ -14,8 +14,7 @@ module SportDb
|
|
14
14
|
}x
|
15
15
|
|
16
16
|
def initialize( path_or_pack )
|
17
|
-
if path_or_pack.is_a?( Datafile::DirPackage ) ||
|
18
|
-
path_or_pack.is_a?( Datafile::ZipPackage )
|
17
|
+
if path_or_pack.is_a?( Datafile::Package )
|
19
18
|
@pack = path_or_pack
|
20
19
|
else ## assume it's a (string) path
|
21
20
|
path = path_or_pack
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_conf_parser_auto.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
class TestAutoConfParser < MiniTest::Test
|
13
|
+
|
14
|
+
def test_eng
|
15
|
+
txt = <<TXT
|
16
|
+
Matchday 1
|
17
|
+
|
18
|
+
[Fri Aug/11]
|
19
|
+
Arsenal FC 4-3 Leicester City
|
20
|
+
[Sat Aug/12]
|
21
|
+
Watford FC 3-3 Liverpool FC
|
22
|
+
Chelsea FC 2-3 Burnley FC
|
23
|
+
Crystal Palace 0-3 Huddersfield Town
|
24
|
+
Everton FC 1-0 Stoke City
|
25
|
+
Southampton FC 0-0 Swansea City
|
26
|
+
West Bromwich Albion 1-0 AFC Bournemouth
|
27
|
+
Brighton & Hove Albion 0-2 Manchester City
|
28
|
+
[Sun Aug/13]
|
29
|
+
Newcastle United 0-2 Tottenham Hotspur
|
30
|
+
Manchester United 4-0 West Ham United
|
31
|
+
|
32
|
+
|
33
|
+
Matchday 2
|
34
|
+
|
35
|
+
[Sat Aug/19]
|
36
|
+
Swansea City 0-4 Manchester United
|
37
|
+
AFC Bournemouth 0-2 Watford FC
|
38
|
+
Burnley FC 0-1 West Bromwich Albion
|
39
|
+
Leicester City 2-0 Brighton & Hove Albion
|
40
|
+
Liverpool FC 1-0 Crystal Palace
|
41
|
+
Southampton FC 3-2 West Ham United
|
42
|
+
Stoke City 1-0 Arsenal FC
|
43
|
+
[Sun Aug/20]
|
44
|
+
Huddersfield Town 1-0 Newcastle United
|
45
|
+
Tottenham Hotspur 1-2 Chelsea FC
|
46
|
+
[Mon Aug/21]
|
47
|
+
Manchester City 1-1 Everton FC
|
48
|
+
TXT
|
49
|
+
|
50
|
+
clubs = parse( txt )
|
51
|
+
|
52
|
+
assert_equal Hash(
|
53
|
+
'Arsenal FC' => 2,
|
54
|
+
'Leicester City' => 2,
|
55
|
+
'Watford FC' => 2,
|
56
|
+
'Liverpool FC' => 2,
|
57
|
+
'Chelsea FC' => 2,
|
58
|
+
'Burnley FC' => 2,
|
59
|
+
'Crystal Palace' => 2,
|
60
|
+
'Huddersfield Town' => 2,
|
61
|
+
'Everton FC' => 2,
|
62
|
+
'Stoke City' => 2,
|
63
|
+
'Southampton FC' => 2,
|
64
|
+
'Swansea City' => 2,
|
65
|
+
'West Bromwich Albion' => 2,
|
66
|
+
'AFC Bournemouth' => 2,
|
67
|
+
'Brighton & Hove Albion' => 2,
|
68
|
+
'Manchester City' => 2,
|
69
|
+
'Newcastle United' => 2,
|
70
|
+
'Tottenham Hotspur' => 2,
|
71
|
+
'Manchester United' => 2,
|
72
|
+
'West Ham United' => 2 ), clubs
|
73
|
+
end # method test_parse
|
74
|
+
|
75
|
+
def test_mauritius
|
76
|
+
txt = <<TXT
|
77
|
+
Preliminary Round
|
78
|
+
[Mon Jun/22]
|
79
|
+
Pointe-aux-Sables Mates 3-4 AS Port-Louis 2000 @ St. François Xavier Stadium, Port Louis
|
80
|
+
|
81
|
+
Quarterfinals
|
82
|
+
[Wed Jun/24]
|
83
|
+
Rivière du Rempart 3-1 pen (1-1) La Cure Sylvester @ Auguste Vollaire Stadium, Central Flacq
|
84
|
+
Chamarel SC 3-4 Petite Rivière Noire @ Germain Comarmond Stadium, Bambous
|
85
|
+
[Thu Jun/25]
|
86
|
+
Pamplemousses 2-0 AS Port-Louis 2000 @ Auguste Vollaire Stadium, Central Flacq
|
87
|
+
[Sat Jun/27]
|
88
|
+
Savanne SC 3-6 Entente Boulet Rouge @ Anjalay Stadium, Mapou
|
89
|
+
|
90
|
+
Semifinals
|
91
|
+
[Wed Jul/15]
|
92
|
+
Rivière du Rempart 2-3 Petite Rivière Noire @ New George V Stadium, Curepipe
|
93
|
+
Entente Boulet Rouge 0-2 Pamplemousses @ Germain Comarmond Stadium, Bambous
|
94
|
+
|
95
|
+
Final
|
96
|
+
[Sun Jul/19]
|
97
|
+
Petite Rivière Noire 2-0 Pamplemousses @ New George V Stadium, Curepipe
|
98
|
+
TXT
|
99
|
+
|
100
|
+
clubs = parse( txt )
|
101
|
+
|
102
|
+
assert_equal Hash(
|
103
|
+
'Pointe-aux-Sables Mates' => 1,
|
104
|
+
'AS Port-Louis 2000' => 2,
|
105
|
+
'Rivière du Rempart' => 2,
|
106
|
+
'La Cure Sylvester' => 1,
|
107
|
+
'Chamarel SC' => 1,
|
108
|
+
'Petite Rivière Noire' => 3,
|
109
|
+
'Pamplemousses' => 3,
|
110
|
+
'Savanne SC' => 1,
|
111
|
+
'Entente Boulet Rouge' => 2), clubs
|
112
|
+
end
|
113
|
+
|
114
|
+
|
115
|
+
################
|
116
|
+
## helper
|
117
|
+
def parse( txt, lang: 'en' )
|
118
|
+
lines = txt.split( /\n+/ ) # note: removes/strips empty lines
|
119
|
+
pp lines
|
120
|
+
|
121
|
+
start = Date.new( 2017, 7, 1 )
|
122
|
+
|
123
|
+
DateFormats.lang = lang # e.g. 'en'
|
124
|
+
parser = SportDb::AutoConfParser.new( lines, start )
|
125
|
+
clubs = parser.parse
|
126
|
+
pp clubs
|
127
|
+
clubs
|
128
|
+
end
|
129
|
+
end # class AutoConfParser
|
data/test/test_package.rb
CHANGED
@@ -10,6 +10,20 @@ require 'helper'
|
|
10
10
|
|
11
11
|
class TestPackage < MiniTest::Test
|
12
12
|
|
13
|
+
def test_exclude
|
14
|
+
assert Datafile.match_exclude( '.build/' )
|
15
|
+
assert Datafile.match_exclude( '.git/' )
|
16
|
+
|
17
|
+
assert Datafile.match_exclude( '/.build/' )
|
18
|
+
assert Datafile.match_exclude( '/.git/' )
|
19
|
+
|
20
|
+
assert Datafile.match_exclude( '.build/leagues.txt' )
|
21
|
+
assert Datafile.match_exclude( '.git/leagues.txt' )
|
22
|
+
|
23
|
+
assert Datafile.match_exclude( '/.build/leagues.txt' )
|
24
|
+
assert Datafile.match_exclude( '/.git/leagues.txt' )
|
25
|
+
end
|
26
|
+
|
13
27
|
def test_read
|
14
28
|
[Datafile::DirPackage.new( '../../../openfootball/england' ),
|
15
29
|
Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-readers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.6
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-config
|
@@ -97,6 +97,7 @@ files:
|
|
97
97
|
- lib/sportdb/readers.rb
|
98
98
|
- lib/sportdb/readers/club_props_reader.rb
|
99
99
|
- lib/sportdb/readers/conf_linter.rb
|
100
|
+
- lib/sportdb/readers/conf_parser_auto.rb
|
100
101
|
- lib/sportdb/readers/conf_reader.rb
|
101
102
|
- lib/sportdb/readers/datafile.rb
|
102
103
|
- lib/sportdb/readers/league_outline_reader.rb
|
@@ -106,6 +107,7 @@ files:
|
|
106
107
|
- lib/sportdb/readers/package.rb
|
107
108
|
- lib/sportdb/readers/version.rb
|
108
109
|
- test/helper.rb
|
110
|
+
- test/test_conf_parser_auto.rb
|
109
111
|
- test/test_match_parser.rb
|
110
112
|
- test/test_package.rb
|
111
113
|
- test/test_props.rb
|