sportdb-readers 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/lib/sportdb/readers.rb +1 -0
- data/lib/sportdb/readers/conf_parser_auto.rb +129 -0
- data/lib/sportdb/readers/datafile.rb +26 -5
- data/lib/sportdb/readers/package.rb +1 -2
- data/lib/sportdb/readers/version.rb +1 -1
- data/test/test_conf_parser_auto.rb +129 -0
- data/test/test_package.rb +14 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b82ac613158881351d512a4ffc9e7d15f34b133
|
4
|
+
data.tar.gz: 52251959f919b175feec37cadcc51e56c70e5037
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32c7921845dd7db5c2771a5941ab7ca0b27839eb88c80d71703354f6f533ca1c2eb99b47671e2462cbddcddb443fab3d0e115a4991f3d6155185a081d500796e
|
7
|
+
data.tar.gz: 072a747031829fb495773946c13ec6a43627c7b6de4b21b752c40eb2d1962c1fb3a047982fe83f1257be0b9be3f9115d0eed0d798de1b3dae2f93518e2ac02d3
|
data/Manifest.txt
CHANGED
@@ -5,6 +5,7 @@ Rakefile
|
|
5
5
|
lib/sportdb/readers.rb
|
6
6
|
lib/sportdb/readers/club_props_reader.rb
|
7
7
|
lib/sportdb/readers/conf_linter.rb
|
8
|
+
lib/sportdb/readers/conf_parser_auto.rb
|
8
9
|
lib/sportdb/readers/conf_reader.rb
|
9
10
|
lib/sportdb/readers/datafile.rb
|
10
11
|
lib/sportdb/readers/league_outline_reader.rb
|
@@ -14,6 +15,7 @@ lib/sportdb/readers/match_reader.rb
|
|
14
15
|
lib/sportdb/readers/package.rb
|
15
16
|
lib/sportdb/readers/version.rb
|
16
17
|
test/helper.rb
|
18
|
+
test/test_conf_parser_auto.rb
|
17
19
|
test/test_match_parser.rb
|
18
20
|
test/test_package.rb
|
19
21
|
test/test_props.rb
|
data/lib/sportdb/readers.rb
CHANGED
@@ -13,6 +13,7 @@ require 'sportdb/sync'
|
|
13
13
|
# our own code
|
14
14
|
require 'sportdb/readers/version' # let version always go first
|
15
15
|
require 'sportdb/readers/league_outline_reader'
|
16
|
+
require 'sportdb/readers/conf_parser_auto'
|
16
17
|
require 'sportdb/readers/conf_reader'
|
17
18
|
require 'sportdb/readers/conf_linter'
|
18
19
|
require 'sportdb/readers/match_parser'
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
|
5
|
+
|
6
|
+
class AutoConfParser
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
def initialize( lines, start )
|
11
|
+
@lines = lines ## todo/check: change to text instead of array of lines - why? why not?
|
12
|
+
@start = start
|
13
|
+
end
|
14
|
+
|
15
|
+
def parse
|
16
|
+
## try to find all clubs in match schedule
|
17
|
+
@clubs = Hash.new(0) ## keep track of usage counter
|
18
|
+
|
19
|
+
@lines.each do |line|
|
20
|
+
if is_round?( line )
|
21
|
+
logger.info "skipping matched round line: >#{line}<"
|
22
|
+
elsif try_parse_game( line )
|
23
|
+
# do nothing here
|
24
|
+
else
|
25
|
+
logger.info "skipping line (no match found): >#{line}<"
|
26
|
+
end
|
27
|
+
end # lines.each
|
28
|
+
|
29
|
+
@clubs
|
30
|
+
end
|
31
|
+
|
32
|
+
def is_round?( line )
|
33
|
+
## note: =~ return nil if not match found, and 0,1, etc for match
|
34
|
+
(line =~ SportDb.lang.regex_round) != nil
|
35
|
+
end
|
36
|
+
|
37
|
+
def try_parse_game( line )
|
38
|
+
# note: clone line; for possible test do NOT modify in place for now
|
39
|
+
# note: returns true if parsed, false if no match
|
40
|
+
parse_game( line.dup )
|
41
|
+
end
|
42
|
+
|
43
|
+
def parse_game( line )
|
44
|
+
logger.debug "parsing game (fixture) line: >#{line}<"
|
45
|
+
|
46
|
+
## remove all protected text runs e.g. []
|
47
|
+
## fix: add [ to end-of-line too
|
48
|
+
|
49
|
+
line = line.gsub( /\[
|
50
|
+
[^\]]+?
|
51
|
+
\]/x, '' ).strip
|
52
|
+
return true if line.empty? ## note: return true (for valid line with no match/clubs)
|
53
|
+
|
54
|
+
## split by geo (@) - remove for now
|
55
|
+
values = line.split( '@' )
|
56
|
+
line = values[0]
|
57
|
+
|
58
|
+
|
59
|
+
## try find date
|
60
|
+
date = find_date!( line, start: @start )
|
61
|
+
if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
|
62
|
+
line = line.sub( /\[
|
63
|
+
[^\]]+?
|
64
|
+
\]/x, '' ).strip
|
65
|
+
|
66
|
+
return true if line.empty? ## note: return true (for valid line with no match/clubs)
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
scores = find_scores!( line )
|
71
|
+
|
72
|
+
logger.debug " line: >#{line}<"
|
73
|
+
|
74
|
+
line = line.sub( /\[
|
75
|
+
[^\]]+?
|
76
|
+
\]/x, '$$' ) # note: replace first score tag with $$
|
77
|
+
line = line.gsub( /\[
|
78
|
+
[^\]]+?
|
79
|
+
\]/x, '' ) # note: replace/remove all other score tags with nothing
|
80
|
+
|
81
|
+
## clean-up remove all text run inside () or empty () too
|
82
|
+
line = line.gsub( /\(
|
83
|
+
[^)]*?
|
84
|
+
\)/x, '' )
|
85
|
+
|
86
|
+
|
87
|
+
## check for more match separators e.g. - or vs for now
|
88
|
+
line = line.sub( / \s+
|
89
|
+
(-|vs|v)
|
90
|
+
\s+
|
91
|
+
/ix, '$$' )
|
92
|
+
|
93
|
+
values = line.split( '$$' )
|
94
|
+
values = values.map { |value| value.strip } ## strip spaces
|
95
|
+
values = values.select { |value| !value.empty? } ## remove empty strings
|
96
|
+
|
97
|
+
return true if values.size == 0 ## note: return true (for valid line with no match/clubs)
|
98
|
+
return false if values.size != 2
|
99
|
+
|
100
|
+
puts "(auto config) try matching clubs:"
|
101
|
+
pp values
|
102
|
+
@clubs[ values[0] ] += 1 ## update usage counters
|
103
|
+
@clubs[ values[1] ] += 1
|
104
|
+
|
105
|
+
true
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
def find_scores!( line, opts={} )
|
111
|
+
# note: always call after find_dates !!!
|
112
|
+
# scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
|
113
|
+
# -- note: score might have two digits too
|
114
|
+
|
115
|
+
finder = ScoresFinder.new
|
116
|
+
finder.find!( line, opts )
|
117
|
+
end
|
118
|
+
|
119
|
+
def find_date!( line, start: )
|
120
|
+
## NB: lets us pass in start_at/end_at date (for event)
|
121
|
+
# for auto-complete year
|
122
|
+
|
123
|
+
# extract date from line
|
124
|
+
# and return it
|
125
|
+
# NB: side effect - removes date from line string
|
126
|
+
DateFormats.find!( line, start: start )
|
127
|
+
end
|
128
|
+
end # class AutoConfParser
|
129
|
+
end # module SportDb
|
@@ -22,7 +22,19 @@ module Datafile
|
|
22
22
|
|
23
23
|
|
24
24
|
|
25
|
-
|
25
|
+
## exclude pattern
|
26
|
+
## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
|
27
|
+
## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
|
28
|
+
EXCLUDE_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
29
|
+
\.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
|
30
|
+
/
|
31
|
+
}x
|
32
|
+
def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
|
33
|
+
|
34
|
+
|
35
|
+
class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
|
36
|
+
|
37
|
+
class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
|
26
38
|
class Entry
|
27
39
|
def initialize( pack, path )
|
28
40
|
@pack = pack ## parent package
|
@@ -38,6 +50,7 @@ end # class DirPackage::Entry
|
|
38
50
|
attr_reader :name, :path
|
39
51
|
|
40
52
|
def initialize( path )
|
53
|
+
## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
|
41
54
|
@path = path ## rename to root_path or base_path or somehting - why? why not?
|
42
55
|
|
43
56
|
basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
|
@@ -49,7 +62,9 @@ end # class DirPackage::Entry
|
|
49
62
|
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
50
63
|
Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
|
51
64
|
## todo/fix: (auto) skip and check for directories
|
52
|
-
if pattern.match( path )
|
65
|
+
if EXCLUDE_RE.match( path )
|
66
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
67
|
+
elsif pattern.match( path )
|
53
68
|
yield( Entry.new( self, path ))
|
54
69
|
else
|
55
70
|
## puts " skipping >#{path}<"
|
@@ -64,7 +79,7 @@ end # class DirPackage
|
|
64
79
|
|
65
80
|
|
66
81
|
## helper wrapper for datafiles in zips
|
67
|
-
class ZipPackage
|
82
|
+
class ZipPackage < Package
|
68
83
|
class Entry
|
69
84
|
def initialize( pack, entry )
|
70
85
|
@pack = pack
|
@@ -96,7 +111,9 @@ end # class ZipPackage::Entry
|
|
96
111
|
if entry.directory?
|
97
112
|
next ## skip
|
98
113
|
elsif entry.file?
|
99
|
-
if pattern.match( entry.name )
|
114
|
+
if EXCLUDE_RE.match( entry.name )
|
115
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
116
|
+
elsif pattern.match( entry.name )
|
100
117
|
yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
|
101
118
|
else
|
102
119
|
## puts " skipping >#{entry.name}<"
|
@@ -137,8 +154,12 @@ private
|
|
137
154
|
if entry.directory?
|
138
155
|
next ## skip
|
139
156
|
elsif entry.file?
|
140
|
-
if pattern.match( entry.name )
|
157
|
+
if EXCLUDE_RE.match( entry.name )
|
158
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
159
|
+
elsif pattern.match( entry.name )
|
141
160
|
entries << entry
|
161
|
+
else
|
162
|
+
## no match; skip too
|
142
163
|
end
|
143
164
|
else
|
144
165
|
puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
|
@@ -14,8 +14,7 @@ module SportDb
|
|
14
14
|
}x
|
15
15
|
|
16
16
|
def initialize( path_or_pack )
|
17
|
-
if path_or_pack.is_a?( Datafile::DirPackage ) ||
|
18
|
-
path_or_pack.is_a?( Datafile::ZipPackage )
|
17
|
+
if path_or_pack.is_a?( Datafile::Package )
|
19
18
|
@pack = path_or_pack
|
20
19
|
else ## assume it's a (string) path
|
21
20
|
path = path_or_pack
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_conf_parser_auto.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
class TestAutoConfParser < MiniTest::Test
|
13
|
+
|
14
|
+
def test_eng
|
15
|
+
txt = <<TXT
|
16
|
+
Matchday 1
|
17
|
+
|
18
|
+
[Fri Aug/11]
|
19
|
+
Arsenal FC 4-3 Leicester City
|
20
|
+
[Sat Aug/12]
|
21
|
+
Watford FC 3-3 Liverpool FC
|
22
|
+
Chelsea FC 2-3 Burnley FC
|
23
|
+
Crystal Palace 0-3 Huddersfield Town
|
24
|
+
Everton FC 1-0 Stoke City
|
25
|
+
Southampton FC 0-0 Swansea City
|
26
|
+
West Bromwich Albion 1-0 AFC Bournemouth
|
27
|
+
Brighton & Hove Albion 0-2 Manchester City
|
28
|
+
[Sun Aug/13]
|
29
|
+
Newcastle United 0-2 Tottenham Hotspur
|
30
|
+
Manchester United 4-0 West Ham United
|
31
|
+
|
32
|
+
|
33
|
+
Matchday 2
|
34
|
+
|
35
|
+
[Sat Aug/19]
|
36
|
+
Swansea City 0-4 Manchester United
|
37
|
+
AFC Bournemouth 0-2 Watford FC
|
38
|
+
Burnley FC 0-1 West Bromwich Albion
|
39
|
+
Leicester City 2-0 Brighton & Hove Albion
|
40
|
+
Liverpool FC 1-0 Crystal Palace
|
41
|
+
Southampton FC 3-2 West Ham United
|
42
|
+
Stoke City 1-0 Arsenal FC
|
43
|
+
[Sun Aug/20]
|
44
|
+
Huddersfield Town 1-0 Newcastle United
|
45
|
+
Tottenham Hotspur 1-2 Chelsea FC
|
46
|
+
[Mon Aug/21]
|
47
|
+
Manchester City 1-1 Everton FC
|
48
|
+
TXT
|
49
|
+
|
50
|
+
clubs = parse( txt )
|
51
|
+
|
52
|
+
assert_equal Hash(
|
53
|
+
'Arsenal FC' => 2,
|
54
|
+
'Leicester City' => 2,
|
55
|
+
'Watford FC' => 2,
|
56
|
+
'Liverpool FC' => 2,
|
57
|
+
'Chelsea FC' => 2,
|
58
|
+
'Burnley FC' => 2,
|
59
|
+
'Crystal Palace' => 2,
|
60
|
+
'Huddersfield Town' => 2,
|
61
|
+
'Everton FC' => 2,
|
62
|
+
'Stoke City' => 2,
|
63
|
+
'Southampton FC' => 2,
|
64
|
+
'Swansea City' => 2,
|
65
|
+
'West Bromwich Albion' => 2,
|
66
|
+
'AFC Bournemouth' => 2,
|
67
|
+
'Brighton & Hove Albion' => 2,
|
68
|
+
'Manchester City' => 2,
|
69
|
+
'Newcastle United' => 2,
|
70
|
+
'Tottenham Hotspur' => 2,
|
71
|
+
'Manchester United' => 2,
|
72
|
+
'West Ham United' => 2 ), clubs
|
73
|
+
end # method test_parse
|
74
|
+
|
75
|
+
def test_mauritius
|
76
|
+
txt = <<TXT
|
77
|
+
Preliminary Round
|
78
|
+
[Mon Jun/22]
|
79
|
+
Pointe-aux-Sables Mates 3-4 AS Port-Louis 2000 @ St. François Xavier Stadium, Port Louis
|
80
|
+
|
81
|
+
Quarterfinals
|
82
|
+
[Wed Jun/24]
|
83
|
+
Rivière du Rempart 3-1 pen (1-1) La Cure Sylvester @ Auguste Vollaire Stadium, Central Flacq
|
84
|
+
Chamarel SC 3-4 Petite Rivière Noire @ Germain Comarmond Stadium, Bambous
|
85
|
+
[Thu Jun/25]
|
86
|
+
Pamplemousses 2-0 AS Port-Louis 2000 @ Auguste Vollaire Stadium, Central Flacq
|
87
|
+
[Sat Jun/27]
|
88
|
+
Savanne SC 3-6 Entente Boulet Rouge @ Anjalay Stadium, Mapou
|
89
|
+
|
90
|
+
Semifinals
|
91
|
+
[Wed Jul/15]
|
92
|
+
Rivière du Rempart 2-3 Petite Rivière Noire @ New George V Stadium, Curepipe
|
93
|
+
Entente Boulet Rouge 0-2 Pamplemousses @ Germain Comarmond Stadium, Bambous
|
94
|
+
|
95
|
+
Final
|
96
|
+
[Sun Jul/19]
|
97
|
+
Petite Rivière Noire 2-0 Pamplemousses @ New George V Stadium, Curepipe
|
98
|
+
TXT
|
99
|
+
|
100
|
+
clubs = parse( txt )
|
101
|
+
|
102
|
+
assert_equal Hash(
|
103
|
+
'Pointe-aux-Sables Mates' => 1,
|
104
|
+
'AS Port-Louis 2000' => 2,
|
105
|
+
'Rivière du Rempart' => 2,
|
106
|
+
'La Cure Sylvester' => 1,
|
107
|
+
'Chamarel SC' => 1,
|
108
|
+
'Petite Rivière Noire' => 3,
|
109
|
+
'Pamplemousses' => 3,
|
110
|
+
'Savanne SC' => 1,
|
111
|
+
'Entente Boulet Rouge' => 2), clubs
|
112
|
+
end
|
113
|
+
|
114
|
+
|
115
|
+
################
|
116
|
+
## helper
|
117
|
+
def parse( txt, lang: 'en' )
|
118
|
+
lines = txt.split( /\n+/ ) # note: removes/strips empty lines
|
119
|
+
pp lines
|
120
|
+
|
121
|
+
start = Date.new( 2017, 7, 1 )
|
122
|
+
|
123
|
+
DateFormats.lang = lang # e.g. 'en'
|
124
|
+
parser = SportDb::AutoConfParser.new( lines, start )
|
125
|
+
clubs = parser.parse
|
126
|
+
pp clubs
|
127
|
+
clubs
|
128
|
+
end
|
129
|
+
end # class AutoConfParser
|
data/test/test_package.rb
CHANGED
@@ -10,6 +10,20 @@ require 'helper'
|
|
10
10
|
|
11
11
|
class TestPackage < MiniTest::Test
|
12
12
|
|
13
|
+
def test_exclude
|
14
|
+
assert Datafile.match_exclude( '.build/' )
|
15
|
+
assert Datafile.match_exclude( '.git/' )
|
16
|
+
|
17
|
+
assert Datafile.match_exclude( '/.build/' )
|
18
|
+
assert Datafile.match_exclude( '/.git/' )
|
19
|
+
|
20
|
+
assert Datafile.match_exclude( '.build/leagues.txt' )
|
21
|
+
assert Datafile.match_exclude( '.git/leagues.txt' )
|
22
|
+
|
23
|
+
assert Datafile.match_exclude( '/.build/leagues.txt' )
|
24
|
+
assert Datafile.match_exclude( '/.git/leagues.txt' )
|
25
|
+
end
|
26
|
+
|
13
27
|
def test_read
|
14
28
|
[Datafile::DirPackage.new( '../../../openfootball/england' ),
|
15
29
|
Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-readers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.6
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-config
|
@@ -97,6 +97,7 @@ files:
|
|
97
97
|
- lib/sportdb/readers.rb
|
98
98
|
- lib/sportdb/readers/club_props_reader.rb
|
99
99
|
- lib/sportdb/readers/conf_linter.rb
|
100
|
+
- lib/sportdb/readers/conf_parser_auto.rb
|
100
101
|
- lib/sportdb/readers/conf_reader.rb
|
101
102
|
- lib/sportdb/readers/datafile.rb
|
102
103
|
- lib/sportdb/readers/league_outline_reader.rb
|
@@ -106,6 +107,7 @@ files:
|
|
106
107
|
- lib/sportdb/readers/package.rb
|
107
108
|
- lib/sportdb/readers/version.rb
|
108
109
|
- test/helper.rb
|
110
|
+
- test/test_conf_parser_auto.rb
|
109
111
|
- test/test_match_parser.rb
|
110
112
|
- test/test_package.rb
|
111
113
|
- test/test_props.rb
|