sportdb-readers 0.3.6 → 0.3.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa4225ef65ba9a0bddadba178cae48f142b2894d
4
- data.tar.gz: '0966b042258895e134ce4236c1cf211588fd42d5'
3
+ metadata.gz: 0b82ac613158881351d512a4ffc9e7d15f34b133
4
+ data.tar.gz: 52251959f919b175feec37cadcc51e56c70e5037
5
5
  SHA512:
6
- metadata.gz: 79eec4198136a2368667f49cf0b782551cca03f1d5cfd7e93ad6cd35f818589e8b3e6e35f9c3516cc441cd454da54516377fb1f04100b14b930aac2e275e441c
7
- data.tar.gz: 3f599edf0c5aab6234c0bed6c8e73156b6ee9c844d944fb019aa906861abff067398dc2b0316e00aea1ed3545494aa9cfeb6cae0eea0465039503dc6476f5369
6
+ metadata.gz: 32c7921845dd7db5c2771a5941ab7ca0b27839eb88c80d71703354f6f533ca1c2eb99b47671e2462cbddcddb443fab3d0e115a4991f3d6155185a081d500796e
7
+ data.tar.gz: 072a747031829fb495773946c13ec6a43627c7b6de4b21b752c40eb2d1962c1fb3a047982fe83f1257be0b9be3f9115d0eed0d798de1b3dae2f93518e2ac02d3
data/Manifest.txt CHANGED
@@ -5,6 +5,7 @@ Rakefile
5
5
  lib/sportdb/readers.rb
6
6
  lib/sportdb/readers/club_props_reader.rb
7
7
  lib/sportdb/readers/conf_linter.rb
8
+ lib/sportdb/readers/conf_parser_auto.rb
8
9
  lib/sportdb/readers/conf_reader.rb
9
10
  lib/sportdb/readers/datafile.rb
10
11
  lib/sportdb/readers/league_outline_reader.rb
@@ -14,6 +15,7 @@ lib/sportdb/readers/match_reader.rb
14
15
  lib/sportdb/readers/package.rb
15
16
  lib/sportdb/readers/version.rb
16
17
  test/helper.rb
18
+ test/test_conf_parser_auto.rb
17
19
  test/test_match_parser.rb
18
20
  test/test_package.rb
19
21
  test/test_props.rb
@@ -13,6 +13,7 @@ require 'sportdb/sync'
13
13
  # our own code
14
14
  require 'sportdb/readers/version' # let version always go first
15
15
  require 'sportdb/readers/league_outline_reader'
16
+ require 'sportdb/readers/conf_parser_auto'
16
17
  require 'sportdb/readers/conf_reader'
17
18
  require 'sportdb/readers/conf_linter'
18
19
  require 'sportdb/readers/match_parser'
@@ -0,0 +1,129 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+
5
+
6
class AutoConfParser
  ## Scans the lines of a match schedule and collects every club name
  ##  found in a game (fixture) line together with a usage counter.
  ##
  ##  usage:
  ##    clubs = AutoConfParser.new( lines, start ).parse
  ##    ## => { 'Arsenal FC' => 2, 'Leicester City' => 2, ... }

  include LogUtils::Logging

  ## a "tagged" text run in square brackets e.g. [Fri Aug/11]
  TAGGED_RUN_RE = /\[
                    [^\]]+?
                   \]/x

  ## a text run inside () or an empty () too
  PAREN_RUN_RE = /\(
                   [^)]*?
                  \)/x

  ## match separators between the two clubs e.g. - or vs or v (surrounded by spaces)
  SEPARATOR_RE = / \s+
                   (-|vs|v)
                   \s+
                 /ix

  ## lines - array of (text) lines of the match schedule
  ## start - start date; passed along to the date finder (see find_date!)
  def initialize( lines, start )
    @lines = lines    ## todo/check: change to text instead of array of lines - why? why not?
    @start = start
  end

  ## Walks over all lines and returns a hash of club name => usage count.
  ##   Round lines and lines that cannot be parsed as a game get logged and skipped.
  def parse
    ## try to find all clubs in match schedule
    @clubs = Hash.new(0)    ## keep track of usage counter

    @lines.each do |line|
      if is_round?( line )
        logger.info "skipping matched round line: >#{line}<"
      elsif try_parse_game( line )
        # do nothing here
      else
        logger.info "skipping line (no match found): >#{line}<"
      end
    end # lines.each

    @clubs
  end

  ## Returns true if the line matches the round regex of the current language.
  def is_round?( line )
    ## note: =~ return nil if not match found, and 0,1, etc for match
    (line =~ SportDb.lang.regex_round) != nil
  end

  ## Returns true if parsed, false if no match.
  def try_parse_game( line )
    # note: clone line; for possible test do NOT modify in place for now
    parse_game( line.dup )
  end

  ## Tries to split a game (fixture) line into exactly two club names
  ##  and bumps the usage counter of both.
  ##
  ##  returns true  - for a line with two clubs or for a valid line with no match/clubs
  ##          false - if the line does not split into exactly two names
  def parse_game( line )
    logger.debug "parsing game (fixture) line: >#{line}<"

    ## remove all protected text runs e.g. []
    ##   fix: add [ to end-of-line too
    line = line.gsub( TAGGED_RUN_RE, '' ).strip
    return true if line.empty?    ## note: return true (for valid line with no match/clubs)

    ## split by geo (@) - remove for now
    ##   note: use limit 2 - a plain split drops trailing empty fields,
    ##         so a line with nothing but "@" would return [] and [0] would be nil
    line = line.split( '@', 2 ).first
    return true if line.empty?    ## nothing left before the geo marker

    ## try find date
    date = find_date!( line, start: @start )
    if date   ## if found remove tagged run too; note using singular sub (NOT global gsub)
      line = line.sub( TAGGED_RUN_RE, '' ).strip
      return true if line.empty?    ## note: return true (for valid line with no match/clubs)
    end

    ## note: only called for its effect on line - the return value is not used;
    ##       the finder presumably replaces every score with a tagged run in place
    find_scores!( line )

    logger.debug " line: >#{line}<"

    line = line.sub( TAGGED_RUN_RE, '$$' )    # note: replace first score tag with $$
    line = line.gsub( TAGGED_RUN_RE, '' )     # note: replace/remove all other score tags with nothing

    ## clean-up remove all text run inside () or empty () too
    line = line.gsub( PAREN_RUN_RE, '' )

    ## check for more match separators e.g. - or vs for now
    line = line.sub( SEPARATOR_RE, '$$' )

    values = line.split( '$$' )
                 .map( &:strip )        ## strip spaces
                 .reject( &:empty? )    ## remove empty strings

    return true  if values.empty?       ## note: return true (for valid line with no match/clubs)
    return false if values.size != 2

    ## note: report via logger (like everything else in here) and NOT via puts/pp to stdout
    logger.debug "(auto config) try matching clubs: #{values.inspect}"
    values.each { |name| @clubs[ name ] += 1 }    ## update usage counters

    true
  end

  ## Hands the line over to a (new) ScoresFinder and returns its result.
  def find_scores!( line, opts={} )
    # note: always call after find_dates !!!
    #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
    #   -- note: score might have two digits too
    finder = ScoresFinder.new
    finder.find!( line, opts )
  end

  ## Hands the line over to DateFormats and returns the date found (if any).
  def find_date!( line, start: )
    ## NB: lets us pass in start_at/end_at date (for event)
    #   for auto-complete year

    # extract date from line
    # and return it
    # NB: side effect - removes date from line string
    DateFormats.find!( line, start: start )
  end
end # class AutoConfParser
129
+ end # module SportDb
@@ -22,7 +22,19 @@ module Datafile
22
22
 
23
23
 
24
24
 
25
- class DirPackage ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
25
## exclude pattern
##   for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
##   todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
##
##   note: anchor with \A (start of string) and NOT with ^ (start of any line);
##         otherwise a path with an embedded newline could match by accident
EXCLUDE_RE = %r{ (?:\A|/)           # start of string or start of a path segment
                 \.[a-zA-Z0-9_-]+   # (almost) any name BUT must start with dot e.g. .git, .build, etc.
                 /
               }x

## Checks if path is inside of (or is) a directory matched by pattern.
##   path    - (string) path to check e.g. '.build/leagues.txt'
##   pattern - regex to use; defaults to EXCLUDE_RE (dot directories)
##   returns the MatchData on a match (truthy) and nil otherwise
def self.match_exclude( path, pattern: EXCLUDE_RE )
  pattern.match( path )
end
33
+
34
+
35
+ class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
36
+
37
+ class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
26
38
  class Entry
27
39
  def initialize( pack, path )
28
40
  @pack = pack ## parent package
@@ -38,6 +50,7 @@ end # class DirPackage::Entry
38
50
  attr_reader :name, :path
39
51
 
40
52
  def initialize( path )
53
+ ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
54
  @path = path ## rename to root_path or base_path or something - why? why not?
42
55
 
43
56
  basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
@@ -49,7 +62,9 @@ end # class DirPackage::Entry
49
62
  ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
50
63
  Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
64
  ## todo/fix: (auto) skip and check for directories
52
- if pattern.match( path )
65
+ if EXCLUDE_RE.match( path )
66
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
67
+ elsif pattern.match( path )
53
68
  yield( Entry.new( self, path ))
54
69
  else
55
70
  ## puts " skipping >#{path}<"
@@ -64,7 +79,7 @@ end # class DirPackage
64
79
 
65
80
 
66
81
  ## helper wrapper for datafiles in zips
67
- class ZipPackage
82
+ class ZipPackage < Package
68
83
  class Entry
69
84
  def initialize( pack, entry )
70
85
  @pack = pack
@@ -96,7 +111,9 @@ end # class ZipPackage::Entry
96
111
  if entry.directory?
97
112
  next ## skip
98
113
  elsif entry.file?
99
- if pattern.match( entry.name )
114
+ if EXCLUDE_RE.match( entry.name )
115
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
116
+ elsif pattern.match( entry.name )
100
117
  yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
101
118
  else
102
119
  ## puts " skipping >#{entry.name}<"
@@ -137,8 +154,12 @@ private
137
154
  if entry.directory?
138
155
  next ## skip
139
156
  elsif entry.file?
140
- if pattern.match( entry.name )
157
+ if EXCLUDE_RE.match( entry.name )
158
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
159
+ elsif pattern.match( entry.name )
141
160
  entries << entry
161
+ else
162
+ ## no match; skip too
142
163
  end
143
164
  else
144
165
  puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
@@ -14,8 +14,7 @@ module SportDb
14
14
  }x
15
15
 
16
16
  def initialize( path_or_pack )
17
- if path_or_pack.is_a?( Datafile::DirPackage ) ||
18
- path_or_pack.is_a?( Datafile::ZipPackage )
17
+ if path_or_pack.is_a?( Datafile::Package )
19
18
  @pack = path_or_pack
20
19
  else ## assume it's a (string) path
21
20
  path = path_or_pack
@@ -6,7 +6,7 @@ module Readers
6
6
 
7
7
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
8
  MINOR = 3
9
- PATCH = 6
9
+ PATCH = 7
10
10
  VERSION = [MAJOR,MINOR,PATCH].join('.')
11
11
 
12
12
  def self.version
@@ -0,0 +1,129 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_conf_parser_auto.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+
12
class TestAutoConfParser < Minitest::Test
  ## Checks that AutoConfParser finds all club names (plus usage counters)
  ##  in a match schedule.
  ##
  ##  note: uses Minitest (and NOT the older MiniTest compatibility constant)

  ## two matchdays of an english league schedule - every club plays twice
  def test_eng
    txt = <<TXT
Matchday 1

[Fri Aug/11]
Arsenal FC 4-3 Leicester City
[Sat Aug/12]
Watford FC 3-3 Liverpool FC
Chelsea FC 2-3 Burnley FC
Crystal Palace 0-3 Huddersfield Town
Everton FC 1-0 Stoke City
Southampton FC 0-0 Swansea City
West Bromwich Albion 1-0 AFC Bournemouth
Brighton & Hove Albion 0-2 Manchester City
[Sun Aug/13]
Newcastle United 0-2 Tottenham Hotspur
Manchester United 4-0 West Ham United


Matchday 2

[Sat Aug/19]
Swansea City 0-4 Manchester United
AFC Bournemouth 0-2 Watford FC
Burnley FC 0-1 West Bromwich Albion
Leicester City 2-0 Brighton & Hove Albion
Liverpool FC 1-0 Crystal Palace
Southampton FC 3-2 West Ham United
Stoke City 1-0 Arsenal FC
[Sun Aug/20]
Huddersfield Town 1-0 Newcastle United
Tottenham Hotspur 1-2 Chelsea FC
[Mon Aug/21]
Manchester City 1-1 Everton FC
TXT

    clubs = parse( txt )

    expected = {
      'Arsenal FC'             => 2,
      'Leicester City'         => 2,
      'Watford FC'             => 2,
      'Liverpool FC'           => 2,
      'Chelsea FC'             => 2,
      'Burnley FC'             => 2,
      'Crystal Palace'         => 2,
      'Huddersfield Town'      => 2,
      'Everton FC'             => 2,
      'Stoke City'             => 2,
      'Southampton FC'         => 2,
      'Swansea City'           => 2,
      'West Bromwich Albion'   => 2,
      'AFC Bournemouth'        => 2,
      'Brighton & Hove Albion' => 2,
      'Manchester City'        => 2,
      'Newcastle United'       => 2,
      'Tottenham Hotspur'      => 2,
      'Manchester United'      => 2,
      'West Ham United'        => 2,
    }
    assert_equal expected, clubs
  end # method test_eng

  ## cup schedule incl. geo markers (@ stadium, city) and a penalty score with (1-1) in parentheses
  def test_mauritius
    txt = <<TXT
Preliminary Round
[Mon Jun/22]
Pointe-aux-Sables Mates 3-4 AS Port-Louis 2000 @ St. François Xavier Stadium, Port Louis

Quarterfinals
[Wed Jun/24]
Rivière du Rempart 3-1 pen (1-1) La Cure Sylvester @ Auguste Vollaire Stadium, Central Flacq
Chamarel SC 3-4 Petite Rivière Noire @ Germain Comarmond Stadium, Bambous
[Thu Jun/25]
Pamplemousses 2-0 AS Port-Louis 2000 @ Auguste Vollaire Stadium, Central Flacq
[Sat Jun/27]
Savanne SC 3-6 Entente Boulet Rouge @ Anjalay Stadium, Mapou

Semifinals
[Wed Jul/15]
Rivière du Rempart 2-3 Petite Rivière Noire @ New George V Stadium, Curepipe
Entente Boulet Rouge 0-2 Pamplemousses @ Germain Comarmond Stadium, Bambous

Final
[Sun Jul/19]
Petite Rivière Noire 2-0 Pamplemousses @ New George V Stadium, Curepipe
TXT

    clubs = parse( txt )

    expected = {
      'Pointe-aux-Sables Mates' => 1,
      'AS Port-Louis 2000'      => 2,
      'Rivière du Rempart'      => 2,
      'La Cure Sylvester'       => 1,
      'Chamarel SC'             => 1,
      'Petite Rivière Noire'    => 3,
      'Pamplemousses'           => 3,
      'Savanne SC'              => 1,
      'Entente Boulet Rouge'    => 2,
    }
    assert_equal expected, clubs
  end # method test_mauritius


  ################
  ## helper
  ##   splits the text into lines, runs the parser and returns the clubs hash
  ##   note: changes the (global) DateFormats.lang setting
  def parse( txt, lang: 'en' )
    lines = txt.split( /\n+/ )    # note: removes/strips empty lines
    pp lines

    start = Date.new( 2017, 7, 1 )

    DateFormats.lang = lang    # e.g. 'en'
    parser = SportDb::AutoConfParser.new( lines, start )
    clubs = parser.parse
    pp clubs
    clubs
  end
end # class TestAutoConfParser
data/test/test_package.rb CHANGED
@@ -10,6 +10,20 @@ require 'helper'
10
10
 
11
11
  class TestPackage < MiniTest::Test
12
12
 
13
def test_exclude
  ## all of these are (or are inside of) a dot directory and must get excluded
  dot_dir_paths = [
    '.build/',
    '.git/',
    '/.build/',
    '/.git/',
    '.build/leagues.txt',
    '.git/leagues.txt',
    '/.build/leagues.txt',
    '/.git/leagues.txt',
  ]

  dot_dir_paths.each do |dot_dir_path|
    assert Datafile.match_exclude( dot_dir_path ), "expected >#{dot_dir_path}< to get excluded"
  end
end
26
+
13
27
  def test_read
14
28
  [Datafile::DirPackage.new( '../../../openfootball/england' ),
15
29
  Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-readers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-22 00:00:00.000000000 Z
11
+ date: 2019-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-config
@@ -97,6 +97,7 @@ files:
97
97
  - lib/sportdb/readers.rb
98
98
  - lib/sportdb/readers/club_props_reader.rb
99
99
  - lib/sportdb/readers/conf_linter.rb
100
+ - lib/sportdb/readers/conf_parser_auto.rb
100
101
  - lib/sportdb/readers/conf_reader.rb
101
102
  - lib/sportdb/readers/datafile.rb
102
103
  - lib/sportdb/readers/league_outline_reader.rb
@@ -106,6 +107,7 @@ files:
106
107
  - lib/sportdb/readers/package.rb
107
108
  - lib/sportdb/readers/version.rb
108
109
  - test/helper.rb
110
+ - test/test_conf_parser_auto.rb
109
111
  - test/test_match_parser.rb
110
112
  - test/test_package.rb
111
113
  - test/test_props.rb