sportdb-readers 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa4225ef65ba9a0bddadba178cae48f142b2894d
4
- data.tar.gz: '0966b042258895e134ce4236c1cf211588fd42d5'
3
+ metadata.gz: 0b82ac613158881351d512a4ffc9e7d15f34b133
4
+ data.tar.gz: 52251959f919b175feec37cadcc51e56c70e5037
5
5
  SHA512:
6
- metadata.gz: 79eec4198136a2368667f49cf0b782551cca03f1d5cfd7e93ad6cd35f818589e8b3e6e35f9c3516cc441cd454da54516377fb1f04100b14b930aac2e275e441c
7
- data.tar.gz: 3f599edf0c5aab6234c0bed6c8e73156b6ee9c844d944fb019aa906861abff067398dc2b0316e00aea1ed3545494aa9cfeb6cae0eea0465039503dc6476f5369
6
+ metadata.gz: 32c7921845dd7db5c2771a5941ab7ca0b27839eb88c80d71703354f6f533ca1c2eb99b47671e2462cbddcddb443fab3d0e115a4991f3d6155185a081d500796e
7
+ data.tar.gz: 072a747031829fb495773946c13ec6a43627c7b6de4b21b752c40eb2d1962c1fb3a047982fe83f1257be0b9be3f9115d0eed0d798de1b3dae2f93518e2ac02d3
data/Manifest.txt CHANGED
@@ -5,6 +5,7 @@ Rakefile
5
5
  lib/sportdb/readers.rb
6
6
  lib/sportdb/readers/club_props_reader.rb
7
7
  lib/sportdb/readers/conf_linter.rb
8
+ lib/sportdb/readers/conf_parser_auto.rb
8
9
  lib/sportdb/readers/conf_reader.rb
9
10
  lib/sportdb/readers/datafile.rb
10
11
  lib/sportdb/readers/league_outline_reader.rb
@@ -14,6 +15,7 @@ lib/sportdb/readers/match_reader.rb
14
15
  lib/sportdb/readers/package.rb
15
16
  lib/sportdb/readers/version.rb
16
17
  test/helper.rb
18
+ test/test_conf_parser_auto.rb
17
19
  test/test_match_parser.rb
18
20
  test/test_package.rb
19
21
  test/test_props.rb
@@ -13,6 +13,7 @@ require 'sportdb/sync'
13
13
  # our own code
14
14
  require 'sportdb/readers/version' # let version always go first
15
15
  require 'sportdb/readers/league_outline_reader'
16
+ require 'sportdb/readers/conf_parser_auto'
16
17
  require 'sportdb/readers/conf_reader'
17
18
  require 'sportdb/readers/conf_linter'
18
19
  require 'sportdb/readers/match_parser'
@@ -0,0 +1,129 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+
5
+
6
+ class AutoConfParser
7
+
8
+ include LogUtils::Logging
9
+
10
+ def initialize( lines, start )
11
+ @lines = lines ## todo/check: change to text instead of array of lines - why? why not?
12
+ @start = start
13
+ end
14
+
15
+ def parse
16
+ ## try to find all clubs in match schedule
17
+ @clubs = Hash.new(0) ## keep track of usage counter
18
+
19
+ @lines.each do |line|
20
+ if is_round?( line )
21
+ logger.info "skipping matched round line: >#{line}<"
22
+ elsif try_parse_game( line )
23
+ # do nothing here
24
+ else
25
+ logger.info "skipping line (no match found): >#{line}<"
26
+ end
27
+ end # lines.each
28
+
29
+ @clubs
30
+ end
31
+
32
+ def is_round?( line )
33
+ ## note: =~ return nil if not match found, and 0,1, etc for match
34
+ (line =~ SportDb.lang.regex_round) != nil
35
+ end
36
+
37
+ def try_parse_game( line )
38
+ # note: clone line; for possible test do NOT modify in place for now
39
+ # note: returns true if parsed, false if no match
40
+ parse_game( line.dup )
41
+ end
42
+
43
+ def parse_game( line )
44
+ logger.debug "parsing game (fixture) line: >#{line}<"
45
+
46
+ ## remove all protected text runs e.g. []
47
+ ## fix: add [ to end-of-line too
48
+
49
+ line = line.gsub( /\[
50
+ [^\]]+?
51
+ \]/x, '' ).strip
52
+ return true if line.empty? ## note: return true (for valid line with no match/clubs)
53
+
54
+ ## split by geo (@) - remove for now
55
+ values = line.split( '@' )
56
+ line = values[0]
57
+
58
+
59
+ ## try find date
60
+ date = find_date!( line, start: @start )
61
+ if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
62
+ line = line.sub( /\[
63
+ [^\]]+?
64
+ \]/x, '' ).strip
65
+
66
+ return true if line.empty? ## note: return true (for valid line with no match/clubs)
67
+ end
68
+
69
+
70
+ scores = find_scores!( line )
71
+
72
+ logger.debug " line: >#{line}<"
73
+
74
+ line = line.sub( /\[
75
+ [^\]]+?
76
+ \]/x, '$$' ) # note: replace first score tag with $$
77
+ line = line.gsub( /\[
78
+ [^\]]+?
79
+ \]/x, '' ) # note: replace/remove all other score tags with nothing
80
+
81
+ ## clean-up remove all text run inside () or empty () too
82
+ line = line.gsub( /\(
83
+ [^)]*?
84
+ \)/x, '' )
85
+
86
+
87
+ ## check for more match separators e.g. - or vs for now
88
+ line = line.sub( / \s+
89
+ (-|vs|v)
90
+ \s+
91
+ /ix, '$$' )
92
+
93
+ values = line.split( '$$' )
94
+ values = values.map { |value| value.strip } ## strip spaces
95
+ values = values.select { |value| !value.empty? } ## remove empty strings
96
+
97
+ return true if values.size == 0 ## note: return true (for valid line with no match/clubs)
98
+ return false if values.size != 2
99
+
100
+ puts "(auto config) try matching clubs:"
101
+ pp values
102
+ @clubs[ values[0] ] += 1 ## update usage counters
103
+ @clubs[ values[1] ] += 1
104
+
105
+ true
106
+ end
107
+
108
+
109
+
110
+ def find_scores!( line, opts={} )
111
+ # note: always call after find_dates !!!
112
+ # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
113
+ # -- note: score might have two digits too
114
+
115
+ finder = ScoresFinder.new
116
+ finder.find!( line, opts )
117
+ end
118
+
119
+ def find_date!( line, start: )
120
+ ## NB: lets us pass in start_at/end_at date (for event)
121
+ # for auto-complete year
122
+
123
+ # extract date from line
124
+ # and return it
125
+ # NB: side effect - removes date from line string
126
+ DateFormats.find!( line, start: start )
127
+ end
128
+ end # class AutoConfParser
129
+ end # module SportDb
@@ -22,7 +22,19 @@ module Datafile
22
22
 
23
23
 
24
24
 
25
- class DirPackage ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
25
+ ## exclude pattern
26
+ ## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
27
+ ## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
28
+ EXCLUDE_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
29
+ \.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
30
+ /
31
+ }x
32
+ def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
33
+
34
+
35
+ class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
36
+
37
+ class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
26
38
  class Entry
27
39
  def initialize( pack, path )
28
40
  @pack = pack ## parent package
@@ -38,6 +50,7 @@ end # class DirPackage::Entry
38
50
  attr_reader :name, :path
39
51
 
40
52
  def initialize( path )
53
+ ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
54
  @path = path ## rename to root_path or base_path or somehting - why? why not?
42
55
 
43
56
  basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
@@ -49,7 +62,9 @@ end # class DirPackage::Entry
49
62
  ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
50
63
  Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
64
  ## todo/fix: (auto) skip and check for directories
52
- if pattern.match( path )
65
+ if EXCLUDE_RE.match( path )
66
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
67
+ elsif pattern.match( path )
53
68
  yield( Entry.new( self, path ))
54
69
  else
55
70
  ## puts " skipping >#{path}<"
@@ -64,7 +79,7 @@ end # class DirPackage
64
79
 
65
80
 
66
81
  ## helper wrapper for datafiles in zips
67
- class ZipPackage
82
+ class ZipPackage < Package
68
83
  class Entry
69
84
  def initialize( pack, entry )
70
85
  @pack = pack
@@ -96,7 +111,9 @@ end # class ZipPackage::Entry
96
111
  if entry.directory?
97
112
  next ## skip
98
113
  elsif entry.file?
99
- if pattern.match( entry.name )
114
+ if EXCLUDE_RE.match( entry.name )
115
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
116
+ elsif pattern.match( entry.name )
100
117
  yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
101
118
  else
102
119
  ## puts " skipping >#{entry.name}<"
@@ -137,8 +154,12 @@ private
137
154
  if entry.directory?
138
155
  next ## skip
139
156
  elsif entry.file?
140
- if pattern.match( entry.name )
157
+ if EXCLUDE_RE.match( entry.name )
158
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
159
+ elsif pattern.match( entry.name )
141
160
  entries << entry
161
+ else
162
+ ## no match; skip too
142
163
  end
143
164
  else
144
165
  puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
@@ -14,8 +14,7 @@ module SportDb
14
14
  }x
15
15
 
16
16
  def initialize( path_or_pack )
17
- if path_or_pack.is_a?( Datafile::DirPackage ) ||
18
- path_or_pack.is_a?( Datafile::ZipPackage )
17
+ if path_or_pack.is_a?( Datafile::Package )
19
18
  @pack = path_or_pack
20
19
  else ## assume it's a (string) path
21
20
  path = path_or_pack
@@ -6,7 +6,7 @@ module Readers
6
6
 
7
7
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
8
  MINOR = 3
9
- PATCH = 6
9
+ PATCH = 7
10
10
  VERSION = [MAJOR,MINOR,PATCH].join('.')
11
11
 
12
12
  def self.version
@@ -0,0 +1,129 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_conf_parser_auto.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+
12
+ class TestAutoConfParser < MiniTest::Test
13
+
14
+ def test_eng
15
+ txt = <<TXT
16
+ Matchday 1
17
+
18
+ [Fri Aug/11]
19
+ Arsenal FC 4-3 Leicester City
20
+ [Sat Aug/12]
21
+ Watford FC 3-3 Liverpool FC
22
+ Chelsea FC 2-3 Burnley FC
23
+ Crystal Palace 0-3 Huddersfield Town
24
+ Everton FC 1-0 Stoke City
25
+ Southampton FC 0-0 Swansea City
26
+ West Bromwich Albion 1-0 AFC Bournemouth
27
+ Brighton & Hove Albion 0-2 Manchester City
28
+ [Sun Aug/13]
29
+ Newcastle United 0-2 Tottenham Hotspur
30
+ Manchester United 4-0 West Ham United
31
+
32
+
33
+ Matchday 2
34
+
35
+ [Sat Aug/19]
36
+ Swansea City 0-4 Manchester United
37
+ AFC Bournemouth 0-2 Watford FC
38
+ Burnley FC 0-1 West Bromwich Albion
39
+ Leicester City 2-0 Brighton & Hove Albion
40
+ Liverpool FC 1-0 Crystal Palace
41
+ Southampton FC 3-2 West Ham United
42
+ Stoke City 1-0 Arsenal FC
43
+ [Sun Aug/20]
44
+ Huddersfield Town 1-0 Newcastle United
45
+ Tottenham Hotspur 1-2 Chelsea FC
46
+ [Mon Aug/21]
47
+ Manchester City 1-1 Everton FC
48
+ TXT
49
+
50
+ clubs = parse( txt )
51
+
52
+ assert_equal Hash(
53
+ 'Arsenal FC' => 2,
54
+ 'Leicester City' => 2,
55
+ 'Watford FC' => 2,
56
+ 'Liverpool FC' => 2,
57
+ 'Chelsea FC' => 2,
58
+ 'Burnley FC' => 2,
59
+ 'Crystal Palace' => 2,
60
+ 'Huddersfield Town' => 2,
61
+ 'Everton FC' => 2,
62
+ 'Stoke City' => 2,
63
+ 'Southampton FC' => 2,
64
+ 'Swansea City' => 2,
65
+ 'West Bromwich Albion' => 2,
66
+ 'AFC Bournemouth' => 2,
67
+ 'Brighton & Hove Albion' => 2,
68
+ 'Manchester City' => 2,
69
+ 'Newcastle United' => 2,
70
+ 'Tottenham Hotspur' => 2,
71
+ 'Manchester United' => 2,
72
+ 'West Ham United' => 2 ), clubs
73
+ end # method test_parse
74
+
75
+ def test_mauritius
76
+ txt = <<TXT
77
+ Preliminary Round
78
+ [Mon Jun/22]
79
+ Pointe-aux-Sables Mates 3-4 AS Port-Louis 2000 @ St. François Xavier Stadium, Port Louis
80
+
81
+ Quarterfinals
82
+ [Wed Jun/24]
83
+ Rivière du Rempart 3-1 pen (1-1) La Cure Sylvester @ Auguste Vollaire Stadium, Central Flacq
84
+ Chamarel SC 3-4 Petite Rivière Noire @ Germain Comarmond Stadium, Bambous
85
+ [Thu Jun/25]
86
+ Pamplemousses 2-0 AS Port-Louis 2000 @ Auguste Vollaire Stadium, Central Flacq
87
+ [Sat Jun/27]
88
+ Savanne SC 3-6 Entente Boulet Rouge @ Anjalay Stadium, Mapou
89
+
90
+ Semifinals
91
+ [Wed Jul/15]
92
+ Rivière du Rempart 2-3 Petite Rivière Noire @ New George V Stadium, Curepipe
93
+ Entente Boulet Rouge 0-2 Pamplemousses @ Germain Comarmond Stadium, Bambous
94
+
95
+ Final
96
+ [Sun Jul/19]
97
+ Petite Rivière Noire 2-0 Pamplemousses @ New George V Stadium, Curepipe
98
+ TXT
99
+
100
+ clubs = parse( txt )
101
+
102
+ assert_equal Hash(
103
+ 'Pointe-aux-Sables Mates' => 1,
104
+ 'AS Port-Louis 2000' => 2,
105
+ 'Rivière du Rempart' => 2,
106
+ 'La Cure Sylvester' => 1,
107
+ 'Chamarel SC' => 1,
108
+ 'Petite Rivière Noire' => 3,
109
+ 'Pamplemousses' => 3,
110
+ 'Savanne SC' => 1,
111
+ 'Entente Boulet Rouge' => 2), clubs
112
+ end
113
+
114
+
115
+ ################
116
+ ## helper
117
+ def parse( txt, lang: 'en' )
118
+ lines = txt.split( /\n+/ ) # note: removes/strips empty lines
119
+ pp lines
120
+
121
+ start = Date.new( 2017, 7, 1 )
122
+
123
+ DateFormats.lang = lang # e.g. 'en'
124
+ parser = SportDb::AutoConfParser.new( lines, start )
125
+ clubs = parser.parse
126
+ pp clubs
127
+ clubs
128
+ end
129
+ end # class AutoConfParser
data/test/test_package.rb CHANGED
@@ -10,6 +10,20 @@ require 'helper'
10
10
 
11
11
  class TestPackage < MiniTest::Test
12
12
 
13
+ def test_exclude
14
+ assert Datafile.match_exclude( '.build/' )
15
+ assert Datafile.match_exclude( '.git/' )
16
+
17
+ assert Datafile.match_exclude( '/.build/' )
18
+ assert Datafile.match_exclude( '/.git/' )
19
+
20
+ assert Datafile.match_exclude( '.build/leagues.txt' )
21
+ assert Datafile.match_exclude( '.git/leagues.txt' )
22
+
23
+ assert Datafile.match_exclude( '/.build/leagues.txt' )
24
+ assert Datafile.match_exclude( '/.git/leagues.txt' )
25
+ end
26
+
13
27
  def test_read
14
28
  [Datafile::DirPackage.new( '../../../openfootball/england' ),
15
29
  Datafile::ZipPackage.new( 'tmp/england-master.zip' )].each do |eng|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-readers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-22 00:00:00.000000000 Z
11
+ date: 2019-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-config
@@ -97,6 +97,7 @@ files:
97
97
  - lib/sportdb/readers.rb
98
98
  - lib/sportdb/readers/club_props_reader.rb
99
99
  - lib/sportdb/readers/conf_linter.rb
100
+ - lib/sportdb/readers/conf_parser_auto.rb
100
101
  - lib/sportdb/readers/conf_reader.rb
101
102
  - lib/sportdb/readers/datafile.rb
102
103
  - lib/sportdb/readers/league_outline_reader.rb
@@ -106,6 +107,7 @@ files:
106
107
  - lib/sportdb/readers/package.rb
107
108
  - lib/sportdb/readers/version.rb
108
109
  - test/helper.rb
110
+ - test/test_conf_parser_auto.rb
109
111
  - test/test_match_parser.rb
110
112
  - test/test_package.rb
111
113
  - test/test_props.rb