sportdb-formats 0.4.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +24 -4
  3. data/Rakefile +3 -3
  4. data/lib/sportdb/formats.rb +25 -2
  5. data/lib/sportdb/formats/config.rb +40 -0
  6. data/lib/sportdb/formats/datafile.rb +42 -62
  7. data/lib/sportdb/formats/datafile_package.rb +160 -0
  8. data/lib/sportdb/formats/match/conf_parser.rb +120 -0
  9. data/lib/sportdb/formats/match/mapper.rb +319 -0
  10. data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
  11. data/lib/sportdb/formats/match/match_parser.rb +659 -0
  12. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
  13. data/lib/sportdb/formats/name_helper.rb +84 -0
  14. data/lib/sportdb/formats/outline_reader.rb +53 -15
  15. data/lib/sportdb/formats/package.rb +172 -160
  16. data/lib/sportdb/formats/parser_helper.rb +81 -0
  17. data/lib/sportdb/formats/score/score_formats.rb +180 -0
  18. data/lib/sportdb/formats/score/score_parser.rb +196 -0
  19. data/lib/sportdb/formats/structs/country.rb +1 -43
  20. data/lib/sportdb/formats/structs/group.rb +25 -0
  21. data/lib/sportdb/formats/structs/league.rb +7 -26
  22. data/lib/sportdb/formats/structs/match.rb +72 -51
  23. data/lib/sportdb/formats/structs/round.rb +14 -4
  24. data/lib/sportdb/formats/structs/season.rb +3 -0
  25. data/lib/sportdb/formats/structs/team.rb +144 -0
  26. data/lib/sportdb/formats/version.rb +2 -2
  27. data/test/helper.rb +83 -1
  28. data/test/test_clubs.rb +3 -3
  29. data/test/test_conf.rb +65 -0
  30. data/test/test_datafile.rb +21 -30
  31. data/test/test_match.rb +0 -6
  32. data/test/test_match_auto.rb +72 -0
  33. data/test/test_match_auto_champs.rb +45 -0
  34. data/test/test_match_auto_euro.rb +37 -0
  35. data/test/test_match_auto_worldcup.rb +61 -0
  36. data/test/test_match_champs.rb +27 -0
  37. data/test/test_match_eng.rb +26 -0
  38. data/test/test_match_euro.rb +27 -0
  39. data/test/test_match_worldcup.rb +27 -0
  40. data/test/test_name_helper.rb +67 -0
  41. data/test/test_outline_reader.rb +3 -3
  42. data/test/test_package.rb +21 -2
  43. data/test/test_package_match.rb +78 -0
  44. data/test/test_scores.rb +67 -51
  45. metadata +32 -12
  46. data/lib/sportdb/formats/scores.rb +0 -253
  47. data/lib/sportdb/formats/structs/club.rb +0 -213
  48. data/test/test_club_helpers.rb +0 -63
  49. data/test/test_datafile_match.rb +0 -65
@@ -0,0 +1,81 @@
1
+
2
module SportDb
  # Line-classification helpers for the text-format parsers.
  #
  # The round / group / knockout predicates read their regexes from
  # SportDb.lang, and is_knockout_round? writes debug messages through
  # +logger+; neither is defined in this module, so both must be provided
  # by the surrounding code.
  #
  # All predicates return strict booleans (true / false).
  module ParserHelper

    # Splits +txt+ into stripped lines, dropping blank lines and comments.
    #
    # - lines that are empty (after stripping) or start with '#' are skipped
    # - a trailing "# ..." end-of-line comment is cut off
    #
    # Returns an array of strings.
    ## todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
    def read_lines( txt )
      txt.each_line
         .map    { |raw|  raw.strip }
         .reject { |line| line.empty? || line.start_with?( '#' ) }
         .map    { |line| line.sub( /#.*/, '' ).strip }
    end


    # True if +line+ matches the round regex of the current language.
    def is_round?( line )
      ## note: =~ returns nil if no match is found, and 0, 1, etc. for a match
      !(line =~ SportDb.lang.regex_round).nil?
    end

    # True if +line+ marks a knockout round.
    #
    # A first-leg marker wins over everything else and yields false;
    # otherwise the language-specific knockout regex or one of the
    # language-independent markers (K.O., K.o., Knockout) yields true.
    def is_knockout_round?( line )
      ## todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg)
      if line =~ SportDb.lang.regex_leg1
        logger.debug " two leg knockout; skip knockout flag on first leg"
        false
      elsif line =~ SportDb.lang.regex_knockout_round
        logger.debug " setting knockout flag to true"
        true
      elsif line =~ /K\.O\.|K\.o\.|Knockout/
        ## NB: two language independent markers, that is, K.O. and Knockout
        logger.debug " setting knockout flag to true (lang independent marker)"
        true
      else
        false
      end
    end

    # True if +line+ is a round definition, that is, it contains the
    # required bar (|) marker and matches is_round?.
    def is_round_def?( line )
      ## todo/fix: use split('|') and check is_round? only on left hand side!!!! not whole line
      has_bar?( line ) && is_round?( line )
    end


    # True if +line+ matches the group regex of the current language.
    #
    # note: check after is_round? (round may contain group reference!)
    def is_group?( line )
      !(line =~ SportDb.lang.regex_group).nil?
    end

    # True if +line+ is a group definition, that is, it contains the
    # required bar (|) marker and matches is_group?.
    #
    # note: check after is_round? (round may contain group reference!)
    def is_group_def?( line )
      ## todo/fix: use split('|') and check is_group? only on left hand side!!!! not whole line
      has_bar?( line ) && is_group?( line )
    end


    # True if +line+ looks like a goals line, e.g.
    #
    #   Neymar 29', 71' (pen.) Oscar 90+1'; Marcelo 11' (o.g.)
    #
    # Checks for a minute marker preceded by a space:
    #   <space>90'  or  <space>90+1'
    def is_goals?( line )
      !(line =~ /[ ](\d{1,3}\+)?\d{1,3}'/).nil?
    end


    private

    # True if +line+ contains a bar (|) marker.
    def has_bar?( line )
      !(line =~ /\|/).nil?
    end

  end # module ParserHelper
end # module SportDb
81
+
@@ -0,0 +1,180 @@
1
+
2
module ScoreFormats

  ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?

  ## notes that apply to every score regex below:
  ##  - written in extended (x) mode; a literal space is spelled [ ]
  ##  - the leading \b only works after a non-alphanum char e.g. )
  ##  - the trailing lookahead requires a space, a closing ] or the end
  ##    todo/check: remove lookahead assertion - why require space?

  #####
  # english markers (penalty, extra time, ...)
  P_EN  = '(?: p | pen\.? | pso )'     # e.g. p, pen, pen., PSO, etc.
  ET_EN = '(?: aet | a\.e\.t\.? )'     # note: last . is optional (e.g a.e.t allowed too)


  ## penalty (optional) + extra time + scores in parens, e.g.
  ##   3-4 pen. 2-2 a.e.t. (1-1, 1-1)  or
  ##   3-4 pen. 2-2 a.e.t. (1-1, )     or
  ##   3-4 pen. 2-2 a.e.t. (1-1)       or
  ##            2-2 a.e.t. (1-1, 1-1)  or
  ##            2-2 a.e.t. (1-1, )     or
  ##            2-2 a.e.t. (1-1)
  EN__P_ET_FT_HT__RE = /\b
      (?: (?<score1p>\d{1,2}) [ ]* - [ ]* (?<score2p>\d{1,2}) [ ]* #{P_EN} [ ]* )?
      (?<score1et>\d{1,2}) [ ]* - [ ]* (?<score2et>\d{1,2}) [ ]* #{ET_EN} [ ]*
      \( [ ]*
         (?<score1>\d{1,2}) [ ]* - [ ]* (?<score2>\d{1,2}) [ ]*
         (?: , [ ]*
             (?: (?<score1i>\d{1,2}) [ ]* - [ ]* (?<score2i>\d{1,2}) [ ]* )?
         )?
      \)
      (?=[ \]]|$)/xi

  ###
  ## special case WITHOUT extra time!!
  ##   same as above, but no extra time part and the penalty part is required
  EN__P_FT_HT__RE = /\b
      (?<score1p>\d{1,2}) [ ]* - [ ]* (?<score2p>\d{1,2}) [ ]* #{P_EN} [ ]*
      \( [ ]*
         (?<score1>\d{1,2}) [ ]* - [ ]* (?<score2>\d{1,2}) [ ]*
         (?: , [ ]*
             (?: (?<score1i>\d{1,2}) [ ]* - [ ]* (?<score2i>\d{1,2}) [ ]* )?
         )?
      \)
      (?=[ \]]|$)/xi


  ## e.g. 2-1 (1-1) or
  ##      2-1
  ## note: for now the x separator used in Brazil and Portugal is accepted here too
  ##      e.g 1x1 or 1X1 or 0x2 or 3x3
  ##   todo/check/fix: move to its own use PT__FT_HT etc!!!!
  EN__FT_HT__RE = /\b
      (?<score1>\d{1,2}) [ ]* (?<sep>[x-]) [ ]* (?<score2>\d{1,2})
      (?: [ ]* \( [ ]*
          (?<score1i>\d{1,2}) [ ]* \k<sep> [ ]* (?<score2i>\d{1,2})
          [ ]* \)
      )?
      (?=[ \]]|$)/xi


  #####
  # deutsch - german markers (penalty, extra time, ...)
  ## todo add more marker e.g. im Elf. or such!!!
  P_DE  = '(?: ie | i\.e\.? )'     # e.g. iE, i.E., i.E etc.
  ET_DE = '(?: nv | n\.v\.? )'     # e.g. nV, n.V., n.V etc.

  ## all-in-one "literal" form, e.g.
  #   2:2 (1:1, 1:0) n.V. 5:1 i.E.  or
  #   2-2 (1-1, 1-0) n.V. 5-1 i.E.
  ## note: for now : or - is allowed as separator; the first one seen
  ##       is captured as sep and must be repeated by all later pairs
  DE__ET_FT_HT_P__RE = /\b
      (?<score1et>\d{1,2}) [ ]* (?<sep>[:-]) [ ]* (?<score2et>\d{1,2}) [ ]*
      \( [ ]*
         (?<score1>\d{1,2}) [ ]* \k<sep> [ ]* (?<score2>\d{1,2}) [ ]*
         (?: , [ ]*
             (?: (?<score1i>\d{1,2}) [ ]* \k<sep> [ ]* (?<score2i>\d{1,2}) [ ]* )?
         )?
      \) [ ]*
      #{ET_DE}
      (?: [ ]* (?<score1p>\d{1,2}) [ ]* \k<sep> [ ]* (?<score2p>\d{1,2}) [ ]* #{P_DE} )?
      (?=[ \]]|$)
      /xi


  ## e.g. 2:1 (1:1) or
  ##      2-1 (1-1) or
  ##      2:1       or
  ##      2-1
  DE__FT_HT__RE = /\b
      (?<score1>\d{1,2}) [ ]* (?<sep>[:-]) [ ]* (?<score2>\d{1,2})
      (?: [ ]* \( [ ]*
          (?<score1i>\d{1,2}) [ ]* \k<sep> [ ]* (?<score2i>\d{1,2})
          [ ]* \)
      )?
      (?=[ \]]|$)/x


  #############################################
  # map tables - pairs of 1) regex, 2) tag
  #   note: order matters; first come-first matched/served

  FORMATS_EN = [
    [ EN__P_ET_FT_HT__RE, '[SCORE.EN__P?_ET_(FT_HT?)]' ],   # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
    [ EN__P_FT_HT__RE,    '[SCORE.EN__P_(FT_HT?)]'     ],   # e.g. 5-1 pen. (1-1)
    [ EN__FT_HT__RE,      '[SCORE.EN__FT_(HT)?]'       ],   # e.g. 1-1 (1-0)
  ]

  FORMATS_DE = [
    [ DE__ET_FT_HT_P__RE, '[SCORE.DE__ET_(FT_HT?)_P?]' ],   # e.g. 2:2 (1:1, 1:0) n.V. 5:1 i.E.
    [ DE__FT_HT__RE,      '[SCORE.DE__FT_(HT)?]'       ],   # e.g. 1:1 (1:0)
  ]

  FORMATS = {
    en: FORMATS_EN,
    de: FORMATS_DE,
  }

end # module ScoreFormats
@@ -0,0 +1,196 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ ## note: let's follow the model of DateFormats - see the DateFormats gem for more!!!
5
+
6
+
7
+ ## note: make Score top-level and use like Date - why? why not?
8
class Score

  # Each score is kept as a pair (side 1 / side 2), one pair per stage.
  attr_reader :score1i,  :score2i,   # half time (ht) score
              :score1,   :score2,    # full time (ft) score
              :score1et, :score2et,  # extra time (et) score
              :score1p,  :score2p    # penalty (p) score
  ## todo/fix: add :score1agg, score2agg too - why? why not?!!!
  ##  add state too e.g. canceled or abandoned etc - why? why not?

  # Takes up to eight positional values in the order
  #   ht1, ht2, ft1, ft2, et1, et2, p1, p2
  # Missing values default to nil; extra values are ignored.
  #
  ## note: for now always assumes integers
  ##  todo/check - check/require integer args - why? why not?
  def initialize( *values )
    @score1i,  @score2i,
    @score1,   @score2,
    @score1et, @score2et,
    @score1p,  @score2p = values
  end

  # Returns the score pairs flattened into one array, cut off after the
  # last stage that has at least one value set. Earlier stages are always
  # included (as nil, nil if unset); an all-empty score yields [].
  #
  ## todo: how to handle game w/o extra time
  #   but w/ optional penalty ??? e.g. used in copa libertadores, for example
  #   return 0,0 or nil,nil for extra time score ?? or -1, -1 ??
  #   for now use nil,nil
  def to_a
    stages = [
      [score1i,  score2i],
      [score1,   score2],
      [score1et, score2et],
      [score1p,  score2p],
    ]

    last_set = stages.rindex { |one, two| one || two }
    return [] if last_set.nil?

    stages[0..last_set].flatten( 1 )
  end

end  # class Score
48
+
49
+
50
+
51
module ScoreFormats

  # Default language used by ScoreFormats.parse and ScoreFormats.find!
  # when no lang: is passed; defaults to english (:en).
  def self.lang
    @@lang ||= :en
  end

  # Sets the default language.
  ## note: lang is always stored as a symbol for now (NOT a string)
  def self.lang=( value )
    @@lang = value.to_sym
  end


  # Returns the ScoreParser for +lang+; parsers are created on first use
  # and cached per language.
  def self.parser( lang: )
    lang = lang.to_sym   ## note: make sure lang is always a symbol for now (NOT a string)

    @@parser ||= {}
    @@parser[ lang ] ||= ScoreParser.new( lang: lang )
  end

  # Parses the first score found in +line+; returns a Score or nil.
  def self.parse( line, lang: ScoreFormats.lang )
    parser( lang: lang ).parse( line )
  end

  # Like parse, but also replaces the matched score text in +line+
  # (in place) with the tag of the matching format.
  def self.find!( line, lang: ScoreFormats.lang )
    parser( lang: lang ).find!( line )
  end


  class ScoreParser

    include LogUtils::Logging

    # Picks the format table (pairs of regex and tag) for +lang+ from FORMATS.
    def initialize( lang: )
      @lang = lang.to_sym   ## note: make sure lang is always a symbol for now (NOT a string)

      ## fallback to english if lang not available
      ##  todo/fix: add/issue warning - why? why not?
      @formats = FORMATS[ @lang ] || FORMATS[ :en ]
    end


    # Tries every format regex in order and returns a Score built from the
    # first match; +line+ is left untouched.
    #
    # Returns nil if no format matches.
    ##  todo/fix - raise ArgumentError - invalid score; no format match found
    def parse( line )
      @formats.each do |re, _tag|
        m = re.match( line )
        return parse_matchdata( m )  if m
        # no match; continue; try next regex pattern
      end

      nil
    end # method parse


    # Extracts the first score from +line+ and returns it as a Score
    # (nil if no format matches).
    #
    # Side effect: the matched score text in +line+ is replaced in place by
    # the format's tag (e.g. [SCORE.EN__FT_(HT)?]).
    #
    # note: always call after find_dates !!!
    #   scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
    #   -- note: score might have two digits too
    #
    ### fix: depending on language allow 1:1 or 1-1
    ##   do NOT allow mix and match
    ##   e.g. default to en is 1-1
    ##        de is 1:1 etc.
    def find!( line )
      @formats.each do |re, tag|
        m = re.match( line )
        next unless m    # no match; continue; try next regex pattern

        score = parse_matchdata( m )

        ## replace exactly the span the regex matched. A plain string
        ##  substitution of m[0] would hit the first literal occurrence of
        ##  that text, which may sit earlier in the line at a position the
        ##  regex itself rejected (e.g. "x2-1 2-1").
        line[ m.begin(0)...m.end(0) ] = tag
        return score
      end

      nil   # note: nil if no match found
    end # method find!

  private

    # Builds a Score from the named captures of +m+.
    #
    # The half time, extra time and penalty pairs are only set when both
    # sides were captured; the full time pair is always converted with to_i.
    def parse_matchdata( m )
      # convert regex match_data captures to hash
      # - note: cannot use match_data like a hash (raises an exception if key/name is not present)
      # - note: names are turned into symbols for the lookups below
      h = {}
      m.names.each { |name| h[name.to_sym] = m[name] }

      logger.debug " [parse_matchdata] hash: >#{h.inspect}<"

      score1i,  score2i  = optional_pair( h, :score1i,  :score2i )    # half time (ht) - optional
      score1,   score2   = h[:score1].to_i, h[:score2].to_i           # full time (ft)
      score1et, score2et = optional_pair( h, :score1et, :score2et )   # extra time (et) - optional
      score1p,  score2p  = optional_pair( h, :score1p,  :score2p )    # penalty (p) - optional

      Score.new( score1i,  score2i,
                 score1,   score2,
                 score1et, score2et,
                 score1p,  score2p )
    end # method parse_matchdata

    # Returns both captures as integers, or [nil, nil] unless both are present.
    def optional_pair( h, key1, key2 )
      return [nil, nil] unless h[key1] && h[key2]

      [h[key1].to_i, h[key2].to_i]
    end

  end # class ScoreParser
end # module ScoreFormats
196
+