sportdb-formats 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +24 -4
  3. data/Rakefile +3 -3
  4. data/lib/sportdb/formats.rb +25 -2
  5. data/lib/sportdb/formats/config.rb +40 -0
  6. data/lib/sportdb/formats/datafile.rb +42 -62
  7. data/lib/sportdb/formats/datafile_package.rb +160 -0
  8. data/lib/sportdb/formats/match/conf_parser.rb +120 -0
  9. data/lib/sportdb/formats/match/mapper.rb +319 -0
  10. data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
  11. data/lib/sportdb/formats/match/match_parser.rb +659 -0
  12. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
  13. data/lib/sportdb/formats/name_helper.rb +84 -0
  14. data/lib/sportdb/formats/outline_reader.rb +53 -15
  15. data/lib/sportdb/formats/package.rb +172 -160
  16. data/lib/sportdb/formats/parser_helper.rb +81 -0
  17. data/lib/sportdb/formats/score/score_formats.rb +180 -0
  18. data/lib/sportdb/formats/score/score_parser.rb +196 -0
  19. data/lib/sportdb/formats/structs/country.rb +1 -43
  20. data/lib/sportdb/formats/structs/group.rb +25 -0
  21. data/lib/sportdb/formats/structs/league.rb +7 -26
  22. data/lib/sportdb/formats/structs/match.rb +72 -51
  23. data/lib/sportdb/formats/structs/round.rb +14 -4
  24. data/lib/sportdb/formats/structs/season.rb +3 -0
  25. data/lib/sportdb/formats/structs/team.rb +144 -0
  26. data/lib/sportdb/formats/version.rb +2 -2
  27. data/test/helper.rb +83 -1
  28. data/test/test_clubs.rb +3 -3
  29. data/test/test_conf.rb +65 -0
  30. data/test/test_datafile.rb +21 -30
  31. data/test/test_match.rb +0 -6
  32. data/test/test_match_auto.rb +72 -0
  33. data/test/test_match_auto_champs.rb +45 -0
  34. data/test/test_match_auto_euro.rb +37 -0
  35. data/test/test_match_auto_worldcup.rb +61 -0
  36. data/test/test_match_champs.rb +27 -0
  37. data/test/test_match_eng.rb +26 -0
  38. data/test/test_match_euro.rb +27 -0
  39. data/test/test_match_worldcup.rb +27 -0
  40. data/test/test_name_helper.rb +67 -0
  41. data/test/test_outline_reader.rb +3 -3
  42. data/test/test_package.rb +21 -2
  43. data/test/test_package_match.rb +78 -0
  44. data/test/test_scores.rb +67 -51
  45. metadata +32 -12
  46. data/lib/sportdb/formats/scores.rb +0 -253
  47. data/lib/sportdb/formats/structs/club.rb +0 -213
  48. data/test/test_club_helpers.rb +0 -63
  49. data/test/test_datafile_match.rb +0 -65
@@ -0,0 +1,81 @@
1
+
2
module SportDb
  ## Mixin with small line-classification helpers for the match text parsers.
  ##  note: the round / group checks read their patterns from SportDb.lang,
  ##        and is_knockout_round? expects the including class to offer a logger.
  module ParserHelper

    ## Turns a text blob into an array of cleaned-up lines:
    ##  every line gets stripped; blank lines and lines starting with # are dropped;
    ##  a trailing (end-of-line) # comment gets cut off too.
    ##  todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
    def read_lines( txt )
      txt.each_line.each_with_object( [] ) do |raw, kept|
        stripped = raw.strip
        next if stripped.empty? || stripped.start_with?( '#' )   ## skip blanks and comment-only lines

        kept << stripped.sub( /#.*/, '' ).strip   ## drop end-of-line comment
      end
    end


    ## true if the line matches the round pattern of the current language; otherwise false
    ##  note: =~ answers nil (no match) or the match index (0,1,...) - turn it into a strict boolean
    def is_round?( line )
      !(line =~ SportDb.lang.regex_round).nil?
    end

    ## true if the line flags a knockout round; the first leg of a two-leg tie
    ##  answers false (knockout flag gets skipped on the first leg)
    ##  todo: check for adding ignore case for regex (e.g. 1st leg/1st Leg)
    def is_knockout_round?( line )
      if line =~ SportDb.lang.regex_leg1
        logger.debug " two leg knockout; skip knockout flag on first leg"
        return false
      end

      if line =~ SportDb.lang.regex_knockout_round
        logger.debug " setting knockout flag to true"
        return true
      end

      ## NB: two language independent markers, that is, K.O. and Knockout
      return false unless line =~ /K\.O\.|K\.o\.|Knockout/

      logger.debug " setting knockout flag to true (lang independent marker)"
      true
    end

    ## round definition line - the bar (|) marker is required;
    ##  answers nil if there is no bar, otherwise the result of is_round?
    ##  todo/fix: use split('|') and check is_round? only on left hand side!!!! not whole line
    def is_round_def?( line )
      (line =~ /\|/).nil? ? nil : is_round?( line )
    end


    ## true if the line matches the group pattern of the current language; otherwise false
    ##  note: check after is_round? (round may contain group reference!)
    def is_group?( line )
      !(line =~ SportDb.lang.regex_group).nil?
    end

    ## group definition line - the bar (|) marker is required;
    ##  answers nil if there is no bar, otherwise the result of is_group?
    ##  note: check after is_round? (round may contain group reference!)
    ##  todo/fix: use split('|') and check is_group? only on left hand side!!!! not whole line
    def is_group_def?( line )
      (line =~ /\|/).nil? ? nil : is_group?( line )
    end


    ## checks for a goals line, e.g. looks like
    ##    Neymar 29', 71' (pen.) Oscar 90+1'; Marcelo 11' (o.g.)
    ##  that is, looks for  <space>90'  or  <space>90+1'
    ##  answers the match index (truthy) or nil if no goal minute found
    def is_goals?( line )
      goal_minute = /[ ](\d{1,3}\+)?\d{1,3}'/
      line =~ goal_minute
    end

  end # module ParserHelper
end # module SportDb
81
+
@@ -0,0 +1,180 @@
1
+
2
+ module ScoreFormats
3
+ ## regex patterns (with named captures) plus per-language lookup tables for recognizing match scores in a line of text
4
+ ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
5
+
6
+ #####
7
+ # english helpers (penalty, extra time, ...) - plain strings that get interpolated into the extended (x) mode regexes below
8
+ P_EN = '(?: p | pen\.? | pso )' # e.g. p, pen, pen., PSO, etc.
9
+ ET_EN = '(?: aet | a\.e\.t\.? )' # note: the last . is optional, so a.e.t (without trailing dot) is allowed too
10
+
11
+
12
+ ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
13
+ ## 3-4 pen. 2-2 a.e.t. (1-1, ) or
14
+ ## 3-4 pen. 2-2 a.e.t. (1-1) or
15
+ ## 2-2 a.e.t. (1-1, 1-1) or
16
+ ## 2-2 a.e.t. (1-1, ) or
17
+ ## 2-2 a.e.t. (1-1)
18
+ ## named captures: score1p/score2p (penalty), score1et/score2et (extra time), score1/score2 (first pair in parentheses), score1i/score2i (optional second pair)
19
+ EN__P_ET_FT_HT__RE = /\b
20
+ (?:
21
+ (?<score1p>\d{1,2})
22
+ [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
23
+ (?<score2p>\d{1,2})
24
+ [ ]* #{P_EN} [ ]*
25
+ )? # note: make penalty (P) score optional for now
26
+ (?<score1et>\d{1,2})
27
+ [ ]* - [ ]*
28
+ (?<score2et>\d{1,2})
29
+ [ ]* #{ET_EN} [ ]*
30
+ \(
31
+ [ ]*
32
+ (?<score1>\d{1,2})
33
+ [ ]* - [ ]*
34
+ (?<score2>\d{1,2})
35
+ [ ]*
36
+ (?:
37
+ , [ ]*
38
+ (?: (?<score1i>\d{1,2})
39
+ [ ]* - [ ]*
40
+ (?<score2i>\d{1,2})
41
+ [ ]*
42
+ )?
43
+ )? # note: make half time (HT) score optional for now
44
+ \)
45
+ (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require a space, a closing bracket (]) or end-of-line?
46
+ ## note: \b works only after non-alphanum e.g. )
47
+
48
+ ###
49
+ ## special case for case WITHOUT extra time!!
50
+ ## same as above, but WITHOUT the extra time part and with the penalty (pen) score required
51
+ EN__P_FT_HT__RE = /\b
52
+ (?<score1p>\d{1,2})
53
+ [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
54
+ (?<score2p>\d{1,2})
55
+ [ ]* #{P_EN} [ ]*
56
+ \(
57
+ [ ]*
58
+ (?<score1>\d{1,2})
59
+ [ ]* - [ ]*
60
+ (?<score2>\d{1,2})
61
+ [ ]*
62
+ (?:
63
+ , [ ]*
64
+ (?: (?<score1i>\d{1,2})
65
+ [ ]* - [ ]*
66
+ (?<score2i>\d{1,2})
67
+ [ ]*
68
+ )?
69
+ )? # note: make half time (HT) score optional for now
70
+ \)
71
+ (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require a space, a closing bracket (]) or end-of-line?
72
+ ## note: \b works only after non-alphanum e.g. )
73
+
74
+
75
+
76
+ ## e.g. 2-1 (1-1) or
77
+ ## 2-1
78
+ ## note: for now the x separator (used in Brazil / Portugal) is accepted here too
79
+ ## e.g. 1x1 or 1X1 or 0x2 or 3x3
80
+ ## todo/check/fix: move to its own format e.g. PT__FT_HT etc!!!!
81
+ ## note: the pair in parentheses must use the same separator as the first pair (back-reference to sep)
82
+ EN__FT_HT__RE = /\b
83
+ (?<score1>\d{1,2})
84
+ [ ]* (?<sep>[x-]) [ ]*
85
+ (?<score2>\d{1,2})
86
+ (?:
87
+ [ ]* \( [ ]*
88
+ (?<score1i>\d{1,2})
89
+ [ ]* \k<sep> [ ]*
90
+ (?<score2i>\d{1,2})
91
+ [ ]* \)
92
+ )? # note: make half time (HT) score optional for now
93
+ (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require a space, a closing bracket (]) or end-of-line?
94
+ ## note: \b works only after non-alphanum e.g. )
95
+
96
+ #####
97
+ # deutsch / german helpers (penalty, extra time, ...)
98
+ ## todo: add more markers e.g. im Elf. or such!!!
99
+ P_DE = '(?: ie | i\.e\.? )' # e.g. iE, i.E., i.E etc.
100
+ ET_DE = '(?: nv | n\.v\.? )' # e.g. nV, n.V., n.V etc.
101
+
102
+ ## support the all-in-one "literal" form e.g.
103
+ # 2:2 (1:1, 1:0) n.V. 5:1 i.E. or
104
+ # 2-2 (1-1, 1-0) n.V. 5-1 i.E.
105
+ DE__ET_FT_HT_P__RE = /\b
106
+ (?<score1et>\d{1,2})
107
+ [ ]* (?<sep>[:-]) [ ]* ## note: for now allow : or - as separator!!
108
+ (?<score2et>\d{1,2})
109
+ [ ]*
110
+ \(
111
+ [ ]*
112
+ (?<score1>\d{1,2})
113
+ [ ]* \k<sep> [ ]*
114
+ (?<score2>\d{1,2})
115
+ [ ]*
116
+ (?:
117
+ , [ ]*
118
+ (?:
119
+ (?<score1i>\d{1,2})
120
+ [ ]* \k<sep> [ ]*
121
+ (?<score2i>\d{1,2})
122
+ [ ]*
123
+ )?
124
+ )? # note: make half time (HT) score optional for now
125
+ \)
126
+ [ ]*
127
+ #{ET_DE}
128
+ (?:
129
+ [ ]*
130
+ (?<score1p>\d{1,2})
131
+ [ ]* \k<sep> [ ]*
132
+ (?<score2p>\d{1,2})
133
+ [ ]*
134
+ #{P_DE}
135
+ )? # note: make penalty (P) score optional for now
136
+ (?=[ \]]|$)
137
+ /xi ## todo/check: remove lookahead assertion here - why require a space, a closing bracket (]) or end-of-line?
138
+ ## note: \b works only after non-alphanum e.g. )
139
+
140
+
141
+ ## e.g. 2:1 (1:1) or
142
+ ## 2-1 (1-1) or
143
+ ## 2:1 or
144
+ ## 2-1
145
+ DE__FT_HT__RE = /\b
146
+ (?<score1>\d{1,2})
147
+ [ ]* (?<sep>[:-]) [ ]*
148
+ (?<score2>\d{1,2})
149
+ (?:
150
+ [ ]* \( [ ]*
151
+ (?<score1i>\d{1,2})
152
+ [ ]* \k<sep> [ ]*
153
+ (?<score2i>\d{1,2})
154
+ [ ]* \)
155
+ )? # note: make half time (HT) score optional for now
156
+ (?=[ \]]|$)/x ## todo/check: remove lookahead assertion here - why require a space, a closing bracket (]) or end-of-line?
157
+ ## note: \b works only after non-alphanum e.g. ); this pattern has no letters, thus, no ignore case (i) flag here
158
+
159
+
160
+ #############################################
161
+ # map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
162
+ # (the longer / more specific patterns are listed before the plain score pattern)
163
+
164
+ FORMATS_EN = [
165
+ [ EN__P_ET_FT_HT__RE, '[SCORE.EN__P?_ET_(FT_HT?)]' ], # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
166
+ [ EN__P_FT_HT__RE, '[SCORE.EN__P_(FT_HT?)]' ], # e.g. 5-1 pen. (1-1)
167
+ [ EN__FT_HT__RE, '[SCORE.EN__FT_(HT)?]' ], # e.g. 1-1 (1-0)
168
+ ]
169
+
170
+ FORMATS_DE = [
171
+ [ DE__ET_FT_HT_P__RE, '[SCORE.DE__ET_(FT_HT?)_P?]' ], # e.g. 2:2 (1:1, 1:0) n.V. 5:1 i.E.
172
+ [ DE__FT_HT__RE, '[SCORE.DE__FT_(HT)?]' ], # e.g. 1:1 (1:0)
173
+ ]
174
+
175
+ FORMATS = { # lookup by language symbol (:en, :de)
176
+ en: FORMATS_EN,
177
+ de: FORMATS_DE,
178
+ }
179
+
180
+ end # module ScoreFormats
@@ -0,0 +1,196 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ ## note: let's follow the model of DateFormats - see the DateFormats gem for more!!!
5
+
6
+
7
+ ## note: make Score top-level and use like Date - why? why not?
8
class Score
  ## Plain value holder for a match score made up of four (home, away) pairs.
  ##  note: for now values are assumed to be integers (or nil if not available) -
  ##        nothing gets checked or converted here
  ##  todo/fix: add :score1agg, score2agg too - why? why not?!!!
  ##     add state too e.g. canceled or abandoned etc - why? why not?

  attr_reader :score1i,  :score2i,    # half time (ht) score
              :score1,   :score2,     # full time (ft) score
              :score1et, :score2et,   # extra time (et) score
              :score1p,  :score2p     # penalty (p) score

  ## Takes up to eight positional values in the order
  ##   ht1, ht2, ft1, ft2, et1, et2, p1, p2
  ##  missing trailing values end up as nil; extra values get ignored
  def initialize( *values )
    @score1i,  @score2i,
    @score1,   @score2,
    @score1et, @score2et,
    @score1p,  @score2p = values
  end

  ## Flattens the score into an array in the order ht, ft, et, p;
  ##  ends with the last pair that has at least one value set, and
  ##  pairs before it are always included (filled with nil,nil if unset).
  ##  answers [] if no value is set at all.
  ##
  ## todo: how to handle a game w/o extra time but w/ penalty ???
  ##   e.g. used in copa libertadores, for example -
  ##   return 0,0 or nil,nil for extra time score ?? or -1,-1 ??
  ##   for now use nil,nil
  def to_a
    pairs = [
      [score1i,  score2i],
      [score1,   score2],
      [score1et, score2et],
      [score1p,  score2p],
    ]

    last_set = pairs.rindex { |home, away| home || away }
    return [] if last_set.nil?

    pairs[0..last_set].reduce( [], :+ )
  end

end # class Score
48
+
49
+
50
+
51
module ScoreFormats

  ## Returns the default language (a symbol) used by parse / find!
  ##  if no lang: gets passed in; defaults to english (:en)
  def self.lang
    @@lang ||= :en
  end

  ## Sets the default language
  ##  note: make sure lang is always a symbol for now (NOT a string)
  def self.lang=( value )
    @@lang = value.to_sym
  end


  ## Finds (or builds on first use) the parser for the language passed in
  ##  note: parsers for all "built-in" lang versions get cached (one per language)
  def self.parser( lang: )
    lang = lang.to_sym   ## note: make sure lang is always a symbol for now (NOT a string)

    @@parser ||= {}
    @@parser[ lang ] ||= ScoreParser.new( lang: lang )
  end

  ## Shortcut for parser( lang: ).parse( line ) - see ScoreParser#parse
  def self.parse( line, lang: ScoreFormats.lang )
    parser( lang: lang ).parse( line )
  end

  ## Shortcut for parser( lang: ).find!( line ) - see ScoreParser#find!
  def self.find!( line, lang: ScoreFormats.lang )
    parser( lang: lang ).find!( line )
  end


  class ScoreParser

    include LogUtils::Logging

    ## lang - language (symbol or string) used to look up the [regex, tag] pairs in FORMATS
    def initialize( lang: )
      @lang = lang.to_sym   ## note: make sure lang is always a symbol for now (NOT a string)

      ## fallback to english if lang not available
      ##  todo/fix: add/issue warning - why? why not?
      @formats = FORMATS[ @lang ] || FORMATS[ :en ]
    end


    ## Parses the first score found in line (line is NOT changed).
    ##  returns a Score or nil if no format matches
    ##  todo/fix - raise ArgumentError - invalid score; no format match found
    def parse( line )
      m, _tag = first_match( line )
      m ? parse_matchdata( m ) : nil
    end


    ## Extracts the first score found in line and returns it as a Score
    ##  (or nil if no format matches).
    ##
    ##  note: side effect - the matched score text in line gets replaced
    ##        (in place) by the tag of the matching format
    ##
    ##  note: always call after find_dates !!!
    ##    scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
    ##    -- note: score might have two digits too
    ##
    ##  fix: depending on language allow 1:1 or 1-1
    ##    do NOT allow mix and match
    ##    e.g. default to en is 1-1
    ##         de is 1:1 etc.
    def find!( line )
      m, tag = first_match( line )
      return nil unless m

      score = parse_matchdata( m )

      ## replace exactly the span the regex matched;
      ##  a literal sub!( m[0], tag ) would hit the FIRST occurrence of the
      ##  matched text, which may sit earlier in the line at a position
      ##  the regex itself rejected (e.g. "a2-1b 2-1")
      line[ m.begin(0)...m.end(0) ] = tag

      score
    end

    private

    ## Tries the formats in order (first come-first matched/served);
    ##  returns [match_data, tag] for the first hit or nil if nothing matches
    def first_match( line )
      @formats.each do |re, tag|
        m = re.match( line )
        return [m, tag] if m
        # no match; continue; try next regex pattern
      end
      nil
    end

    ## Builds a Score from the named captures of a match
    def parse_matchdata( m )
      # convert regex match_data captures to hash
      #  - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
      #  - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
      h = {}
      m.names.each { |name| h[name.to_sym] = m[name] }

      logger.debug " [parse_matchdata] hash: >#{h.inspect}<"

      score1i,  score2i  = int_pair( h[:score1i],  h[:score2i] )    # half time (ht) - optional
      score1et, score2et = int_pair( h[:score1et], h[:score2et] )   # extra time (et) - optional
      score1p,  score2p  = int_pair( h[:score1p],  h[:score2p] )    # penalty (p) - optional

      ## note: full time (ft) is always converted (a missing capture ends up as 0)
      Score.new( score1i,  score2i,
                 h[:score1].to_i, h[:score2].to_i,
                 score1et, score2et,
                 score1p,  score2p )
    end

    ## Converts both captures to integers if (and only if) both are present;
    ##  otherwise answers nil,nil
    def int_pair( value1, value2 )
      value1 && value2 ? [value1.to_i, value2.to_i] : [nil, nil]
    end

  end # class ScoreParser
end # module ScoreFormats
196
+