sportdb-formats 1.0.6 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -33
  3. data/Rakefile +2 -5
  4. data/lib/sportdb/formats.rb +54 -70
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +141 -0
  7. data/lib/sportdb/formats/event/event_reader.rb +183 -0
  8. data/lib/sportdb/formats/league/league_index.rb +22 -18
  9. data/lib/sportdb/formats/league/league_outline_reader.rb +45 -13
  10. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  11. data/lib/sportdb/formats/match/match_parser.rb +101 -111
  12. data/lib/sportdb/formats/package.rb +59 -11
  13. data/lib/sportdb/formats/parser_helper.rb +11 -2
  14. data/lib/sportdb/formats/team/club_index.rb +13 -11
  15. data/lib/sportdb/formats/team/club_index_history.rb +134 -0
  16. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  17. data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
  18. data/lib/sportdb/formats/version.rb +2 -2
  19. data/test/helper.rb +51 -81
  20. data/test/test_club_index_history.rb +107 -0
  21. data/test/test_club_reader_history.rb +212 -0
  22. data/test/test_datafile_package.rb +1 -1
  23. data/test/test_regex.rb +25 -7
  24. metadata +9 -78
  25. data/lib/sportdb/formats/config.rb +0 -40
  26. data/lib/sportdb/formats/match/match_parser_csv.rb +0 -314
  27. data/lib/sportdb/formats/name_helper.rb +0 -84
  28. data/lib/sportdb/formats/score/score_formats.rb +0 -220
  29. data/lib/sportdb/formats/score/score_parser.rb +0 -202
  30. data/lib/sportdb/formats/season_utils.rb +0 -27
  31. data/lib/sportdb/formats/structs/country.rb +0 -31
  32. data/lib/sportdb/formats/structs/group.rb +0 -18
  33. data/lib/sportdb/formats/structs/league.rb +0 -37
  34. data/lib/sportdb/formats/structs/match.rb +0 -151
  35. data/lib/sportdb/formats/structs/matchlist.rb +0 -220
  36. data/lib/sportdb/formats/structs/round.rb +0 -25
  37. data/lib/sportdb/formats/structs/season.rb +0 -123
  38. data/lib/sportdb/formats/structs/standings.rb +0 -247
  39. data/lib/sportdb/formats/structs/team.rb +0 -150
  40. data/lib/sportdb/formats/structs/team_usage.rb +0 -88
  41. data/test/test_clubs.rb +0 -40
  42. data/test/test_conf.rb +0 -65
  43. data/test/test_csv_match_parser.rb +0 -114
  44. data/test/test_csv_match_parser_utils.rb +0 -20
  45. data/test/test_csv_reader.rb +0 -31
  46. data/test/test_match.rb +0 -30
  47. data/test/test_match_auto.rb +0 -72
  48. data/test/test_match_auto_champs.rb +0 -45
  49. data/test/test_match_auto_euro.rb +0 -37
  50. data/test/test_match_auto_worldcup.rb +0 -61
  51. data/test/test_match_champs.rb +0 -27
  52. data/test/test_match_eng.rb +0 -26
  53. data/test/test_match_euro.rb +0 -27
  54. data/test/test_match_worldcup.rb +0 -27
  55. data/test/test_name_helper.rb +0 -67
  56. data/test/test_scores.rb +0 -122
  57. data/test/test_season.rb +0 -62
@@ -1,84 +0,0 @@
1
-
2
- module SportDb
3
- module NameHelper
4
-
5
-
6
- ## note: allow placeholder years too, e.g. (-___) or (-????)
7
- ## for marking missing (to be filled in) years
8
- ## e.g. (1887-1911), (-2013),
9
- ## (1946-2001, 2013-) etc.
10
- ## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
11
- YEAR_RE = %r{\(
12
- [0-9, ?_-]+? # note: non-greedy (minimum/first) match
13
- \)}x
14
-
15
- def strip_year( name )
16
- ## check for year(s) e.g. (1887-1911), (-2013),
17
- ## (1946-2001, 2013-) etc.
18
- ## todo/check: only sub once (not global) - why? why not?
19
- name.gsub( YEAR_RE, '' ).strip
20
- end
21
-
22
- def has_year?( name ) name =~ YEAR_RE; end
23
-
24
-
25
- LANG_RE = %r{\[
26
- [a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
27
- \]}x
28
- def strip_lang( name )
29
- name.gsub( LANG_RE, '' ).strip
30
- end
31
-
32
- def has_lang?( name ) name =~ LANG_RE; end
33
-
34
-
35
- def sanitize( name )
36
- ## check for year(s) e.g. (1887-1911), (-2013),
37
- ## (1946-2001,2013-) etc.
38
- name = strip_year( name )
39
- ## check lang codes e.g. [en], [fr], etc.
40
- name = strip_lang( name )
41
- name
42
- end
43
-
44
-
45
- ## note: also add (),’,− etc. e.g.
46
- ## Estudiantes (LP) => Estudiantes LP
47
- ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
- ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
-
50
- NORM_RE = %r{
51
- [.'’º/()_−-]
52
- }x # note: in [] dash (-) if last doesn't need to get escaped
53
- ## note: remove all dots (.), dash (-), ', º, /, etc.
54
- # . U+002E (46) - FULL STOP
55
- # ' U+0027 (39) - APOSTROPHE
56
- # ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
57
- # º U+00BA (186) - MASCULINE ORDINAL INDICATOR
58
- # / U+002F (47) - SOLIDUS
59
- # ( U+0028 (40) - LEFT PARENTHESIS
60
- # ) U+0029 (41) - RIGHT PARENTHESIS
61
- # − U+2212 (8722) - MINUS SIGN
62
- # - U+002D (45) - HYPHEN-MINUS
63
-
64
- ## for norm(alizing) names
65
- def strip_norm( name )
66
- name.gsub( NORM_RE, '' )
67
- end
68
-
69
- def normalize( name )
70
- # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
71
- name = strip_norm( name )
72
- name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
73
-
74
- ## todo/check: use our own downcase - why? why not?
75
- name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
76
- name
77
- end
78
-
79
-
80
- def variants( name ) Variant.find( name ); end
81
-
82
- end # module NameHelper
83
- end # module SportDb
84
-
@@ -1,220 +0,0 @@
1
-
2
- module ScoreFormats
3
-
4
- ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
5
-
6
- #####
7
- # english helpers (penalty, extra time, ...)
8
- P_EN = '(?: p | pen\.? | pso )' # e.g. p, pen, pen., PSO, etc.
9
- ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
10
-
11
-
12
- ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
13
- ## 3-4 pen. 2-2 a.e.t. (1-1, ) or
14
- ## 3-4 pen. 2-2 a.e.t. (1-1) or
15
- ## 2-2 a.e.t. (1-1, 1-1) or
16
- ## 2-2 a.e.t. (1-1, ) or
17
- ## 2-2 a.e.t. (1-1)
18
-
19
- EN__P_ET_FT_HT__RE = /\b
20
- (?:
21
- (?<score1p>\d{1,2})
22
- [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
23
- (?<score2p>\d{1,2})
24
- [ ]* #{P_EN} [ ]*
25
- )? # note: make penalty (P) score optional for now
26
- (?<score1et>\d{1,2})
27
- [ ]* - [ ]*
28
- (?<score2et>\d{1,2})
29
- [ ]* #{ET_EN} [ ]*
30
- \(
31
- [ ]*
32
- (?<score1>\d{1,2})
33
- [ ]* - [ ]*
34
- (?<score2>\d{1,2})
35
- [ ]*
36
- (?:
37
- , [ ]*
38
- (?: (?<score1i>\d{1,2})
39
- [ ]* - [ ]*
40
- (?<score2i>\d{1,2})
41
- [ ]*
42
- )?
43
- )? # note: make half time (HT) score optional for now
44
- \)
45
- (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
46
- ## note: \b works only after non-alphanum e.g. )
47
-
48
- ###
49
- ## special case for case WITHOUT extra time!!
50
- ## same as above (but WITHOUT extra time and pen required)
51
- EN__P_FT_HT__RE = /\b
52
- (?<score1p>\d{1,2})
53
- [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
54
- (?<score2p>\d{1,2})
55
- [ ]* #{P_EN} [ ]*
56
- \(
57
- [ ]*
58
- (?<score1>\d{1,2})
59
- [ ]* - [ ]*
60
- (?<score2>\d{1,2})
61
- [ ]*
62
- (?:
63
- , [ ]*
64
- (?: (?<score1i>\d{1,2})
65
- [ ]* - [ ]*
66
- (?<score2i>\d{1,2})
67
- [ ]*
68
- )?
69
- )? # note: make half time (HT) score optional for now
70
- \)
71
- (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
72
- ## note: \b works only after non-alphanum e.g. )
73
-
74
-
75
-
76
- ## e.g. 2-1 (1-1) or
77
- ## 2-1
78
- ## note: for now add here used in Brazil / Portugal
79
- ## e.g 1x1 or 1X1 or 0x2 or 3x3 too
80
- ## todo/check/fix: move to its own use PT__FT_HT etc!!!!
81
-
82
- EN__FT_HT__RE = /\b
83
- (?<score1>\d{1,2})
84
- [ ]* (?<sep>[x-]) [ ]*
85
- (?<score2>\d{1,2})
86
- (?:
87
- [ ]* \( [ ]*
88
- (?<score1i>\d{1,2})
89
- [ ]* \k<sep> [ ]*
90
- (?<score2i>\d{1,2})
91
- [ ]* \)
92
- )? # note: make half time (HT) score optional for now
93
- (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
94
- ## note: \b works only after non-alphanum e.g. )
95
-
96
-
97
- #####
98
- # deutsch / german helpers (penalty, extra time, ...)
99
- ## todo add more marker e.g. im Elf. or such!!!
100
- P_DE = '(?: ie | i\.e\.? )' # e.g. iE, i.E., i.E etc.
101
- ET_DE = '(?: nv | n\.v\.? )' # e.g. nV, n.V., n.V etc.
102
-
103
-
104
- ## support alternate all-in-one score e.g.
105
- ## i.E. 2:4, n.V. 3:3 (1:1, 1:1) or
106
- ## n.V. 3:2 (2:2, 1:2)
107
- DE__P_ET_FT_HT__RE = /\b
108
- (?:
109
- #{P_DE}
110
- [ ]*
111
- (?<score1p>\d{1,2})
112
- [ ]* : [ ]*
113
- (?<score2p>\d{1,2})
114
- [ ]* (?:, [ ]*)?
115
- )? # note: make penalty (P) score optional for now
116
- #{ET_DE}
117
- [ ]*
118
- (?<score1et>\d{1,2})
119
- [ ]* : [ ]*
120
- (?<score2et>\d{1,2})
121
- [ ]*
122
- \(
123
- [ ]*
124
- (?<score1>\d{1,2})
125
- [ ]* : [ ]*
126
- (?<score2>\d{1,2})
127
- [ ]*
128
- (?:
129
- , [ ]*
130
- (?:
131
- (?<score1i>\d{1,2})
132
- [ ]* : [ ]*
133
- (?<score2i>\d{1,2})
134
- [ ]*
135
- )?
136
- )? # note: make half time (HT) score optional for now
137
- \)
138
- (?=[ \]]|$)
139
- /xi
140
-
141
- ## support all-in-one "literal" form e.g.
142
- # 2:2 (1:1, 1:0) n.V. 5:1 i.E. or
143
- # 2-2 (1-1, 1-0) n.V. 5-1 i.E.
144
- DE__ET_FT_HT_P__RE = /\b
145
- (?<score1et>\d{1,2})
146
- [ ]* (?<sep>[:-]) [ ]* ## note: for now allow : or - as separator!!
147
- (?<score2et>\d{1,2})
148
- [ ]*
149
- \(
150
- [ ]*
151
- (?<score1>\d{1,2})
152
- [ ]* \k<sep> [ ]*
153
- (?<score2>\d{1,2})
154
- [ ]*
155
- (?:
156
- , [ ]*
157
- (?:
158
- (?<score1i>\d{1,2})
159
- [ ]* \k<sep> [ ]*
160
- (?<score2i>\d{1,2})
161
- [ ]*
162
- )?
163
- )? # note: make half time (HT) score optional for now
164
- \)
165
- [ ]*
166
- #{ET_DE}
167
- (?:
168
- [ ]*
169
- (?<score1p>\d{1,2})
170
- [ ]* \k<sep> [ ]*
171
- (?<score2p>\d{1,2})
172
- [ ]*
173
- #{P_DE}
174
- )? # note: make penalty (P) score optional for now
175
- (?=[ \]]|$)
176
- /xi ## todo/check: remove lookahead assertion here - why require space?
177
- ## note: \b works only after non-alphanum e.g. )
178
-
179
-
180
- ## e.g. 2:1 (1:1) or
181
- ## 2-1 (1-1) or
182
- ## 2:1 or
183
- ## 2-1
184
- DE__FT_HT__RE = /\b
185
- (?<score1>\d{1,2})
186
- [ ]* (?<sep>[:-]) [ ]*
187
- (?<score2>\d{1,2})
188
- (?:
189
- [ ]* \( [ ]*
190
- (?<score1i>\d{1,2})
191
- [ ]* \k<sep> [ ]*
192
- (?<score2i>\d{1,2})
193
- [ ]* \)
194
- )? # note: make half time (HT) score optional for now
195
- (?=[ \]]|$)/x ## todo/check: remove lookahead assertion here - why require space?
196
- ## note: \b works only after non-alphanum e.g. )
197
-
198
-
199
- #############################################
200
- # map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
201
-
202
-
203
- FORMATS_EN = [
204
- [ EN__P_ET_FT_HT__RE, '[SCORE.EN__P?_ET_(FT_HT?)]' ], # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
205
- [ EN__P_FT_HT__RE, '[SCORE.EN__P_(FT_HT?)]' ], # e.g. 5-1 pen. (1-1)
206
- [ EN__FT_HT__RE, '[SCORE.EN__FT_(HT)?]' ], # e.g. 1-1 (1-0)
207
- ]
208
-
209
- FORMATS_DE = [
210
- [ DE__ET_FT_HT_P__RE, '[SCORE.DE__ET_(FT_HT?)_P?]' ], # e.g. 2:2 (1:1, 1:0) n.V. 5:1 i.E.
211
- [ DE__P_ET_FT_HT__RE, '[SCORE.DE__P?_ET_(FT_HT?)]' ], # e.g. i.E. 2:4, n.V. 3:3 (1:1, 1:1)
212
- [ DE__FT_HT__RE, '[SCORE.DE__FT_(HT)?]' ], # e.g. 1:1 (1:0)
213
- ]
214
-
215
- FORMATS = {
216
- en: FORMATS_EN,
217
- de: FORMATS_DE,
218
- }
219
-
220
- end # module ScoreFormats
@@ -1,202 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- ## note: lets follow the model of DateFormats -see DateFormats gem for more!!!
5
-
6
-
7
- ## note: make Score top-level and use like Date - why? why not?
8
- class Score
9
-
10
- attr_reader :score1i, :score2i, # half time (ht) score
11
- :score1, :score2, # full time (ft) score
12
- :score1et, :score2et, # extra time (et) score
13
- :score1p, :score2p # penalty (p) score
14
- ## todo/fix: add :score1agg, score2agg too - why? why not?!!!
15
- ## add state too e.g. canceled or abandoned etc - why? why not?
16
-
17
- def initialize( *values )
18
- ## note: for now always assumes integers
19
- ## todo/check - check/require integer args - why? why not?
20
-
21
- @score1i = values[0] # half time (ht) score
22
- @score2i = values[1]
23
-
24
- @score1 = values[2] # full time (ft) score
25
- @score2 = values[3]
26
-
27
- @score1et = values[4] # extra time (et) score
28
- @score2et = values[5]
29
-
30
- @score1p = values[6] # penalty (p) score
31
- @score2p = values[7]
32
- end
33
-
34
- def to_a
35
- ## todo: how to handle game w/o extra time
36
- # but w/ optional penalty ??? e.g. used in Copa Libertadores, for example
37
- # return 0,0 or nil,nil for extra time score ?? or -1, -1 ??
38
- # for now use nil,nil
39
- score = []
40
- score += [score1i, score2i] if score1p || score2p || score1et || score2et || score1 || score2 || score1i || score2i
41
- score += [score1, score2] if score1p || score2p || score1et || score2et || score1 || score2
42
- score += [score1et, score2et] if score1p || score2p || score1et || score2et
43
- score += [score1p, score2p] if score1p || score2p
44
- score
45
- end
46
-
47
- end # class Score
48
-
49
-
50
-
51
- module ScoreFormats
52
-
53
- def self.lang
54
- @@lang ||= :en ## defaults to english (:en)
55
- end
56
- def self.lang=( value )
57
- @@lang = value.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
58
- @@lang ## todo/check: remove =() method always returns passed in value? double check
59
- end
60
-
61
-
62
- def self.parser( lang: ) ## find parser
63
- lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
64
-
65
- ## note: cache all "built-in" lang versions (e.g. formats == nil)
66
- @@parser ||= {}
67
- parser = @@parser[ lang ] ||= ScoreParser.new( lang: lang )
68
- end
69
-
70
- def self.parse( line, lang: ScoreFormats.lang )
71
- parser( lang: lang ).parse( line )
72
- end
73
-
74
- def self.find!( line, lang: ScoreFormats.lang )
75
- parser( lang: lang ).find!( line )
76
- end
77
-
78
-
79
- class ScoreParser
80
-
81
- include LogUtils::Logging
82
-
83
- def initialize( lang: )
84
- @lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
85
-
86
- ## fallback to english if lang not available
87
- ## todo/fix: add/issue warning - why? why not?
88
- @formats = FORMATS[ @lang ] || FORMATS[ :en ]
89
- end
90
-
91
-
92
- def parse( line )
93
-
94
- ##########
95
- ## todo/fix/check: add unicode to regular dash conversion - why? why not?
96
- ## e.g. – becomes - (yes, the characters are different!!!)
97
- #############
98
-
99
- score = nil
100
- @formats.each do |format|
101
- re = format[0]
102
- m = re.match( line )
103
- if m
104
- score = parse_matchdata( m )
105
- break
106
- end
107
- # no match; continue; try next regex pattern
108
- end
109
-
110
- ## todo/fix - raise ArgumentError - invalid score; no format match found
111
- score # note: nil if no match found
112
- end # method parse
113
-
114
-
115
- def find!( line )
116
- ### fix: add and match all-in-one literal first, followed by
117
-
118
- # note: always call after find_dates !!!
119
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
120
- # -- note: score might have two digits too
121
-
122
- ### fix: depending on language allow 1:1 or 1-1
123
- ## do NOT allow mix and match
124
- ## e.g. default to en is 1-1
125
- ## de is 1:1 etc.
126
-
127
-
128
- # extract score from line
129
- # and return it
130
- # note: side effect - replaces the matched score in the line string with its format tag
131
-
132
- score = nil
133
- @formats.each do |format|
134
- re = format[0]
135
- tag = format[1]
136
- m = re.match( line )
137
- if m
138
- score = parse_matchdata( m )
139
- line.sub!( m[0], tag )
140
- break
141
- end
142
- # no match; continue; try next regex pattern
143
- end
144
-
145
- score # note: nil if no match found
146
- end # method find!
147
-
148
- private
149
- def parse_matchdata( m )
150
- # convert regex match_data captures to hash
151
- # - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
152
- h = {}
153
- # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
154
- m.names.each { |name| h[name.to_sym] = m[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
155
-
156
- ## puts "[parse_date_time] match_data:"
157
- ## pp h
158
- logger.debug " [parse_matchdata] hash: >#{h.inspect}<"
159
-
160
- score1i = nil # half time (ht) scores
161
- score2i = nil
162
-
163
- score1 = nil # full time (ft) scores
164
- score2 = nil
165
-
166
- score1et = nil # extra time (et) scores
167
- score2et = nil
168
-
169
- score1p = nil # penalty (p) scores
170
- score2p = nil
171
-
172
-
173
- if h[:score1i] && h[:score2i] ## note: half time (HT) score is optional now
174
- score1i = h[:score1i].to_i
175
- score2i = h[:score2i].to_i
176
- end
177
-
178
- score1 = h[:score1].to_i
179
- score2 = h[:score2].to_i
180
-
181
- if h[:score1et] && h[:score2et]
182
- score1et = h[:score1et].to_i
183
- score2et = h[:score2et].to_i
184
- end
185
-
186
- if h[:score1p] && h[:score2p]
187
- score1p = h[:score1p].to_i
188
- score2p = h[:score2p].to_i
189
- end
190
-
191
- score = Score.new( score1i, score2i,
192
- score1, score2,
193
- score1et, score2et,
194
- score1p, score2p )
195
- score
196
- end # method parse_matchdata
197
-
198
-
199
-
200
- end # class ScoreParser
201
- end # module ScoreFormats
202
-