sportdb-formats 1.0.6 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -33
  3. data/Rakefile +2 -5
  4. data/lib/sportdb/formats.rb +54 -70
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +141 -0
  7. data/lib/sportdb/formats/event/event_reader.rb +183 -0
  8. data/lib/sportdb/formats/league/league_index.rb +22 -18
  9. data/lib/sportdb/formats/league/league_outline_reader.rb +45 -13
  10. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  11. data/lib/sportdb/formats/match/match_parser.rb +101 -111
  12. data/lib/sportdb/formats/package.rb +59 -11
  13. data/lib/sportdb/formats/parser_helper.rb +11 -2
  14. data/lib/sportdb/formats/team/club_index.rb +13 -11
  15. data/lib/sportdb/formats/team/club_index_history.rb +134 -0
  16. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  17. data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
  18. data/lib/sportdb/formats/version.rb +2 -2
  19. data/test/helper.rb +51 -81
  20. data/test/test_club_index_history.rb +107 -0
  21. data/test/test_club_reader_history.rb +212 -0
  22. data/test/test_datafile_package.rb +1 -1
  23. data/test/test_regex.rb +25 -7
  24. metadata +9 -78
  25. data/lib/sportdb/formats/config.rb +0 -40
  26. data/lib/sportdb/formats/match/match_parser_csv.rb +0 -314
  27. data/lib/sportdb/formats/name_helper.rb +0 -84
  28. data/lib/sportdb/formats/score/score_formats.rb +0 -220
  29. data/lib/sportdb/formats/score/score_parser.rb +0 -202
  30. data/lib/sportdb/formats/season_utils.rb +0 -27
  31. data/lib/sportdb/formats/structs/country.rb +0 -31
  32. data/lib/sportdb/formats/structs/group.rb +0 -18
  33. data/lib/sportdb/formats/structs/league.rb +0 -37
  34. data/lib/sportdb/formats/structs/match.rb +0 -151
  35. data/lib/sportdb/formats/structs/matchlist.rb +0 -220
  36. data/lib/sportdb/formats/structs/round.rb +0 -25
  37. data/lib/sportdb/formats/structs/season.rb +0 -123
  38. data/lib/sportdb/formats/structs/standings.rb +0 -247
  39. data/lib/sportdb/formats/structs/team.rb +0 -150
  40. data/lib/sportdb/formats/structs/team_usage.rb +0 -88
  41. data/test/test_clubs.rb +0 -40
  42. data/test/test_conf.rb +0 -65
  43. data/test/test_csv_match_parser.rb +0 -114
  44. data/test/test_csv_match_parser_utils.rb +0 -20
  45. data/test/test_csv_reader.rb +0 -31
  46. data/test/test_match.rb +0 -30
  47. data/test/test_match_auto.rb +0 -72
  48. data/test/test_match_auto_champs.rb +0 -45
  49. data/test/test_match_auto_euro.rb +0 -37
  50. data/test/test_match_auto_worldcup.rb +0 -61
  51. data/test/test_match_champs.rb +0 -27
  52. data/test/test_match_eng.rb +0 -26
  53. data/test/test_match_euro.rb +0 -27
  54. data/test/test_match_worldcup.rb +0 -27
  55. data/test/test_name_helper.rb +0 -67
  56. data/test/test_scores.rb +0 -122
  57. data/test/test_season.rb +0 -62
@@ -1,84 +0,0 @@
1
-
2
- module SportDb
3
- module NameHelper
4
-
5
-
6
- ## note: allow placeholder years too, e.g. (-___) or (-????)
7
- ## for marking missing (to be filled in) years
8
- ## e.g. (1887-1911), (-2013),
9
- ## (1946-2001, 2013-) etc.
10
- ## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
11
- YEAR_RE = %r{\(
12
- [0-9, ?_-]+? # note: non-greedy (minimum/first) match
13
- \)}x
14
-
15
- def strip_year( name )
16
- ## check for year(s) e.g. (1887-1911), (-2013),
17
- ## (1946-2001, 2013-) etc.
18
- ## todo/check: only sub once (not global) - why? why not?
19
- name.gsub( YEAR_RE, '' ).strip
20
- end
21
-
22
- def has_year?( name ) name =~ YEAR_RE; end
23
-
24
-
25
- LANG_RE = %r{\[
26
- [a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
27
- \]}x
28
- def strip_lang( name )
29
- name.gsub( LANG_RE, '' ).strip
30
- end
31
-
32
- def has_lang?( name ) name =~ LANG_RE; end
33
-
34
-
35
- def sanitize( name )
36
- ## check for year(s) e.g. (1887-1911), (-2013),
37
- ## (1946-2001,2013-) etc.
38
- name = strip_year( name )
39
- ## check lang codes e.g. [en], [fr], etc.
40
- name = strip_lang( name )
41
- name
42
- end
43
-
44
-
45
- ## note: also add (),’,− etc. e.g.
46
- ## Estudiantes (LP) => Estudiantes LP
47
- ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
- ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
-
50
- NORM_RE = %r{
51
- [.'’º/()_−-]
52
- }x # note: in [] dash (-) if last doesn't need to get escaped
53
- ## note: remove all dots (.), dash (-), ', º, /, etc.
54
- # . U+002E (46) - FULL STOP
55
- # ' U+0027 (39) - APOSTROPHE
56
- # ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
57
- # º U+00BA (186) - MASCULINE ORDINAL INDICATOR
58
- # / U+002F (47) - SOLIDUS
59
- # ( U+0028 (40) - LEFT PARENTHESIS
60
- # ) U+0029 (41) - RIGHT PARENTHESIS
61
- # − U+2212 (8722) - MINUS SIGN
62
- # - U+002D (45) - HYPHEN-MINUS
63
-
64
- ## for norm(alizing) names
65
- def strip_norm( name )
66
- name.gsub( NORM_RE, '' )
67
- end
68
-
69
- def normalize( name )
70
- # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
71
- name = strip_norm( name )
72
- name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
73
-
74
- ## todo/check: use our own downcase - why? why not?
75
- name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
76
- name
77
- end
78
-
79
-
80
- def variants( name ) Variant.find( name ); end
81
-
82
- end # module NameHelper
83
- end # module SportDb
84
-
@@ -1,220 +0,0 @@
1
-
2
- module ScoreFormats
3
-
4
- ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
5
-
6
- #####
7
- # english helpers (penalty, extra time, ...)
8
- P_EN = '(?: p | pen\.? | pso )' # e.g. p, pen, pen., PSO, etc.
9
- ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
10
-
11
-
12
- ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
13
- ## 3-4 pen. 2-2 a.e.t. (1-1, ) or
14
- ## 3-4 pen. 2-2 a.e.t. (1-1) or
15
- ## 2-2 a.e.t. (1-1, 1-1) or
16
- ## 2-2 a.e.t. (1-1, ) or
17
- ## 2-2 a.e.t. (1-1)
18
-
19
- EN__P_ET_FT_HT__RE = /\b
20
- (?:
21
- (?<score1p>\d{1,2})
22
- [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
23
- (?<score2p>\d{1,2})
24
- [ ]* #{P_EN} [ ]*
25
- )? # note: make penalty (P) score optional for now
26
- (?<score1et>\d{1,2})
27
- [ ]* - [ ]*
28
- (?<score2et>\d{1,2})
29
- [ ]* #{ET_EN} [ ]*
30
- \(
31
- [ ]*
32
- (?<score1>\d{1,2})
33
- [ ]* - [ ]*
34
- (?<score2>\d{1,2})
35
- [ ]*
36
- (?:
37
- , [ ]*
38
- (?: (?<score1i>\d{1,2})
39
- [ ]* - [ ]*
40
- (?<score2i>\d{1,2})
41
- [ ]*
42
- )?
43
- )? # note: make half time (HT) score optional for now
44
- \)
45
- (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
46
- ## note: \b works only after non-alphanum e.g. )
47
-
48
- ###
49
- ## special case for case WITHOUT extra time!!
50
- ## same as above (but WITHOUT extra time and pen required)
51
- EN__P_FT_HT__RE = /\b
52
- (?<score1p>\d{1,2})
53
- [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
54
- (?<score2p>\d{1,2})
55
- [ ]* #{P_EN} [ ]*
56
- \(
57
- [ ]*
58
- (?<score1>\d{1,2})
59
- [ ]* - [ ]*
60
- (?<score2>\d{1,2})
61
- [ ]*
62
- (?:
63
- , [ ]*
64
- (?: (?<score1i>\d{1,2})
65
- [ ]* - [ ]*
66
- (?<score2i>\d{1,2})
67
- [ ]*
68
- )?
69
- )? # note: make half time (HT) score optional for now
70
- \)
71
- (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
72
- ## note: \b works only after non-alphanum e.g. )
73
-
74
-
75
-
76
- ## e.g. 2-1 (1-1) or
77
- ## 2-1
78
- ## note: for now add here used in Brazil / Portugal
79
- ## e.g 1x1 or 1X1 or 0x2 or 3x3 too
80
- ## todo/check/fix: move to its own use PT__FT_HT etc!!!!
81
-
82
- EN__FT_HT__RE = /\b
83
- (?<score1>\d{1,2})
84
- [ ]* (?<sep>[x-]) [ ]*
85
- (?<score2>\d{1,2})
86
- (?:
87
- [ ]* \( [ ]*
88
- (?<score1i>\d{1,2})
89
- [ ]* \k<sep> [ ]*
90
- (?<score2i>\d{1,2})
91
- [ ]* \)
92
- )? # note: make half time (HT) score optional for now
93
- (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
94
- ## note: \b works only after non-alphanum e.g. )
95
-
96
-
97
- #####
98
- # deutsch / german helpers (penalty, extra time, ...)
99
- ## todo add more marker e.g. im Elf. or such!!!
100
- P_DE = '(?: ie | i\.e\.? )' # e.g. iE, i.E., i.E etc.
101
- ET_DE = '(?: nv | n\.v\.? )' # e.g. nV, n.V., n.V etc.
102
-
103
-
104
- ## support alternate all-in-one score e.g.
105
- ## i.E. 2:4, n.V. 3:3 (1:1, 1:1) or
106
- ## n.V. 3:2 (2:2, 1:2)
107
- DE__P_ET_FT_HT__RE = /\b
108
- (?:
109
- #{P_DE}
110
- [ ]*
111
- (?<score1p>\d{1,2})
112
- [ ]* : [ ]*
113
- (?<score2p>\d{1,2})
114
- [ ]* (?:, [ ]*)?
115
- )? # note: make penalty (P) score optional for now
116
- #{ET_DE}
117
- [ ]*
118
- (?<score1et>\d{1,2})
119
- [ ]* : [ ]*
120
- (?<score2et>\d{1,2})
121
- [ ]*
122
- \(
123
- [ ]*
124
- (?<score1>\d{1,2})
125
- [ ]* : [ ]*
126
- (?<score2>\d{1,2})
127
- [ ]*
128
- (?:
129
- , [ ]*
130
- (?:
131
- (?<score1i>\d{1,2})
132
- [ ]* : [ ]*
133
- (?<score2i>\d{1,2})
134
- [ ]*
135
- )?
136
- )? # note: make half time (HT) score optional for now
137
- \)
138
- (?=[ \]]|$)
139
- /xi
140
-
141
- ## support all-in-one "literal" form, e.g.
142
- # 2:2 (1:1, 1:0) n.V. 5:1 i.E. or
143
- # 2-2 (1-1, 1-0) n.V. 5-1 i.E.
144
- DE__ET_FT_HT_P__RE = /\b
145
- (?<score1et>\d{1,2})
146
- [ ]* (?<sep>[:-]) [ ]* ## note: for now allow : or - as separator!!
147
- (?<score2et>\d{1,2})
148
- [ ]*
149
- \(
150
- [ ]*
151
- (?<score1>\d{1,2})
152
- [ ]* \k<sep> [ ]*
153
- (?<score2>\d{1,2})
154
- [ ]*
155
- (?:
156
- , [ ]*
157
- (?:
158
- (?<score1i>\d{1,2})
159
- [ ]* \k<sep> [ ]*
160
- (?<score2i>\d{1,2})
161
- [ ]*
162
- )?
163
- )? # note: make half time (HT) score optional for now
164
- \)
165
- [ ]*
166
- #{ET_DE}
167
- (?:
168
- [ ]*
169
- (?<score1p>\d{1,2})
170
- [ ]* \k<sep> [ ]*
171
- (?<score2p>\d{1,2})
172
- [ ]*
173
- #{P_DE}
174
- )? # note: make penalty (P) score optional for now
175
- (?=[ \]]|$)
176
- /xi ## todo/check: remove lookahead assertion here - why require space?
177
- ## note: \b works only after non-alphanum e.g. )
178
-
179
-
180
- ## e.g. 2:1 (1:1) or
181
- ## 2-1 (1-1) or
182
- ## 2:1 or
183
- ## 2-1
184
- DE__FT_HT__RE = /\b
185
- (?<score1>\d{1,2})
186
- [ ]* (?<sep>[:-]) [ ]*
187
- (?<score2>\d{1,2})
188
- (?:
189
- [ ]* \( [ ]*
190
- (?<score1i>\d{1,2})
191
- [ ]* \k<sep> [ ]*
192
- (?<score2i>\d{1,2})
193
- [ ]* \)
194
- )? # note: make half time (HT) score optional for now
195
- (?=[ \]]|$)/x ## todo/check: remove lookahead assertion here - why require space?
196
- ## note: \b works only after non-alphanum e.g. )
197
-
198
-
199
- #############################################
200
- # map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
201
-
202
-
203
- FORMATS_EN = [
204
- [ EN__P_ET_FT_HT__RE, '[SCORE.EN__P?_ET_(FT_HT?)]' ], # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
205
- [ EN__P_FT_HT__RE, '[SCORE.EN__P_(FT_HT?)]' ], # e.g. 5-1 pen. (1-1)
206
- [ EN__FT_HT__RE, '[SCORE.EN__FT_(HT)?]' ], # e.g. 1-1 (1-0)
207
- ]
208
-
209
- FORMATS_DE = [
210
- [ DE__ET_FT_HT_P__RE, '[SCORE.DE__ET_(FT_HT?)_P?]' ], # e.g. 2:2 (1:1, 1:0) n.V. 5:1 i.E.
211
- [ DE__P_ET_FT_HT__RE, '[SCORE.DE__P?_ET_(FT_HT?)]' ], # e.g. i.E. 2:4, n.V. 3:3 (1:1, 1:1)
212
- [ DE__FT_HT__RE, '[SCORE.DE__FT_(HT)?]' ], # e.g. 1:1 (1:0)
213
- ]
214
-
215
- FORMATS = {
216
- en: FORMATS_EN,
217
- de: FORMATS_DE,
218
- }
219
-
220
- end # module ScoreFormats
@@ -1,202 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- ## note: lets follow the model of DateFormats -see DateFormats gem for more!!!
5
-
6
-
7
- ## note: make Score top-level and use like Date - why? why not?
8
- class Score
9
-
10
- attr_reader :score1i, :score2i, # half time (ht) score
11
- :score1, :score2, # full time (ft) score
12
- :score1et, :score2et, # extra time (et) score
13
- :score1p, :score2p # penalty (p) score
14
- ## todo/fix: add :score1agg, score2agg too - why? why not?!!!
15
- ## add state too e.g. canceled or abandoned etc - why? why not?
16
-
17
- def initialize( *values )
18
- ## note: for now always assumes integers
19
- ## todo/check - check/require integer args - why? why not?
20
-
21
- @score1i = values[0] # half time (ht) score
22
- @score2i = values[1]
23
-
24
- @score1 = values[2] # full time (ft) score
25
- @score2 = values[3]
26
-
27
- @score1et = values[4] # extra time (et) score
28
- @score2et = values[5]
29
-
30
- @score1p = values[6] # penalty (p) score
31
- @score2p = values[7]
32
- end
33
-
34
- def to_a
35
- ## todo: how to handle game w/o extra time
36
- # but w/ optional penalty ??? e.g. used in Copa Libertadores, for example
37
- # return 0,0 or nil,nil for extra time score ?? or -1, -1 ??
38
- # for now use nil,nil
39
- score = []
40
- score += [score1i, score2i] if score1p || score2p || score1et || score2et || score1 || score2 || score1i || score2i
41
- score += [score1, score2] if score1p || score2p || score1et || score2et || score1 || score2
42
- score += [score1et, score2et] if score1p || score2p || score1et || score2et
43
- score += [score1p, score2p] if score1p || score2p
44
- score
45
- end
46
-
47
- end # class Score
48
-
49
-
50
-
51
- module ScoreFormats
52
-
53
- def self.lang
54
- @@lang ||= :en ## defaults to english (:en)
55
- end
56
- def self.lang=( value )
57
- @@lang = value.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
58
- @@lang ## todo/check: remove =() method always returns passed in value? double check
59
- end
60
-
61
-
62
- def self.parser( lang: ) ## find parser
63
- lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
64
-
65
- ## note: cache all "built-in" lang versions (e.g. formats == nil)
66
- @@parser ||= {}
67
- parser = @@parser[ lang ] ||= ScoreParser.new( lang: lang )
68
- end
69
-
70
- def self.parse( line, lang: ScoreFormats.lang )
71
- parser( lang: lang ).parse( line )
72
- end
73
-
74
- def self.find!( line, lang: ScoreFormats.lang )
75
- parser( lang: lang ).find!( line )
76
- end
77
-
78
-
79
- class ScoreParser
80
-
81
- include LogUtils::Logging
82
-
83
- def initialize( lang: )
84
- @lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
85
-
86
- ## fallback to english if lang not available
87
- ## todo/fix: add/issue warning - why? why not?
88
- @formats = FORMATS[ @lang ] || FORMATS[ :en ]
89
- end
90
-
91
-
92
- def parse( line )
93
-
94
- ##########
95
- ## todo/fix/check: add unicode to regular dash conversion - why? why not?
96
- ## e.g. – becomes - (yes, the letters are different!!!)
97
- #############
98
-
99
- score = nil
100
- @formats.each do |format|
101
- re = format[0]
102
- m = re.match( line )
103
- if m
104
- score = parse_matchdata( m )
105
- break
106
- end
107
- # no match; continue; try next regex pattern
108
- end
109
-
110
- ## todo/fix - raise ArgumentError - invalid score; no format match found
111
- score # note: nil if no match found
112
- end # method parse
113
-
114
-
115
- def find!( line )
116
- ### fix: add and match all-in-one literal first, followed by
117
-
118
- # note: always call after find_dates !!!
119
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
120
- # -- note: score might have two digits too
121
-
122
- ### fix: depending on language allow 1:1 or 1-1
123
- ## do NOT allow mix and match
124
- ## e.g. default to en is 1-1
125
- ## de is 1:1 etc.
126
-
127
-
128
- # extract score from line
129
- # and return it
130
- # note: side effect - replaces the matched score in the line string with its format tag
131
-
132
- score = nil
133
- @formats.each do |format|
134
- re = format[0]
135
- tag = format[1]
136
- m = re.match( line )
137
- if m
138
- score = parse_matchdata( m )
139
- line.sub!( m[0], tag )
140
- break
141
- end
142
- # no match; continue; try next regex pattern
143
- end
144
-
145
- score # note: nil if no match found
146
- end # method find!
147
-
148
- private
149
- def parse_matchdata( m )
150
- # convert regex match_data captures to hash
151
- # - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
152
- h = {}
153
- # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
154
- m.names.each { |name| h[name.to_sym] = m[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
155
-
156
- ## puts "[parse_date_time] match_data:"
157
- ## pp h
158
- logger.debug " [parse_matchdata] hash: >#{h.inspect}<"
159
-
160
- score1i = nil # half time (ht) scores
161
- score2i = nil
162
-
163
- score1 = nil # full time (ft) scores
164
- score2 = nil
165
-
166
- score1et = nil # extra time (et) scores
167
- score2et = nil
168
-
169
- score1p = nil # penalty (p) scores
170
- score2p = nil
171
-
172
-
173
- if h[:score1i] && h[:score2i] ## note: half time (HT) score is optional now
174
- score1i = h[:score1i].to_i
175
- score2i = h[:score2i].to_i
176
- end
177
-
178
- score1 = h[:score1].to_i
179
- score2 = h[:score2].to_i
180
-
181
- if h[:score1et] && h[:score2et]
182
- score1et = h[:score1et].to_i
183
- score2et = h[:score2et].to_i
184
- end
185
-
186
- if h[:score1p] && h[:score2p]
187
- score1p = h[:score1p].to_i
188
- score2p = h[:score2p].to_i
189
- end
190
-
191
- score = Score.new( score1i, score2i,
192
- score1, score2,
193
- score1et, score2et,
194
- score1p, score2p )
195
- score
196
- end # method parse_matchdata
197
-
198
-
199
-
200
- end # class ScoreParser
201
- end # module ScoreFormats
202
-