sportdb-parser 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +17 -4
  4. data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
  5. data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
  6. data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
  7. data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
  8. data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
  9. data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
  10. data/lib/sportdb/parser/lexer-on_top.rb +125 -0
  11. data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
  12. data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
  13. data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
  14. data/lib/sportdb/parser/lexer.rb +133 -1363
  15. data/lib/sportdb/parser/lexer_buffer.rb +8 -37
  16. data/lib/sportdb/parser/lexer_token.rb +126 -0
  17. data/lib/sportdb/parser/parser.rb +1104 -1403
  18. data/lib/sportdb/parser/racc_parser.rb +36 -32
  19. data/lib/sportdb/parser/racc_tree.rb +65 -98
  20. data/lib/sportdb/parser/token-date--helpers.rb +130 -0
  21. data/lib/sportdb/parser/token-date--names.rb +108 -0
  22. data/lib/sportdb/parser/token-date.rb +20 -192
  23. data/lib/sportdb/parser/token-date_duration.rb +8 -27
  24. data/lib/sportdb/parser/token-geo.rb +16 -16
  25. data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
  26. data/lib/sportdb/parser/token-goals.rb +103 -249
  27. data/lib/sportdb/parser/token-group.rb +8 -22
  28. data/lib/sportdb/parser/token-prop.rb +138 -124
  29. data/lib/sportdb/parser/token-prop_name.rb +48 -39
  30. data/lib/sportdb/parser/token-round.rb +21 -35
  31. data/lib/sportdb/parser/token-score--helpers.rb +189 -0
  32. data/lib/sportdb/parser/token-score.rb +9 -393
  33. data/lib/sportdb/parser/token-score_full.rb +331 -0
  34. data/lib/sportdb/parser/token-status.rb +44 -46
  35. data/lib/sportdb/parser/token-status_inline.rb +112 -0
  36. data/lib/sportdb/parser/token-text.rb +41 -31
  37. data/lib/sportdb/parser/token-time.rb +29 -26
  38. data/lib/sportdb/parser/token.rb +58 -159
  39. data/lib/sportdb/parser/version.rb +1 -1
  40. data/lib/sportdb/parser.rb +45 -17
  41. metadata +19 -6
  42. data/lib/sportdb/parser/blocktxt.rb +0 -99
  43. data/lib/sportdb/parser/lexer_tty.rb +0 -111
  44. data/lib/sportdb/parser/token-table.rb +0 -149
  45. data/lib/sportdb/parser/token_helpers.rb +0 -92
@@ -0,0 +1,108 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ def self.parse_names( txt )
6
+ lines = [] # array of lines (with words)
7
+
8
+ txt.each_line do |line|
9
+ line = line.strip
10
+
11
+ next if line.empty?
12
+ next if line.start_with?( '#' ) ## skip comments too
13
+
14
+ ## strip inline (until end-of-line) comments too
15
+ ## e.g. Janvier Janv Jan ## check janv in use??
16
+ ## => Janvier Janv Jan
17
+
18
+ line = line.sub( /#.*/, '' ).strip
19
+ ## pp line
20
+
21
+ values = line.split( /[ \t]+/ )
22
+ ## pp values
23
+
24
+ ## todo/fix -- add check for duplicates
25
+ lines << values
26
+ end
27
+ lines
28
+
29
+ end # method parse
30
+
31
+
32
+ def self.build_names( lines )
33
+ ## join all words together into a single string e.g.
34
+ ## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
35
+ lines.map { |line| line.join('|') }.join('|')
36
+ end
37
+
38
+
39
+ def self.build_map( lines, downcase: false )
40
+ ## note: downcase name!!!
41
+ ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
42
+ ## {"january" => 1, "jan" => 1,
43
+ ## "february" => 2, "feb" => 2,
44
+ ## "march" => 3, "mar" => 3,
45
+ ## "april" => 4, "apr" => 4,
46
+ ## "may" => 5,
47
+ ## "june" => 6, "jun" => 6, ...
48
+ lines.each_with_index.reduce( {} ) do |h,(line,i)|
49
+ line.each do |name|
50
+ h[ downcase ? name.downcase : name ] = i+1
51
+ end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
52
+ h
53
+ end
54
+ end
55
+
56
+
57
+
58
+
59
+ MONTH_LINES = parse_names( <<TXT )
60
+ January Jan
61
+ February Feb
62
+ March Mar
63
+ April Apr
64
+ May
65
+ June Jun
66
+ July Jul
67
+ August Aug
68
+ September Sept Sep
69
+ October Oct
70
+ November Nov
71
+ December Dec
72
+ TXT
73
+
74
+ MONTH_NAMES = build_names( MONTH_LINES )
75
+ # pp MONTH_NAMES
76
+ MONTH_MAP = build_map( MONTH_LINES, downcase: true )
77
+ # pp MONTH_MAP
78
+
79
+
80
+
81
+ DAY_LINES = parse_names( <<TXT )
82
+ Monday Mon Mo
83
+ Tuesday Tues Tue Tu
84
+ Wednesday Wed We
85
+ Thursday Thurs Thur Thu Th
86
+ Friday Fri Fr
87
+ Saturday Sat Sa
88
+ Sunday Sun Su
89
+ TXT
90
+
91
+ DAY_NAMES = build_names( DAY_LINES )
92
+ # pp DAY_NAMES
93
+ DAY_MAP = build_map( DAY_LINES, downcase: true )
94
+ # pp DAY_MAP
95
+
96
+
97
+ #=>
98
+ # "January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|
99
+ # July|Jul|August|Aug|September|Sept|Sep|October|Oct|
100
+ # November|Nov|December|Dec"
101
+ #
102
+ # "Monday|Mon|Mo|Tuesday|Tues|Tue|Tu|Wednesday|Wed|We|
103
+ # Thursday|Thurs|Thur|Thu|Th|Friday|Fri|Fr|
104
+ # Saturday|Sat|Sa|Sunday|Sun|Su"
105
+
106
+
107
+ end # class Lexer
108
+ end # module SportDb
@@ -3,112 +3,6 @@ class Lexer
3
3
 
4
4
 
5
5
 
6
- def self.parse_names( txt )
7
- lines = [] # array of lines (with words)
8
-
9
- txt.each_line do |line|
10
- line = line.strip
11
-
12
- next if line.empty?
13
- next if line.start_with?( '#' ) ## skip comments too
14
-
15
- ## strip inline (until end-of-line) comments too
16
- ## e.g. Janvier Janv Jan ## check janv in use??
17
- ## => Janvier Janv Jan
18
-
19
- line = line.sub( /#.*/, '' ).strip
20
- ## pp line
21
-
22
- values = line.split( /[ \t]+/ )
23
- ## pp values
24
-
25
- ## todo/fix -- add check for duplicates
26
- lines << values
27
- end
28
- lines
29
-
30
- end # method parse
31
-
32
-
33
- def self.build_names( lines )
34
- ## join all words together into a single string e.g.
35
- ## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
36
- lines.map { |line| line.join('|') }.join('|')
37
- end
38
-
39
-
40
- def self.build_map( lines, downcase: false )
41
- ## note: downcase name!!!
42
- ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
43
- ## {"january" => 1, "jan" => 1,
44
- ## "february" => 2, "feb" => 2,
45
- ## "march" => 3, "mar" => 3,
46
- ## "april" => 4, "apr" => 4,
47
- ## "may" => 5,
48
- ## "june" => 6, "jun" => 6, ...
49
- lines.each_with_index.reduce( {} ) do |h,(line,i)|
50
- line.each do |name|
51
- h[ downcase ? name.downcase : name ] = i+1
52
- end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
- h
54
- end
55
- end
56
-
57
-
58
-
59
-
60
- MONTH_LINES = parse_names( <<TXT )
61
- January Jan
62
- February Feb
63
- March Mar
64
- April Apr
65
- May
66
- June Jun
67
- July Jul
68
- August Aug
69
- September Sept Sep
70
- October Oct
71
- November Nov
72
- December Dec
73
- TXT
74
-
75
- MONTH_NAMES = build_names( MONTH_LINES )
76
- # pp MONTH_NAMES
77
- MONTH_MAP = build_map( MONTH_LINES, downcase: true )
78
- # pp MONTH_MAP
79
-
80
-
81
-
82
- DAY_LINES = parse_names( <<TXT )
83
- Monday Mon Mo
84
- Tuesday Tues Tue Tu
85
- Wednesday Wed We
86
- Thursday Thurs Thur Thu Th
87
- Friday Fri Fr
88
- Saturday Sat Sa
89
- Sunday Sun Su
90
- TXT
91
-
92
- DAY_NAMES = build_names( DAY_LINES )
93
- # pp DAY_NAMES
94
- DAY_MAP = build_map( DAY_LINES, downcase: true )
95
- # pp DAY_MAP
96
-
97
-
98
- #=>
99
- # "January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|
100
- # July|Jul|August|Aug|September|Sept|Sep|October|Oct|
101
- # November|Nov|December|Dec"
102
- #
103
- # "Monday|Mon|Mo|Tuesday|Tues|Tue|Tu|Wednesday|Wed|We|
104
- # Thursday|Thurs|Thur|Thu|Th|Friday|Fri|Fr|
105
- # Saturday|Sat|Sa|Sunday|Sun|Su"
106
-
107
-
108
-
109
- ## todo - add more date variants !!!! why? why not?
110
-
111
-
112
6
  # e.g. Fri Aug 9
113
7
  # Fri Aug 9
114
8
  ## Fri, Aug 9
@@ -118,7 +12,7 @@ DAY_MAP = build_map( DAY_LINES, downcase: true )
118
12
  ## Aug 9, 2024
119
13
  ## note - eat-up optional comma after DAY_NAMES!!
120
14
  ##
121
- ## note - Fri Aug/9 no longer supported!!!
15
+ ## note - Fri Aug/9 no longer supported!!!
122
16
  DATE_I_RE = %r{
123
17
  (?<date>
124
18
  \b
@@ -127,12 +21,12 @@ DATE_I_RE = %r{
127
21
  (?: ,?[ ]+)
128
22
  )?
129
23
  (?<month_name>#{MONTH_NAMES})
130
- [ ]
24
+ [ ]
131
25
  (?<day>\d{1,2})
132
26
  \b
133
27
  ## optional year
134
28
  ( ,? [ ] ## note - comma optinal with single space required for now
135
- (?<year>\d{4}) ## optional year 2025 (yyyy)
29
+ (?<year>\d{4}) ## optional year 2025 (yyyy)
136
30
  \b
137
31
  )?
138
32
  )}ix
@@ -146,13 +40,13 @@ DATE_LEGS_I_RE = %r{
146
40
  (?<date_legs>
147
41
  \b
148
42
  (?<month_name1>#{MONTH_NAMES})
149
- [ ]
43
+ [ ]
150
44
  (?<day1>\d{1,2})
151
45
  [ ] & [ ]
152
46
  (?:
153
47
  (?<month_name2>#{MONTH_NAMES})
154
- [ ]
155
- )? ## note - make 2nd month_name optional
48
+ [ ]
49
+ )? ## note - make 2nd month_name optional
156
50
  (?<day2>\d{1,2})
157
51
  \b
158
52
  )}ix
@@ -161,10 +55,10 @@ DATE_LEGS_I_RE = %r{
161
55
  ###
162
56
  # e.g. 3 June or 10 June
163
57
  ## note - allow more spaces between DAY_NAMES and DAY e.g.
164
- ## Sun 1 Mar
165
- ## Wed 4 Mar
166
- ## Sat 14 Mar
167
- ## Sat 11 Apr
58
+ ## Sun 1 Mar
59
+ ## Wed 4 Mar
60
+ ## Sat 14 Mar
61
+ ## Sat 11 Apr
168
62
  ## Sat 11 Apr 2021
169
63
  ## Sat 11 Apr 21
170
64
  ##
@@ -187,7 +81,7 @@ DATE_II_RE = %r{
187
81
  \b
188
82
  ## optional year
189
83
  ( [ ]
190
- (?:
84
+ (?:
191
85
  (?<year>\d{4}) ## optional year 2025 (yyyy)
192
86
  |
193
87
  (?:
@@ -196,13 +90,13 @@ DATE_II_RE = %r{
196
90
  (?! :|[:h]\d{2})
197
91
  )
198
92
  )
199
- \b
93
+ \b
200
94
  )?
201
95
  )}ix
202
96
 
203
97
 
204
- # e.g. iso-date - 2011-08-25
205
- ## note - allow/support ("shortcuts") e.g 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
98
+ # e.g. iso-date - 2011-08-25
99
+ ## note - allow/support ("shortcuts") e.g 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
206
100
  DATE_III_A_RE = %r{
207
101
  (?<date>
208
102
  \b
@@ -232,20 +126,20 @@ DATE_III_B_RE = %r{
232
126
 
233
127
 
234
128
 
235
- ## allow (short)"european" style 8.8.
129
+ ## allow (short)"european" style 8.8.
236
130
  ## note - assume day/month!!!
237
131
  DATE_IIII_RE = %r{
238
132
  (?<date>
239
133
  \b
240
134
  ## optional day name
241
135
  ((?<day_name>#{DAY_NAMES})
242
- (?: ,?[ ]+)
136
+ (?: ,?[ ]+)
243
137
  )?
244
138
  (?<day>\d{1,2})
245
139
  \.
246
140
  (?<month>\d{1,2})
247
141
  \.
248
- (?: (?:
142
+ (?: (?:
249
143
  (?<year>\d{4}) ## optional year 2025 (yyyy)
250
144
  |
251
145
  (?<yy>\d{2}) ## optional year 25 (yy)
@@ -271,9 +165,9 @@ DATE_IIIII_RE = %r{
271
165
  /
272
166
  (?<month>\d{1,2})
273
167
  \b
274
- (?:
168
+ (?:
275
169
  /
276
- (?:
170
+ (?:
277
171
  (?<year>\d{4}) ## optional year 2025 (yyyy)
278
172
  |
279
173
  (?<yy>\d{2}) ## optional year 25 (yy)
@@ -293,7 +187,7 @@ DATE_RE = Regexp.union(
293
187
  DATE_II_RE,
294
188
  DATE_III_A_RE, ## e.g. 1973-08-14
295
189
  DATE_III_B_RE,
296
- DATE_IIII_RE, ## e.g. 8.8. or 8.13.79 or 08.14.1973
190
+ DATE_IIII_RE, ## e.g. 8.8. or 8.13.79 or 08.14.1973
297
191
  DATE_IIIII_RE, ## e.g. 08/14/1973
298
192
  )
299
193
 
@@ -301,72 +195,6 @@ DATE_RE = Regexp.union(
301
195
  DATE_LEGS_RE = DATE_LEGS_I_RE
302
196
 
303
197
 
304
- ## "internal" date helpers
305
- def self._build_date( m )
306
- date = {}
307
- ## map month names
308
- ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
309
- date[:y] = m[:year].to_i(10) if m[:year]
310
- ## check - use y too for two-digit year or keep separate - why? why not?
311
- date[:yy] = m[:yy].to_i(10) if m[:yy] ## two digit year (e.g. 25 or 78 etc.)
312
- date[:m] = m[:month].to_i(10) if m[:month]
313
- date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
314
- date[:d] = m[:day].to_i(10) if m[:day]
315
- date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
316
-
317
- date
318
- end
319
- def _build_date( m ) self.class._build_date( m ); end
320
-
321
- def self._build_date_legs( m )
322
- legs = {}
323
- ## map month names
324
- ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
325
- date = {}
326
- date[:m] = MONTH_MAP[ m[:month_name1].downcase ]
327
- date[:d] = m[:day1].to_i(10)
328
- legs[:date1] = date
329
-
330
- date = {}
331
- date[:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
332
- date[:d] = m[:day2].to_i(10)
333
- legs[:date2] = date
334
-
335
- legs
336
- end
337
- def _build_date_legs( m ) self.class._build_date_legs( m ); end
338
-
339
-
340
-
341
-
342
- #############
343
- ## "top-level" add a date parser helper
344
- def self.parse_date( str, start: )
345
- if m=DATE_RE.match( str )
346
-
347
- year = m[:year].to_i(10) if m[:year]
348
- month = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
349
- day = m[:day].to_i(10) if m[:day]
350
- wday = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
351
-
352
- if year.nil? ## try to calculate year
353
- year = if month > start.month ||
354
- (month == start.month && day >= start.day)
355
- # assume same year as start_at event (e.g. 2013 for 2013/14 season)
356
- start.year
357
- else
358
- # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
359
- start.year+1
360
- end
361
- end
362
- Date.new( year,month,day )
363
- else
364
- puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
365
- exit 1
366
- end
367
- end
368
-
369
198
 
370
199
  end # class Lexer
371
200
  end # module SportDb
372
-
@@ -52,7 +52,7 @@ DURATION_I_RE = %r{
52
52
  [ ]
53
53
  )?
54
54
  (?<month_name1>#{MONTH_NAMES})
55
- [ ]
55
+ [ ]
56
56
  (?<day1>\d{1,2})
57
57
  ## optional year
58
58
  ( ,? # optional comma
@@ -68,7 +68,7 @@ DURATION_I_RE = %r{
68
68
  [ ]
69
69
  )?
70
70
  (?<month_name2>#{MONTH_NAMES})
71
- [ ]
71
+ [ ]
72
72
  (?<day2>\d{1,2})
73
73
  ## optional year
74
74
  ( ,? # optional comma
@@ -81,14 +81,14 @@ DURATION_I_RE = %r{
81
81
 
82
82
 
83
83
 
84
- # FIX - remove this variant
84
+ # FIX - remove this variant
85
85
  # "standardize on month day [year]" !!!!
86
86
 
87
87
  =begin
88
88
  ###
89
89
  # variant ii
90
90
  # e.g. 26 July - 27 July
91
- # 26 July,
91
+ # 26 July,
92
92
  XXX_DURATION_II_RE = %r{
93
93
  (?<duration>
94
94
  \b
@@ -101,7 +101,7 @@ XXX_DURATION_II_RE = %r{
101
101
  [ ]
102
102
  (?<month_name1>#{MONTH_NAMES})
103
103
  ## optional year
104
- (
104
+ (
105
105
  [ ]
106
106
  (?<year1>\d{4})
107
107
  )?
@@ -128,12 +128,12 @@ XXX_DURATION_II_RE = %r{
128
128
 
129
129
  # variant ii
130
130
  # add support for shorthand
131
- # August 16-18, 2011
131
+ # August 16-18, 2011
132
132
  # September 13-15, 2011
133
133
  # October 18-20, 2011
134
134
  # March 6-8 2012
135
135
  # March 6-8
136
- #
136
+ #
137
137
  # - add support for August 16+17 or such (and check 16+18)
138
138
  # use <op> to check if day2 is a plus or range or such - why? why not?
139
139
 
@@ -150,7 +150,7 @@ DURATION_II_RE = %r{
150
150
  ,? ## optional comma
151
151
  [ ]
152
152
  (?<year1>\d{4})
153
- )? ## optional year
153
+ )? ## optional year
154
154
  )
155
155
  \b
156
156
  )}ix
@@ -166,25 +166,6 @@ DURATION_RE = Regexp.union(
166
166
  )
167
167
 
168
168
 
169
- def self._build_duration( m )
170
- ## todo/check/fix - if end: works for kwargs!!!!!
171
- duration = { start: {}, end: {}}
172
-
173
- duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
174
- duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
175
- duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
176
- duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
177
-
178
- duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
179
- duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
180
- duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
181
- duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
182
-
183
- duration
184
- end
185
- def _build_duration(m) self.class._build_duration( m ); end
186
-
187
169
 
188
170
  end # class Lexer
189
171
  end # module SportDb
190
-
@@ -21,7 +21,7 @@ GEO_TEXT_RE = %r{
21
21
  # opt 1 - start with alpha
22
22
  \p{L}+ ## all unicode letters (e.g. [a-z])
23
23
  |
24
- # opt 2 - start with num!! -
24
+ # opt 2 - start with num!! -
25
25
  \d+ # check for num lookahead (MUST be space or dot)
26
26
  ## MAY be followed by (optional space) !
27
27
  ## MUST be follow by a to z!!!!
@@ -37,11 +37,11 @@ GEO_TEXT_RE = %r{
37
37
 
38
38
  ##
39
39
  ## todo/check - find a different "more intuitive" regex/rule if possible?
40
- ## for single spaces only (and _/ MUST not be surround by spaces)
40
+ ## for single spaces only (and _/ MUST not be surround by spaces)
41
41
 
42
- (?:
42
+ (?:
43
43
  (?:
44
- [ ]? # only single (inline) space allowed - double spaces are breaks!!!
44
+ [ ]? # only single (inline) space allowed - double spaces are breaks!!!
45
45
  (?:
46
46
  \p{L} | \d | [.&'°]
47
47
  |
@@ -64,7 +64,7 @@ GEO_TEXT_RE = %r{
64
64
  ## Ost-Berlin (Walter-Ulbricht)
65
65
  ## Athinai (OAKA - Maroussi)
66
66
  ##
67
- ## or Valencia (Spain) or Solna
67
+ ## or Valencia (Spain) or Solna
68
68
  (?:
69
69
  [ ]
70
70
  \(
@@ -93,20 +93,14 @@ GEO_TEXT_RE = %r{
93
93
 
94
94
 
95
95
 
96
- GEO_BASICS_RE = %r{
97
- (?<spaces> [ ]{2,}) |
98
- (?<space> [ ])
99
- |
100
- (?<sym> [,›>\[] )
101
- }ix
102
96
 
103
97
 
104
98
  ## note - add "hacky" check for comma that is followed by a prop(erty)
105
99
  ##
106
100
  ## make sure to NOT match
107
101
  ## props e.g. att: 18000
108
- ## July 10 @ Paris, Parc des Princes, att: 18000
109
- ## July 10 @ Paris, Parc des Princes, att: 18000
102
+ ## July 10 @ Paris, Parc des Princes, att: 18000
103
+ ## July 10 @ Paris, Parc des Princes, att: 18000
110
104
  ##
111
105
 
112
106
 
@@ -115,18 +109,24 @@ GEO_END_RE = %r{
115
109
  ,
116
110
  )
117
111
  ## POSITIVE lookahead for props
118
- (?=
112
+ ## todo/fix - use generic [a-z]+ - why? why not?
113
+ (?=
119
114
  [ ]* ## optional spaces
120
- (?: att|ref) ## todo/fix - use generic [a-z]+ - why? why not?
115
+ (?: attendance|att
116
+ | referee?s|refs?
117
+ )
121
118
  :
122
119
  )
123
120
  }ix
124
121
 
125
122
 
123
+
124
+
126
125
  GEO_RE = Regexp.union(
126
+ SPACES_RE,
127
127
  GEO_END_RE,
128
- GEO_BASICS_RE,
129
128
  GEO_TEXT_RE,
129
+ / (?<sym> [,›>\[] ) /x,
130
130
  ANY_RE,
131
131
  )
132
132
 
@@ -0,0 +1,114 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+
6
+ def self._build_goal_minute( m )
7
+ minute = {}
8
+
9
+ minute[:m] = m[:value].to_i(10) ## always required
10
+
11
+ ## stoppage/injury time (offset)
12
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
13
+
14
+ minute[:og] = true if m[:og]
15
+ minute[:pen] = true if m[:pen]
16
+ minute[:freekick] = true if m[:fk]
17
+ minute[:header] = true if m[:hdr]
18
+
19
+ minute[:secs] = m[:secs].to_i(10) if m[:secs]
20
+
21
+ minute
22
+ end
23
+
24
+ def self._build_goal_minute_na( m )
25
+ minute = {}
26
+
27
+ minute[:m] = '?' ## or use nil or 999 or -1 or ???
28
+
29
+ minute[:og] = true if m[:og]
30
+ minute[:pen] = true if m[:pen]
31
+ minute[:freekick] = true if m[:fk]
32
+ minute[:header] = true if m[:hdr]
33
+
34
+ minute
35
+ end
36
+
37
+
38
+
39
+ def self._build_minute( m )
40
+ minute = {}
41
+ minute[:m] = m[:value].to_i(10) ## always required
42
+
43
+ ## stoppage/injury time (offset)
44
+ minute[:offset] = m[:value2].to_i(10) if m[:value2]
45
+
46
+ minute
47
+ end
48
+
49
+
50
+ def self._build_goal_count( m )
51
+ count = {}
52
+ count[:count] = m[:value].to_i(10) if m[:value]
53
+ count[:og] = m[:og_value] ? m[:og_value].to_i(10) : 1 if m[:og] ## check flag
54
+ count[:pen] = m[:pen_value] ? m[:pen_value].to_i(10) : 1 if m[:pen] ## check flag
55
+ count
56
+ end
57
+
58
+ def self._build_goal_type( m )
59
+ goal = {}
60
+ goal[:og] = true if m[:og]
61
+ goal[:pen] = true if m[:pen]
62
+ goal[:freekick] = true if m[:fk]
63
+ goal[:header] = true if m[:hdr]
64
+ goal
65
+ end
66
+
67
+
68
+ def _build_goal_minute( m ) self.class._build_goal_minute( m ); end
69
+ def _build_goal_minute_na( m ) self.class._build_goal_minute_na( m ); end
70
+ def _build_minute( m ) self.class._build_minute( m ); end
71
+ def _build_goal_count( m ) self.class._build_goal_count( m ); end
72
+ def _build_goal_type( m ) self.class._build_goal_type( m ); end
73
+
74
+
75
+
76
+
77
+
78
+ ###
79
+ # parse helpers
80
+
81
+ def self._parse_goal_minute( str )
82
+ ## note - strip - leading/trailing spaces
83
+ m = GOAL_MINUTE_RE.match( str.strip )
84
+ if m && m.pre_match == '' && m.post_match == ''
85
+ _build_goal_minute( m )
86
+ elsif m
87
+ ## note - match BUT not anchored to start and end-of-string!!!
88
+ ## report, error somehow??
89
+ nil
90
+ else
91
+ nil ## no match - return nil
92
+ end
93
+ end
94
+
95
+ def self._parse_goal_count( str )
96
+ ## note - strip - leading/trailing spaces
97
+ m = GOAL_COUNT_RE.match( str.strip )
98
+ if m && m.pre_match == '' && m.post_match == ''
99
+ _build_goal_count( m )
100
+ elsif m
101
+ ## note - match BUT not anchored to start and end-of-string!!!
102
+ ## report, error somehow??
103
+ nil
104
+ else
105
+ nil ## no match - return nil
106
+ end
107
+ end
108
+
109
+
110
+
111
+
112
+
113
+ end # class Lexer
114
+ end # module SportDb