sportdb-parser 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +17 -4
- data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
- data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
- data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
- data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
- data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
- data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
- data/lib/sportdb/parser/lexer-on_top.rb +125 -0
- data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
- data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
- data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
- data/lib/sportdb/parser/lexer.rb +133 -1363
- data/lib/sportdb/parser/lexer_buffer.rb +8 -37
- data/lib/sportdb/parser/lexer_token.rb +126 -0
- data/lib/sportdb/parser/parser.rb +1104 -1403
- data/lib/sportdb/parser/racc_parser.rb +36 -32
- data/lib/sportdb/parser/racc_tree.rb +65 -98
- data/lib/sportdb/parser/token-date--helpers.rb +130 -0
- data/lib/sportdb/parser/token-date--names.rb +108 -0
- data/lib/sportdb/parser/token-date.rb +20 -192
- data/lib/sportdb/parser/token-date_duration.rb +8 -27
- data/lib/sportdb/parser/token-geo.rb +16 -16
- data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
- data/lib/sportdb/parser/token-goals.rb +103 -249
- data/lib/sportdb/parser/token-group.rb +8 -22
- data/lib/sportdb/parser/token-prop.rb +138 -124
- data/lib/sportdb/parser/token-prop_name.rb +48 -39
- data/lib/sportdb/parser/token-round.rb +21 -35
- data/lib/sportdb/parser/token-score--helpers.rb +189 -0
- data/lib/sportdb/parser/token-score.rb +9 -393
- data/lib/sportdb/parser/token-score_full.rb +331 -0
- data/lib/sportdb/parser/token-status.rb +44 -46
- data/lib/sportdb/parser/token-status_inline.rb +112 -0
- data/lib/sportdb/parser/token-text.rb +41 -31
- data/lib/sportdb/parser/token-time.rb +29 -26
- data/lib/sportdb/parser/token.rb +58 -159
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +45 -17
- metadata +19 -6
- data/lib/sportdb/parser/blocktxt.rb +0 -99
- data/lib/sportdb/parser/lexer_tty.rb +0 -111
- data/lib/sportdb/parser/token-table.rb +0 -149
- data/lib/sportdb/parser/token_helpers.rb +0 -92
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
module SportDb
|
|
2
|
+
class Lexer
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def self.parse_names( txt )
|
|
6
|
+
lines = [] # array of lines (with words)
|
|
7
|
+
|
|
8
|
+
txt.each_line do |line|
|
|
9
|
+
line = line.strip
|
|
10
|
+
|
|
11
|
+
next if line.empty?
|
|
12
|
+
next if line.start_with?( '#' ) ## skip comments too
|
|
13
|
+
|
|
14
|
+
## strip inline (until end-of-line) comments too
|
|
15
|
+
## e.g. Janvier Janv Jan ## check janv in use??
|
|
16
|
+
## => Janvier Janv Jan
|
|
17
|
+
|
|
18
|
+
line = line.sub( /#.*/, '' ).strip
|
|
19
|
+
## pp line
|
|
20
|
+
|
|
21
|
+
values = line.split( /[ \t]+/ )
|
|
22
|
+
## pp values
|
|
23
|
+
|
|
24
|
+
## todo/fix -- add check for duplicates
|
|
25
|
+
lines << values
|
|
26
|
+
end
|
|
27
|
+
lines
|
|
28
|
+
|
|
29
|
+
end # method parse_names
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def self.build_names( lines )
|
|
33
|
+
## join all words together into a single string e.g.
|
|
34
|
+
## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
|
|
35
|
+
lines.map { |line| line.join('|') }.join('|')
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def self.build_map( lines, downcase: false )
|
|
40
|
+
## note: names get downcased only if downcase: true is passed in (default is false)
|
|
41
|
+
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
|
42
|
+
## {"january" => 1, "jan" => 1,
|
|
43
|
+
## "february" => 2, "feb" => 2,
|
|
44
|
+
## "march" => 3, "mar" => 3,
|
|
45
|
+
## "april" => 4, "apr" => 4,
|
|
46
|
+
## "may" => 5,
|
|
47
|
+
## "june" => 6, "jun" => 6, ...
|
|
48
|
+
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
|
49
|
+
line.each do |name|
|
|
50
|
+
h[ downcase ? name.downcase : name ] = i+1
|
|
51
|
+
end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
|
52
|
+
h
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
MONTH_LINES = parse_names( <<TXT )
|
|
60
|
+
January Jan
|
|
61
|
+
February Feb
|
|
62
|
+
March Mar
|
|
63
|
+
April Apr
|
|
64
|
+
May
|
|
65
|
+
June Jun
|
|
66
|
+
July Jul
|
|
67
|
+
August Aug
|
|
68
|
+
September Sept Sep
|
|
69
|
+
October Oct
|
|
70
|
+
November Nov
|
|
71
|
+
December Dec
|
|
72
|
+
TXT
|
|
73
|
+
|
|
74
|
+
MONTH_NAMES = build_names( MONTH_LINES )
|
|
75
|
+
# pp MONTH_NAMES
|
|
76
|
+
MONTH_MAP = build_map( MONTH_LINES, downcase: true )
|
|
77
|
+
# pp MONTH_MAP
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
DAY_LINES = parse_names( <<TXT )
|
|
82
|
+
Monday Mon Mo
|
|
83
|
+
Tuesday Tues Tue Tu
|
|
84
|
+
Wednesday Wed We
|
|
85
|
+
Thursday Thurs Thur Thu Th
|
|
86
|
+
Friday Fri Fr
|
|
87
|
+
Saturday Sat Sa
|
|
88
|
+
Sunday Sun Su
|
|
89
|
+
TXT
|
|
90
|
+
|
|
91
|
+
DAY_NAMES = build_names( DAY_LINES )
|
|
92
|
+
# pp DAY_NAMES
|
|
93
|
+
DAY_MAP = build_map( DAY_LINES, downcase: true )
|
|
94
|
+
# pp DAY_MAP
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
#=>
|
|
98
|
+
# "January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|
|
|
99
|
+
# July|Jul|August|Aug|September|Sept|Sep|October|Oct|
|
|
100
|
+
# November|Nov|December|Dec"
|
|
101
|
+
#
|
|
102
|
+
# "Monday|Mon|Mo|Tuesday|Tues|Tue|Tu|Wednesday|Wed|We|
|
|
103
|
+
# Thursday|Thurs|Thur|Thu|Th|Friday|Fri|Fr|
|
|
104
|
+
# Saturday|Sat|Sa|Sunday|Sun|Su"
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
end # class Lexer
|
|
108
|
+
end # module SportDb
|
|
@@ -3,112 +3,6 @@ class Lexer
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def self.parse_names( txt )
|
|
7
|
-
lines = [] # array of lines (with words)
|
|
8
|
-
|
|
9
|
-
txt.each_line do |line|
|
|
10
|
-
line = line.strip
|
|
11
|
-
|
|
12
|
-
next if line.empty?
|
|
13
|
-
next if line.start_with?( '#' ) ## skip comments too
|
|
14
|
-
|
|
15
|
-
## strip inline (until end-of-line) comments too
|
|
16
|
-
## e.g. Janvier Janv Jan ## check janv in use??
|
|
17
|
-
## => Janvier Janv Jan
|
|
18
|
-
|
|
19
|
-
line = line.sub( /#.*/, '' ).strip
|
|
20
|
-
## pp line
|
|
21
|
-
|
|
22
|
-
values = line.split( /[ \t]+/ )
|
|
23
|
-
## pp values
|
|
24
|
-
|
|
25
|
-
## todo/fix -- add check for duplicates
|
|
26
|
-
lines << values
|
|
27
|
-
end
|
|
28
|
-
lines
|
|
29
|
-
|
|
30
|
-
end # method parse
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def self.build_names( lines )
|
|
34
|
-
## join all words together into a single string e.g.
|
|
35
|
-
## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
|
|
36
|
-
lines.map { |line| line.join('|') }.join('|')
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def self.build_map( lines, downcase: false )
|
|
41
|
-
## note: downcase name!!!
|
|
42
|
-
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
|
43
|
-
## {"january" => 1, "jan" => 1,
|
|
44
|
-
## "february" => 2, "feb" => 2,
|
|
45
|
-
## "march" => 3, "mar" => 3,
|
|
46
|
-
## "april" => 4, "apr" => 4,
|
|
47
|
-
## "may" => 5,
|
|
48
|
-
## "june" => 6, "jun" => 6, ...
|
|
49
|
-
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
|
50
|
-
line.each do |name|
|
|
51
|
-
h[ downcase ? name.downcase : name ] = i+1
|
|
52
|
-
end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
|
53
|
-
h
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
MONTH_LINES = parse_names( <<TXT )
|
|
61
|
-
January Jan
|
|
62
|
-
February Feb
|
|
63
|
-
March Mar
|
|
64
|
-
April Apr
|
|
65
|
-
May
|
|
66
|
-
June Jun
|
|
67
|
-
July Jul
|
|
68
|
-
August Aug
|
|
69
|
-
September Sept Sep
|
|
70
|
-
October Oct
|
|
71
|
-
November Nov
|
|
72
|
-
December Dec
|
|
73
|
-
TXT
|
|
74
|
-
|
|
75
|
-
MONTH_NAMES = build_names( MONTH_LINES )
|
|
76
|
-
# pp MONTH_NAMES
|
|
77
|
-
MONTH_MAP = build_map( MONTH_LINES, downcase: true )
|
|
78
|
-
# pp MONTH_MAP
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
DAY_LINES = parse_names( <<TXT )
|
|
83
|
-
Monday Mon Mo
|
|
84
|
-
Tuesday Tues Tue Tu
|
|
85
|
-
Wednesday Wed We
|
|
86
|
-
Thursday Thurs Thur Thu Th
|
|
87
|
-
Friday Fri Fr
|
|
88
|
-
Saturday Sat Sa
|
|
89
|
-
Sunday Sun Su
|
|
90
|
-
TXT
|
|
91
|
-
|
|
92
|
-
DAY_NAMES = build_names( DAY_LINES )
|
|
93
|
-
# pp DAY_NAMES
|
|
94
|
-
DAY_MAP = build_map( DAY_LINES, downcase: true )
|
|
95
|
-
# pp DAY_MAP
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
#=>
|
|
99
|
-
# "January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|
|
|
100
|
-
# July|Jul|August|Aug|September|Sept|Sep|October|Oct|
|
|
101
|
-
# November|Nov|December|Dec"
|
|
102
|
-
#
|
|
103
|
-
# "Monday|Mon|Mo|Tuesday|Tues|Tue|Tu|Wednesday|Wed|We|
|
|
104
|
-
# Thursday|Thurs|Thur|Thu|Th|Friday|Fri|Fr|
|
|
105
|
-
# Saturday|Sat|Sa|Sunday|Sun|Su"
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
## todo - add more date variants !!!! why? why not?
|
|
110
|
-
|
|
111
|
-
|
|
112
6
|
# e.g. Fri Aug 9
|
|
113
7
|
# Fri Aug 9
|
|
114
8
|
## Fri, Aug 9
|
|
@@ -118,7 +12,7 @@ DAY_MAP = build_map( DAY_LINES, downcase: true )
|
|
|
118
12
|
## Aug 9, 2024
|
|
119
13
|
## note - eat-up optional comma after DAY_NAMES!!
|
|
120
14
|
##
|
|
121
|
-
## note - Fri Aug/9 no longer supported!!!
|
|
15
|
+
## note - Fri Aug/9 no longer supported!!!
|
|
122
16
|
DATE_I_RE = %r{
|
|
123
17
|
(?<date>
|
|
124
18
|
\b
|
|
@@ -127,12 +21,12 @@ DATE_I_RE = %r{
|
|
|
127
21
|
(?: ,?[ ]+)
|
|
128
22
|
)?
|
|
129
23
|
(?<month_name>#{MONTH_NAMES})
|
|
130
|
-
[ ]
|
|
24
|
+
[ ]
|
|
131
25
|
(?<day>\d{1,2})
|
|
132
26
|
\b
|
|
133
27
|
## optional year
|
|
134
28
|
( ,? [ ] ## note - comma optinal with single space required for now
|
|
135
|
-
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
29
|
+
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
136
30
|
\b
|
|
137
31
|
)?
|
|
138
32
|
)}ix
|
|
@@ -146,13 +40,13 @@ DATE_LEGS_I_RE = %r{
|
|
|
146
40
|
(?<date_legs>
|
|
147
41
|
\b
|
|
148
42
|
(?<month_name1>#{MONTH_NAMES})
|
|
149
|
-
[ ]
|
|
43
|
+
[ ]
|
|
150
44
|
(?<day1>\d{1,2})
|
|
151
45
|
[ ] & [ ]
|
|
152
46
|
(?:
|
|
153
47
|
(?<month_name2>#{MONTH_NAMES})
|
|
154
|
-
[ ]
|
|
155
|
-
)? ## note - make 2nd month_name optional
|
|
48
|
+
[ ]
|
|
49
|
+
)? ## note - make 2nd month_name optional
|
|
156
50
|
(?<day2>\d{1,2})
|
|
157
51
|
\b
|
|
158
52
|
)}ix
|
|
@@ -161,10 +55,10 @@ DATE_LEGS_I_RE = %r{
|
|
|
161
55
|
###
|
|
162
56
|
# e.g. 3 June or 10 June
|
|
163
57
|
## note - allow more spaces between DAY_NAMES and DAY e.g.
|
|
164
|
-
## Sun 1 Mar
|
|
165
|
-
## Wed 4 Mar
|
|
166
|
-
## Sat 14 Mar
|
|
167
|
-
## Sat 11 Apr
|
|
58
|
+
## Sun 1 Mar
|
|
59
|
+
## Wed 4 Mar
|
|
60
|
+
## Sat 14 Mar
|
|
61
|
+
## Sat 11 Apr
|
|
168
62
|
## Sat 11 Apr 2021
|
|
169
63
|
## Sat 11 Apr 21
|
|
170
64
|
##
|
|
@@ -187,7 +81,7 @@ DATE_II_RE = %r{
|
|
|
187
81
|
\b
|
|
188
82
|
## optional year
|
|
189
83
|
( [ ]
|
|
190
|
-
(?:
|
|
84
|
+
(?:
|
|
191
85
|
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
192
86
|
|
|
|
193
87
|
(?:
|
|
@@ -196,13 +90,13 @@ DATE_II_RE = %r{
|
|
|
196
90
|
(?! :|[:h]\d{2})
|
|
197
91
|
)
|
|
198
92
|
)
|
|
199
|
-
\b
|
|
93
|
+
\b
|
|
200
94
|
)?
|
|
201
95
|
)}ix
|
|
202
96
|
|
|
203
97
|
|
|
204
|
-
# e.g. iso-date - 2011-08-25
|
|
205
|
-
## note - allow/support ("shortcuts") e.g 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
|
|
98
|
+
# e.g. iso-date - 2011-08-25
|
|
99
|
+
## note - allow/support ("shortcuts") e.g. 2011-8-25 or 2011-8-3 / 2011-08-03 etc.
|
|
206
100
|
DATE_III_A_RE = %r{
|
|
207
101
|
(?<date>
|
|
208
102
|
\b
|
|
@@ -232,20 +126,20 @@ DATE_III_B_RE = %r{
|
|
|
232
126
|
|
|
233
127
|
|
|
234
128
|
|
|
235
|
-
## allow (short)"european" style 8.8.
|
|
129
|
+
## allow (short)"european" style 8.8.
|
|
236
130
|
## note - assume day/month!!!
|
|
237
131
|
DATE_IIII_RE = %r{
|
|
238
132
|
(?<date>
|
|
239
133
|
\b
|
|
240
134
|
## optional day name
|
|
241
135
|
((?<day_name>#{DAY_NAMES})
|
|
242
|
-
(?: ,?[ ]+)
|
|
136
|
+
(?: ,?[ ]+)
|
|
243
137
|
)?
|
|
244
138
|
(?<day>\d{1,2})
|
|
245
139
|
\.
|
|
246
140
|
(?<month>\d{1,2})
|
|
247
141
|
\.
|
|
248
|
-
(?: (?:
|
|
142
|
+
(?: (?:
|
|
249
143
|
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
250
144
|
|
|
|
251
145
|
(?<yy>\d{2}) ## optional year 25 (yy)
|
|
@@ -271,9 +165,9 @@ DATE_IIIII_RE = %r{
|
|
|
271
165
|
/
|
|
272
166
|
(?<month>\d{1,2})
|
|
273
167
|
\b
|
|
274
|
-
(?:
|
|
168
|
+
(?:
|
|
275
169
|
/
|
|
276
|
-
(?:
|
|
170
|
+
(?:
|
|
277
171
|
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
|
278
172
|
|
|
|
279
173
|
(?<yy>\d{2}) ## optional year 25 (yy)
|
|
@@ -293,7 +187,7 @@ DATE_RE = Regexp.union(
|
|
|
293
187
|
DATE_II_RE,
|
|
294
188
|
DATE_III_A_RE, ## e.g. 1973-08-14
|
|
295
189
|
DATE_III_B_RE,
|
|
296
|
-
DATE_IIII_RE, ## e.g. 8.8. or 8.13.79 or 08.14.1973
|
|
190
|
+
DATE_IIII_RE, ## e.g. 8.8. or 8.13.79 or 08.14.1973
|
|
297
191
|
DATE_IIIII_RE, ## e.g. 08/14/1973
|
|
298
192
|
)
|
|
299
193
|
|
|
@@ -301,72 +195,6 @@ DATE_RE = Regexp.union(
|
|
|
301
195
|
DATE_LEGS_RE = DATE_LEGS_I_RE
|
|
302
196
|
|
|
303
197
|
|
|
304
|
-
## "internal" date helpers
|
|
305
|
-
def self._build_date( m )
|
|
306
|
-
date = {}
|
|
307
|
-
## map month names
|
|
308
|
-
## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
|
|
309
|
-
date[:y] = m[:year].to_i(10) if m[:year]
|
|
310
|
-
## check - use y too for two-digit year or keep separate - why? why not?
|
|
311
|
-
date[:yy] = m[:yy].to_i(10) if m[:yy] ## two digit year (e.g. 25 or 78 etc.)
|
|
312
|
-
date[:m] = m[:month].to_i(10) if m[:month]
|
|
313
|
-
date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
|
|
314
|
-
date[:d] = m[:day].to_i(10) if m[:day]
|
|
315
|
-
date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
|
|
316
|
-
|
|
317
|
-
date
|
|
318
|
-
end
|
|
319
|
-
def _build_date( m ) self.class._build_date( m ); end
|
|
320
|
-
|
|
321
|
-
def self._build_date_legs( m )
|
|
322
|
-
legs = {}
|
|
323
|
-
## map month names
|
|
324
|
-
## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
|
|
325
|
-
date = {}
|
|
326
|
-
date[:m] = MONTH_MAP[ m[:month_name1].downcase ]
|
|
327
|
-
date[:d] = m[:day1].to_i(10)
|
|
328
|
-
legs[:date1] = date
|
|
329
|
-
|
|
330
|
-
date = {}
|
|
331
|
-
date[:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
|
|
332
|
-
date[:d] = m[:day2].to_i(10)
|
|
333
|
-
legs[:date2] = date
|
|
334
|
-
|
|
335
|
-
legs
|
|
336
|
-
end
|
|
337
|
-
def _build_date_legs( m ) self.class._build_date_legs( m ); end
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
#############
|
|
343
|
-
## "top-level" add a date parser helper
|
|
344
|
-
def self.parse_date( str, start: )
|
|
345
|
-
if m=DATE_RE.match( str )
|
|
346
|
-
|
|
347
|
-
year = m[:year].to_i(10) if m[:year]
|
|
348
|
-
month = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
|
|
349
|
-
day = m[:day].to_i(10) if m[:day]
|
|
350
|
-
wday = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
|
|
351
|
-
|
|
352
|
-
if year.nil? ## try to calculate year
|
|
353
|
-
year = if month > start.month ||
|
|
354
|
-
(month == start.month && day >= start.day)
|
|
355
|
-
# assume same year as start_at event (e.g. 2013 for 2013/14 season)
|
|
356
|
-
start.year
|
|
357
|
-
else
|
|
358
|
-
# assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
|
|
359
|
-
start.year+1
|
|
360
|
-
end
|
|
361
|
-
end
|
|
362
|
-
Date.new( year,month,day )
|
|
363
|
-
else
|
|
364
|
-
puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
|
|
365
|
-
exit 1
|
|
366
|
-
end
|
|
367
|
-
end
|
|
368
|
-
|
|
369
198
|
|
|
370
199
|
end # class Lexer
|
|
371
200
|
end # module SportDb
|
|
372
|
-
|
|
@@ -52,7 +52,7 @@ DURATION_I_RE = %r{
|
|
|
52
52
|
[ ]
|
|
53
53
|
)?
|
|
54
54
|
(?<month_name1>#{MONTH_NAMES})
|
|
55
|
-
[ ]
|
|
55
|
+
[ ]
|
|
56
56
|
(?<day1>\d{1,2})
|
|
57
57
|
## optional year
|
|
58
58
|
( ,? # optional comma
|
|
@@ -68,7 +68,7 @@ DURATION_I_RE = %r{
|
|
|
68
68
|
[ ]
|
|
69
69
|
)?
|
|
70
70
|
(?<month_name2>#{MONTH_NAMES})
|
|
71
|
-
[ ]
|
|
71
|
+
[ ]
|
|
72
72
|
(?<day2>\d{1,2})
|
|
73
73
|
## optional year
|
|
74
74
|
( ,? # optional comma
|
|
@@ -81,14 +81,14 @@ DURATION_I_RE = %r{
|
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
# FIX - remove this variant
|
|
84
|
+
# FIX - remove this variant
|
|
85
85
|
# "standardize on month day [year]" !!!!
|
|
86
86
|
|
|
87
87
|
=begin
|
|
88
88
|
###
|
|
89
89
|
# variant ii
|
|
90
90
|
# e.g. 26 July - 27 July
|
|
91
|
-
# 26 July,
|
|
91
|
+
# 26 July,
|
|
92
92
|
XXX_DURATION_II_RE = %r{
|
|
93
93
|
(?<duration>
|
|
94
94
|
\b
|
|
@@ -101,7 +101,7 @@ XXX_DURATION_II_RE = %r{
|
|
|
101
101
|
[ ]
|
|
102
102
|
(?<month_name1>#{MONTH_NAMES})
|
|
103
103
|
## optional year
|
|
104
|
-
(
|
|
104
|
+
(
|
|
105
105
|
[ ]
|
|
106
106
|
(?<year1>\d{4})
|
|
107
107
|
)?
|
|
@@ -128,12 +128,12 @@ XXX_DURATION_II_RE = %r{
|
|
|
128
128
|
|
|
129
129
|
# variant ii
|
|
130
130
|
# add support for shorthand
|
|
131
|
-
# August 16-18, 2011
|
|
131
|
+
# August 16-18, 2011
|
|
132
132
|
# September 13-15, 2011
|
|
133
133
|
# October 18-20, 2011
|
|
134
134
|
# March 6-8 2012
|
|
135
135
|
# March 6-8
|
|
136
|
-
#
|
|
136
|
+
#
|
|
137
137
|
# - add support for August 16+17 or such (and check 16+18)
|
|
138
138
|
# use <op> to check if day2 is a plus or range or such - why? why not?
|
|
139
139
|
|
|
@@ -150,7 +150,7 @@ DURATION_II_RE = %r{
|
|
|
150
150
|
,? ## optional comma
|
|
151
151
|
[ ]
|
|
152
152
|
(?<year1>\d{4})
|
|
153
|
-
)? ## optional year
|
|
153
|
+
)? ## optional year
|
|
154
154
|
)
|
|
155
155
|
\b
|
|
156
156
|
)}ix
|
|
@@ -166,25 +166,6 @@ DURATION_RE = Regexp.union(
|
|
|
166
166
|
)
|
|
167
167
|
|
|
168
168
|
|
|
169
|
-
def self._build_duration( m )
|
|
170
|
-
## todo/check/fix - if end: works for kwargs!!!!!
|
|
171
|
-
duration = { start: {}, end: {}}
|
|
172
|
-
|
|
173
|
-
duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
|
|
174
|
-
duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
|
|
175
|
-
duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
|
|
176
|
-
duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
|
|
177
|
-
|
|
178
|
-
duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
|
|
179
|
-
duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
|
|
180
|
-
duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
|
|
181
|
-
duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
|
|
182
|
-
|
|
183
|
-
duration
|
|
184
|
-
end
|
|
185
|
-
def _build_duration(m) self.class._build_duration( m ); end
|
|
186
|
-
|
|
187
169
|
|
|
188
170
|
end # class Lexer
|
|
189
171
|
end # module SportDb
|
|
190
|
-
|
|
@@ -21,7 +21,7 @@ GEO_TEXT_RE = %r{
|
|
|
21
21
|
# opt 1 - start with alpha
|
|
22
22
|
\p{L}+ ## all unicode letters (e.g. [a-z])
|
|
23
23
|
|
|
|
24
|
-
# opt 2 - start with num!! -
|
|
24
|
+
# opt 2 - start with num!! -
|
|
25
25
|
\d+ # check for num lookahead (MUST be space or dot)
|
|
26
26
|
## MAY be followed by (optional space) !
|
|
27
27
|
## MUST be follow by a to z!!!!
|
|
@@ -37,11 +37,11 @@ GEO_TEXT_RE = %r{
|
|
|
37
37
|
|
|
38
38
|
##
|
|
39
39
|
## todo/check - find a different "more intuitive" regex/rule if possible?
|
|
40
|
-
## for single spaces only (and _/ MUST not be surround by spaces)
|
|
40
|
+
## for single spaces only (and _/ MUST not be surround by spaces)
|
|
41
41
|
|
|
42
|
-
(?:
|
|
42
|
+
(?:
|
|
43
43
|
(?:
|
|
44
|
-
[ ]? # only single (inline) space allowed - double spaces are breaks!!!
|
|
44
|
+
[ ]? # only single (inline) space allowed - double spaces are breaks!!!
|
|
45
45
|
(?:
|
|
46
46
|
\p{L} | \d | [.&'°]
|
|
47
47
|
|
|
|
@@ -64,7 +64,7 @@ GEO_TEXT_RE = %r{
|
|
|
64
64
|
## Ost-Berlin (Walter-Ulbricht)
|
|
65
65
|
## Athinai (OAKA - Maroussi)
|
|
66
66
|
##
|
|
67
|
-
## or Valencia (Spain) or Solna
|
|
67
|
+
## or Valencia (Spain) or Solna
|
|
68
68
|
(?:
|
|
69
69
|
[ ]
|
|
70
70
|
\(
|
|
@@ -93,20 +93,14 @@ GEO_TEXT_RE = %r{
|
|
|
93
93
|
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
GEO_BASICS_RE = %r{
|
|
97
|
-
(?<spaces> [ ]{2,}) |
|
|
98
|
-
(?<space> [ ])
|
|
99
|
-
|
|
|
100
|
-
(?<sym> [,›>\[] )
|
|
101
|
-
}ix
|
|
102
96
|
|
|
103
97
|
|
|
104
98
|
## note - add "hacky" check for comma that is followed by a prop(erty)
|
|
105
99
|
##
|
|
106
100
|
## make sure to NOT match
|
|
107
101
|
## props e.g. att: 18000
|
|
108
|
-
## July 10 @ Paris, Parc des Princes, att: 18000
|
|
109
|
-
## July 10 @ Paris, Parc des Princes, att: 18000
|
|
102
|
+
## July 10 @ Paris, Parc des Princes, att: 18000
|
|
103
|
+
## July 10 @ Paris, Parc des Princes, att: 18000
|
|
110
104
|
##
|
|
111
105
|
|
|
112
106
|
|
|
@@ -115,18 +109,24 @@ GEO_END_RE = %r{
|
|
|
115
109
|
,
|
|
116
110
|
)
|
|
117
111
|
## POSITIVE lookahead for props
|
|
118
|
-
|
|
112
|
+
## todo/fix - use generic [a-z]+ - why? why not?
|
|
113
|
+
(?=
|
|
119
114
|
[ ]* ## optional spaces
|
|
120
|
-
(?:
|
|
115
|
+
(?: attendance|att
|
|
116
|
+
| referee?s|refs?
|
|
117
|
+
)
|
|
121
118
|
:
|
|
122
119
|
)
|
|
123
120
|
}ix
|
|
124
121
|
|
|
125
122
|
|
|
123
|
+
|
|
124
|
+
|
|
126
125
|
GEO_RE = Regexp.union(
|
|
126
|
+
SPACES_RE,
|
|
127
127
|
GEO_END_RE,
|
|
128
|
-
GEO_BASICS_RE,
|
|
129
128
|
GEO_TEXT_RE,
|
|
129
|
+
/ (?<sym> [,›>\[] ) /x,
|
|
130
130
|
ANY_RE,
|
|
131
131
|
)
|
|
132
132
|
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
module SportDb
|
|
2
|
+
class Lexer
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def self._build_goal_minute( m )
|
|
7
|
+
minute = {}
|
|
8
|
+
|
|
9
|
+
minute[:m] = m[:value].to_i(10) ## always required
|
|
10
|
+
|
|
11
|
+
## stoppage/injury time (offset)
|
|
12
|
+
minute[:offset] = m[:value2].to_i(10) if m[:value2]
|
|
13
|
+
|
|
14
|
+
minute[:og] = true if m[:og]
|
|
15
|
+
minute[:pen] = true if m[:pen]
|
|
16
|
+
minute[:freekick] = true if m[:fk]
|
|
17
|
+
minute[:header] = true if m[:hdr]
|
|
18
|
+
|
|
19
|
+
minute[:secs] = m[:secs].to_i(10) if m[:secs]
|
|
20
|
+
|
|
21
|
+
minute
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def self._build_goal_minute_na( m )
|
|
25
|
+
minute = {}
|
|
26
|
+
|
|
27
|
+
minute[:m] = '?' ## or use nil or 999 or -1 or ???
|
|
28
|
+
|
|
29
|
+
minute[:og] = true if m[:og]
|
|
30
|
+
minute[:pen] = true if m[:pen]
|
|
31
|
+
minute[:freekick] = true if m[:fk]
|
|
32
|
+
minute[:header] = true if m[:hdr]
|
|
33
|
+
|
|
34
|
+
minute
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def self._build_minute( m )
|
|
40
|
+
minute = {}
|
|
41
|
+
minute[:m] = m[:value].to_i(10) ## always required
|
|
42
|
+
|
|
43
|
+
## stoppage/injury time (offset)
|
|
44
|
+
minute[:offset] = m[:value2].to_i(10) if m[:value2]
|
|
45
|
+
|
|
46
|
+
minute
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def self._build_goal_count( m )
|
|
51
|
+
count = {}
|
|
52
|
+
count[:count] = m[:value].to_i(10) if m[:value]
|
|
53
|
+
count[:og] = m[:og_value] ? m[:og_value].to_i(10) : 1 if m[:og] ## check flag
|
|
54
|
+
count[:pen] = m[:pen_value] ? m[:pen_value].to_i(10) : 1 if m[:pen] ## check flag
|
|
55
|
+
count
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def self._build_goal_type( m )
|
|
59
|
+
goal = {}
|
|
60
|
+
goal[:og] = true if m[:og]
|
|
61
|
+
goal[:pen] = true if m[:pen]
|
|
62
|
+
goal[:freekick] = true if m[:fk]
|
|
63
|
+
goal[:header] = true if m[:hdr]
|
|
64
|
+
goal
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _build_goal_minute( m ) self.class._build_goal_minute( m ); end
|
|
69
|
+
def _build_goal_minute_na( m ) self.class._build_goal_minute_na( m ); end
|
|
70
|
+
def _build_minute( m ) self.class._build_minute( m ); end
|
|
71
|
+
def _build_goal_count( m ) self.class._build_goal_count( m ); end
|
|
72
|
+
def _build_goal_type( m ) self.class._build_goal_type( m ); end
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
###
|
|
79
|
+
# parse helpers
|
|
80
|
+
|
|
81
|
+
def self._parse_goal_minute( str )
|
|
82
|
+
## note - strip - leading/trailing spaces
|
|
83
|
+
m = GOAL_MINUTE_RE.match( str.strip )
|
|
84
|
+
if m && m.pre_match == '' && m.post_match == ''
|
|
85
|
+
_build_goal_minute( m )
|
|
86
|
+
elsif m
|
|
87
|
+
## note - match BUT not anchored to start and end-of-string!!!
|
|
88
|
+
## report, error somehow??
|
|
89
|
+
nil
|
|
90
|
+
else
|
|
91
|
+
nil ## no match - return nil
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def self._parse_goal_count( str )
|
|
96
|
+
## note - strip - leading/trailing spaces
|
|
97
|
+
m = GOAL_COUNT_RE.match( str.strip )
|
|
98
|
+
if m && m.pre_match == '' && m.post_match == ''
|
|
99
|
+
_build_goal_count( m )
|
|
100
|
+
elsif m
|
|
101
|
+
## note - match BUT not anchored to start and end-of-string!!!
|
|
102
|
+
## report, error somehow??
|
|
103
|
+
nil
|
|
104
|
+
else
|
|
105
|
+
nil ## no match - return nil
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
end # class Lexer
|
|
114
|
+
end # module SportDb
|