sportdb-parser 0.6.20 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +14 -8
- data/Rakefile +1 -1
- data/lib/sportdb/parser/blocktxt.rb +99 -0
- data/lib/sportdb/parser/lexer.rb +958 -395
- data/lib/sportdb/parser/lexer_buffer.rb +97 -0
- data/lib/sportdb/parser/lexer_tty.rb +111 -0
- data/lib/sportdb/parser/parser.rb +1768 -855
- data/lib/sportdb/parser/racc_parser.rb +1 -1
- data/lib/sportdb/parser/racc_tree.rb +327 -41
- data/lib/sportdb/parser/token-date.rb +160 -178
- data/lib/sportdb/parser/token-date_duration.rb +190 -0
- data/lib/sportdb/parser/token-geo.rb +59 -59
- data/lib/sportdb/parser/token-goals.rb +460 -0
- data/lib/sportdb/parser/token-group.rb +43 -0
- data/lib/sportdb/parser/token-note.rb +40 -0
- data/lib/sportdb/parser/token-prop.rb +70 -54
- data/lib/sportdb/parser/token-prop_name.rb +74 -0
- data/lib/sportdb/parser/token-round.rb +102 -0
- data/lib/sportdb/parser/token-score.rb +323 -47
- data/lib/sportdb/parser/token-score_fuller.rb +435 -0
- data/lib/sportdb/parser/token-score_legs.rb +59 -0
- data/lib/sportdb/parser/token-status.rb +157 -160
- data/lib/sportdb/parser/token-table.rb +149 -0
- data/lib/sportdb/parser/token-text.rb +72 -23
- data/lib/sportdb/parser/token-time.rb +141 -0
- data/lib/sportdb/parser/token.rb +242 -105
- data/lib/sportdb/parser/token_helpers.rb +92 -0
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +24 -2
- metadata +18 -18
- data/config/rounds_de.txt +0 -125
- data/config/rounds_en.txt +0 -29
- data/config/rounds_es.txt +0 -26
- data/config/rounds_misc.txt +0 -25
- data/config/rounds_pt.txt +0 -4
- data/config/zones_en.txt +0 -20
- data/lib/sportdb/parser/lang.rb +0 -298
- data/lib/sportdb/parser/token-minute.rb +0 -205
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
##
|
|
4
4
|
##
|
|
5
5
|
## Fri Jun 14 21:00 @ München Fußball Arena, München
|
|
6
|
-
##
|
|
7
|
-
## Wirtz 10' Musiala 19' Havertz 45+1' (pen.) Füllkrug 68' Can 90+3'; Rüdiger 87' (o.g.)
|
|
6
|
+
## Germany v Scotland 5-1 (3-0)
|
|
7
|
+
## (Wirtz 10' Musiala 19' Havertz 45+1' (pen.) Füllkrug 68' Can 90+3'; Rüdiger 87' (o.g.))
|
|
8
8
|
##
|
|
9
9
|
## Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt - Andrich [Y] (Groß 46'),
|
|
10
10
|
## Kroos (Can 80') - Musiala (Müller 74'), Gündogan, Wirtz (Sane 63') -
|
|
@@ -18,56 +18,6 @@ module SportDb
|
|
|
18
18
|
class Lexer
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
## name different from text (does NOT allow number in name/text)
|
|
22
|
-
PROP_NAME_RE = %r{
|
|
23
|
-
(?<prop_name>
|
|
24
|
-
\b
|
|
25
|
-
(?<name>
|
|
26
|
-
\p{L}+
|
|
27
|
-
\.? ## optional dot
|
|
28
|
-
(?:
|
|
29
|
-
## rule for space; only one single space allowed inline!!!
|
|
30
|
-
(?:
|
|
31
|
-
(?<![ ]) ## use negative lookbehind
|
|
32
|
-
[ ]
|
|
33
|
-
(?=\p{L}|['"]) ## use lookahead
|
|
34
|
-
)
|
|
35
|
-
## support (inline) quoted name e.g. "Rodri" or such
|
|
36
|
-
|
|
|
37
|
-
(?:
|
|
38
|
-
(?<=[ ]) ## use positive lookbehind
|
|
39
|
-
" \p{L}+ "
|
|
40
|
-
## require space here too - why? why not?
|
|
41
|
-
)
|
|
42
|
-
|
|
|
43
|
-
(?:
|
|
44
|
-
(?<=\p{L}) ## use lookbehind
|
|
45
|
-
[-] ## must be surrounded by letters
|
|
46
|
-
## e.g. One/Two NOT
|
|
47
|
-
## One/ Two or One / Two or One /Two etc.
|
|
48
|
-
(?=\p{L}) ## use lookahead
|
|
49
|
-
)
|
|
50
|
-
|
|
|
51
|
-
(?: ## flex rule for quote - allow any
|
|
52
|
-
## only check for double quotes e.g. cannot follow other ' for now - why? why not?
|
|
53
|
-
## allows rodrigez 'rodri' for example
|
|
54
|
-
(?<!') ## use negative lookbehind
|
|
55
|
-
'
|
|
56
|
-
)
|
|
57
|
-
| ## standard case with letter(s) and optional dot
|
|
58
|
-
(?: \p{L}+
|
|
59
|
-
\.? ## optional dot
|
|
60
|
-
)
|
|
61
|
-
)*
|
|
62
|
-
)
|
|
63
|
-
## add lookahead - must be non-alphanum
|
|
64
|
-
(?=[ ,;\]\)]|$)
|
|
65
|
-
)
|
|
66
|
-
}ix
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
21
|
##############
|
|
72
22
|
# add support for props/ attributes e.g.
|
|
73
23
|
#
|
|
@@ -90,6 +40,10 @@ PROP_NAME_RE = %r{
|
|
|
90
40
|
##
|
|
91
41
|
## note - use special \G - Matches first matching position !!!!
|
|
92
42
|
|
|
43
|
+
###
|
|
44
|
+
## todo/fix/fix
|
|
45
|
+
## change ^ to \A
|
|
46
|
+
## change name to START_WITH_PROP_KEY_RE !!!
|
|
93
47
|
|
|
94
48
|
PROP_KEY_RE = %r{
|
|
95
49
|
^ # note - MUST start line; leading spaces optional (eat-up)
|
|
@@ -132,6 +86,60 @@ PROP_NAME_RE = %r{
|
|
|
132
86
|
## add [c] for captain too
|
|
133
87
|
|
|
134
88
|
|
|
89
|
+
## [c] or [C] for marking player as captain
|
|
90
|
+
## support [y ] too - or require Y - why? why not?
|
|
91
|
+
INLINE_CAPTAIN = %r{ (?<inline_captain>
|
|
92
|
+
\[ [cC] \]
|
|
93
|
+
)}x
|
|
94
|
+
|
|
95
|
+
INLINE_YELLOW = %r{ (?<inline_yellow>
|
|
96
|
+
\[ [yY]
|
|
97
|
+
## optional minute
|
|
98
|
+
(?: [ ]+
|
|
99
|
+
(?<minute> \d{1,3})
|
|
100
|
+
'?
|
|
101
|
+
(?:
|
|
102
|
+
\+
|
|
103
|
+
(?<offset>\d{1,2})
|
|
104
|
+
'?
|
|
105
|
+
)?
|
|
106
|
+
)?
|
|
107
|
+
\]
|
|
108
|
+
)}x
|
|
109
|
+
|
|
110
|
+
INLINE_RED = %r{ (?<inline_red>
|
|
111
|
+
\[ [rR]
|
|
112
|
+
## optional minute
|
|
113
|
+
(?: [ ]+
|
|
114
|
+
(?<minute> \d{1,3})
|
|
115
|
+
'?
|
|
116
|
+
(?:
|
|
117
|
+
\+
|
|
118
|
+
(?<offset>\d{1,2})
|
|
119
|
+
'?
|
|
120
|
+
)?
|
|
121
|
+
)?
|
|
122
|
+
\]
|
|
123
|
+
)}x
|
|
124
|
+
|
|
125
|
+
INLINE_YELLOW_RED = %r{ (?<inline_yellow_red>
|
|
126
|
+
\[ (?:y/r |
|
|
127
|
+
Y/R )
|
|
128
|
+
## optional minute
|
|
129
|
+
(?: [ ]+
|
|
130
|
+
(?<minute> \d{1,3})
|
|
131
|
+
'?
|
|
132
|
+
(?:
|
|
133
|
+
\+
|
|
134
|
+
(?<offset>\d{1,2})
|
|
135
|
+
'?
|
|
136
|
+
)?
|
|
137
|
+
)?
|
|
138
|
+
\]
|
|
139
|
+
)}x
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
135
143
|
|
|
136
144
|
### simple prop key for inline use e.g.
|
|
137
145
|
### Coach: or Trainer: or ... add more here later
|
|
@@ -192,9 +200,17 @@ PROP_BASICS_RE = %r{
|
|
|
192
200
|
)
|
|
193
201
|
}ix
|
|
194
202
|
|
|
203
|
+
|
|
204
|
+
|
|
195
205
|
PROP_RE = Regexp.union(
|
|
196
|
-
MINUTE_RE,
|
|
197
|
-
|
|
206
|
+
MINUTE_RE, ## e.g. 44 or 44' or 45+1 or 45+1' etc.
|
|
207
|
+
|
|
208
|
+
INLINE_CAPTAIN, ## e.g. [c]
|
|
209
|
+
INLINE_YELLOW, ## e.g. [Y] or [Y 44] or [Y 44'] or [Y 45+1']
|
|
210
|
+
INLINE_YELLOW_RED, ## e.g. [Y/R] or [Y/R 78]
|
|
211
|
+
INLINE_RED, ## e.g. [R] or [R 42] or [R 42']
|
|
212
|
+
|
|
213
|
+
PROP_KEY_INLINE_RE,
|
|
198
214
|
PROP_NAME_RE,
|
|
199
215
|
PROP_BASICS_RE,
|
|
200
216
|
## todo/fix - add ANY_RE here too!!!
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module SportDb
|
|
2
|
+
class Lexer
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
##
|
|
6
|
+
## see token-text for TEXT_RE
|
|
7
|
+
## change PROP_NAME_RE to TEXT_II or TEXT_??? - why? why not?
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
##
|
|
12
|
+
##
|
|
13
|
+
## FIX / FIX / FIX
|
|
14
|
+
## support match for
|
|
15
|
+
## K.-H.Förster
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
## name different from text (does NOT allow number in name/text)
|
|
21
|
+
PROP_NAME_RE = %r{
|
|
22
|
+
(?<prop_name>
|
|
23
|
+
\b
|
|
24
|
+
(?<name>
|
|
25
|
+
\p{L}+
|
|
26
|
+
\.? ## optional dot
|
|
27
|
+
(?:
|
|
28
|
+
## rule for space; only one single space allowed inline!!!
|
|
29
|
+
(?:
|
|
30
|
+
(?<![ ]) ## use negative lookbehind
|
|
31
|
+
[ ]
|
|
32
|
+
(?=\p{L}|['"]) ## use lookahead
|
|
33
|
+
)
|
|
34
|
+
## support (inline) quoted name e.g. "Rodri" or such
|
|
35
|
+
|
|
|
36
|
+
(?:
|
|
37
|
+
(?<=[ ]) ## use positive lookbehind
|
|
38
|
+
" \p{L}+ "
|
|
39
|
+
## require space here too - why? why not?
|
|
40
|
+
)
|
|
41
|
+
|
|
|
42
|
+
(?:
|
|
43
|
+
(?<= ## \p{L}\. | \p{L}
|
|
44
|
+
[\p{L}.]
|
|
45
|
+
) ## use POSITIVE lookbehind
|
|
46
|
+
[-] ## must be surrounded by letters
|
|
47
|
+
## note - allow leading dot (.) e.g. K.-H.Förster
|
|
48
|
+
## short for Karl-Heinz Förster
|
|
49
|
+
##
|
|
50
|
+
## e.g. One-Two NOT
|
|
51
|
+
## One- Two or One - Two or One -Two etc.
|
|
52
|
+
(?=\p{L}) ## use lookahead
|
|
53
|
+
)
|
|
54
|
+
|
|
|
55
|
+
(?: ## flex rule for quote - allow any
|
|
56
|
+
## only check for double quotes e.g. cannot follow other ' for now - why? why not?
|
|
57
|
+
## allows rodrigez 'rodri' for example
|
|
58
|
+
(?<!') ## use negative lookbehind
|
|
59
|
+
'
|
|
60
|
+
)
|
|
61
|
+
| ## standard case with letter(s) and optional dot
|
|
62
|
+
(?: \p{L}+
|
|
63
|
+
\.? ## optional dot
|
|
64
|
+
)
|
|
65
|
+
)*
|
|
66
|
+
)
|
|
67
|
+
## add lookahead - must be non-alphanum
|
|
68
|
+
(?=[ ,;\]\)]|$)
|
|
69
|
+
)
|
|
70
|
+
}ix
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
end # class Lexer
|
|
74
|
+
end # module SportDb
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
module SportDb
|
|
2
|
+
class Lexer
|
|
3
|
+
|
|
4
|
+
####
|
|
5
|
+
#
|
|
6
|
+
## note - use \A (instead of ^) - \A strictly matches the start of the string.
|
|
7
|
+
##
|
|
8
|
+
## todo - add support for trailing markers e.g.
|
|
9
|
+
## ▪ Round 1 ▪▪▪▪▪▪▪▪
|
|
10
|
+
## :: Round 1 ::::::::::::
|
|
11
|
+
##
|
|
12
|
+
## check - allow without space (like in heading =Heading 1=) - why? why not?
|
|
13
|
+
## ▪Round 1▪▪▪▪▪▪▪▪
|
|
14
|
+
## ::Round 1::::::::::::
|
|
15
|
+
|
|
16
|
+
ROUND_OUTLINE_I_RE = %r{ \A
|
|
17
|
+
[ ]* ## ignore leading spaces (if any)
|
|
18
|
+
(?<round_marker>
|
|
19
|
+
[▪]{1,3} ## BLACK SMALL SQUARE e.g. ▪,▪▪,▪▪▪
|
|
20
|
+
)
|
|
21
|
+
[ ]+
|
|
22
|
+
(?<round_outline>
|
|
23
|
+
## must start with letter - why? why not?
|
|
24
|
+
### 1st round
|
|
25
|
+
## allow numbers e.g. Group A - 1
|
|
26
|
+
##
|
|
27
|
+
## note - CANNOT incl. :| !!!
|
|
28
|
+
## used for markers for defs/definitions
|
|
29
|
+
[^:|]+? ## use non-greedy
|
|
30
|
+
)
|
|
31
|
+
(?:
|
|
32
|
+
[ ]+
|
|
33
|
+
[▪]+
|
|
34
|
+
)?
|
|
35
|
+
[ ]* ## ignore trailing spaces (if any)
|
|
36
|
+
\z
|
|
37
|
+
}xi
|
|
38
|
+
|
|
39
|
+
ROUND_OUTLINE_II_RE = %r{ \A
|
|
40
|
+
[ ]* ## ignore leading spaces (if any)
|
|
41
|
+
(?<round_marker>
|
|
42
|
+
::{1,3} ## e.g. ::,:::,::::
|
|
43
|
+
)
|
|
44
|
+
[ ]+
|
|
45
|
+
(?<round_outline>
|
|
46
|
+
## must start with letter - why? why not?
|
|
47
|
+
### 1st round
|
|
48
|
+
## allow numbers e.g. Group A - 1
|
|
49
|
+
##
|
|
50
|
+
## note - CANNOT incl. :| !!!
|
|
51
|
+
## used for markers for defs/definitions
|
|
52
|
+
[^:|]+? ## use non-greedy
|
|
53
|
+
)
|
|
54
|
+
(?:
|
|
55
|
+
[ ]+
|
|
56
|
+
::+
|
|
57
|
+
)?
|
|
58
|
+
[ ]* ## ignore trailing spaces (if any)
|
|
59
|
+
\z
|
|
60
|
+
}xi
|
|
61
|
+
|
|
62
|
+
ROUND_OUTLINE_RE = Regexp.union( ROUND_OUTLINE_I_RE,
|
|
63
|
+
ROUND_OUTLINE_II_RE,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
###
|
|
68
|
+
# note - for def(initions) only one level is supported
|
|
69
|
+
# that is, no round outline additions are possible (e.g. ▪▪ 1st leg etc.)
|
|
70
|
+
ROUND_DEF_OUTLINE_RE = %r{ \A
|
|
71
|
+
[ ]* ## ignore leading spaces (if any)
|
|
72
|
+
(?: [▪] ## BLACK SMALL SQUARE
|
|
73
|
+
|
|
|
74
|
+
:: )
|
|
75
|
+
[ ]+
|
|
76
|
+
(?<round_outline>
|
|
77
|
+
[^:|]+? ## use non-greedy
|
|
78
|
+
)
|
|
79
|
+
[ ]* ## ignore trailing spaces (if any)
|
|
80
|
+
### positive lookahead MUST be : OR |
|
|
81
|
+
(?= [:|]
|
|
82
|
+
[ ]) ## note: requires space for now after [:|] - keep - why? why not?
|
|
83
|
+
}ix
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
ROUND_DEF_BASICS_RE = %r{
|
|
87
|
+
(?<spaces> [ ]{2,}) |
|
|
88
|
+
(?<space> [ ])
|
|
89
|
+
|
|
|
90
|
+
(?<sym> [:|,] ) ### note - add comma (,) as optional separator
|
|
91
|
+
}ix
|
|
92
|
+
|
|
93
|
+
ROUND_DEF_RE = Regexp.union( ROUND_DEF_BASICS_RE,
|
|
94
|
+
DURATION_RE, # note - duration MUST match before date
|
|
95
|
+
DATE_RE, ## note - date must go before time (e.g. 12.12. vs 12.12)
|
|
96
|
+
ANY_RE,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
end # class Lexer
|
|
102
|
+
end # module SportDb
|