sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -3,8 +3,8 @@
3
3
  ##
4
4
  ##
5
5
  ## Fri Jun 14 21:00 @ München Fußball Arena, München
6
- ## (1) Germany v Scotland 5-1 (3-0)
7
- ## Wirtz 10' Musiala 19' Havertz 45+1' (pen.) Füllkrug 68' Can 90+3'; Rüdiger 87' (o.g.)
6
+ ## Germany v Scotland 5-1 (3-0)
7
+ ## (Wirtz 10' Musiala 19' Havertz 45+1' (pen.) Füllkrug 68' Can 90+3'; Rüdiger 87' (o.g.))
8
8
  ##
9
9
  ## Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt - Andrich [Y] (Groß 46'),
10
10
  ## Kroos (Can 80') - Musiala (Müller 74'), Gündogan, Wirtz (Sane 63') -
@@ -18,56 +18,6 @@ module SportDb
18
18
  class Lexer
19
19
 
20
20
 
21
- ## name different from text (does NOT allow number in name/text)
22
- PROP_NAME_RE = %r{
23
- (?<prop_name>
24
- \b
25
- (?<name>
26
- \p{L}+
27
- \.? ## optional dot
28
- (?:
29
- ## rule for space; only one single space allowed inline!!!
30
- (?:
31
- (?<![ ]) ## use negative lookbehind
32
- [ ]
33
- (?=\p{L}|['"]) ## use lookahead
34
- )
35
- ## support (inline) quoted name e.g. "Rodri" or such
36
- |
37
- (?:
38
- (?<=[ ]) ## use positive lookbehind
39
- " \p{L}+ "
40
- ## require space here too - why? why not?
41
- )
42
- |
43
- (?:
44
- (?<=\p{L}) ## use lookbehind
45
- [-] ## must be surrounded by letters
46
- ## e.g. One/Two NOT
47
- ## One/ Two or One / Two or One /Two etc.
48
- (?=\p{L}) ## use lookahead
49
- )
50
- |
51
- (?: ## flex rule for quote - allow any
52
- ## only check for double quotes e.g. cannot follow other ' for now - why? why not?
53
- ## allows rodrigez 'rodri' for example
54
- (?<!') ## use negative lookbehind
55
- '
56
- )
57
- | ## standard case with letter(s) and optinal dot
58
- (?: \p{L}+
59
- \.? ## optional dot
60
- )
61
- )*
62
- )
63
- ## add lookahead - must be non-alphanum
64
- (?=[ ,;\]\)]|$)
65
- )
66
- }ix
67
-
68
-
69
-
70
-
71
21
  ##############
72
22
  # add support for props/ attributes e.g.
73
23
  #
@@ -90,6 +40,10 @@ PROP_NAME_RE = %r{
90
40
  ##
91
41
  ## note - use special \G - Matches first matching position !!!!
92
42
 
43
+ ###
44
+ ## todo/fix/fix
45
+ ## change ^ to \A
46
+ ## change name to START_WITH_PROP_KEY_RE !!!
93
47
 
94
48
  PROP_KEY_RE = %r{
95
49
  ^ # note - MUST start line; leading spaces optional (eat-up)
@@ -132,6 +86,60 @@ PROP_NAME_RE = %r{
132
86
  ## add [c] for captain too
133
87
 
134
88
 
89
+ ## [c] or [C] for marking player as captain
90
+ ## support [y ] too - or require Y - why? why not?
91
+ INLINE_CAPTAIN = %r{ (?<inline_captain>
92
+ \[ [cC] \]
93
+ )}x
94
+
95
+ INLINE_YELLOW = %r{ (?<inline_yellow>
96
+ \[ [yY]
97
+ ## optional minute
98
+ (?: [ ]+
99
+ (?<minute> \d{1,3})
100
+ '?
101
+ (?:
102
+ \+
103
+ (?<offset>\d{1,2})
104
+ '?
105
+ )?
106
+ )?
107
+ \]
108
+ )}x
109
+
110
+ INLINE_RED = %r{ (?<inline_red>
111
+ \[ [rR]
112
+ ## optional minute
113
+ (?: [ ]+
114
+ (?<minute> \d{1,3})
115
+ '?
116
+ (?:
117
+ \+
118
+ (?<offset>\d{1,2})
119
+ '?
120
+ )?
121
+ )?
122
+ \]
123
+ )}x
124
+
125
+ INLINE_YELLOW_RED = %r{ (?<inline_yellow_red>
126
+ \[ (?:y/r |
127
+ Y/R )
128
+ ## optional minute
129
+ (?: [ ]+
130
+ (?<minute> \d{1,3})
131
+ '?
132
+ (?:
133
+ \+
134
+ (?<offset>\d{1,2})
135
+ '?
136
+ )?
137
+ )?
138
+ \]
139
+ )}x
140
+
141
+
142
+
135
143
 
136
144
  ### simple prop key for inline use e.g.
137
145
  ### Coach: or Trainer: or ... add more here later
@@ -192,9 +200,17 @@ PROP_BASICS_RE = %r{
192
200
  )
193
201
  }ix
194
202
 
203
+
204
+
195
205
  PROP_RE = Regexp.union(
196
- MINUTE_RE,
197
- PROP_KEY_INLINE_RE,
206
+ MINUTE_RE, ## e.g. 44 or 44' or 45+1 or 45+1' etc.
207
+
208
+ INLINE_CAPTAIN, ## e.g. [c]
209
+ INLINE_YELLOW, ## e.g. [Y] or [Y 44] or [Y 44'] or [Y 45+1']
210
+ INLINE_YELLOW_RED, ## e.g. [Y/R] or [Y/R 78]
211
+ INLINE_RED, ## e.g. [R] or [R 42] or [R 42']
212
+
213
+ PROP_KEY_INLINE_RE,
198
214
  PROP_NAME_RE,
199
215
  PROP_BASICS_RE,
200
216
  ## todo/fix - add ANY_RE here too!!!
@@ -0,0 +1,74 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ ##
6
+ ## see token-text for TEXT_RE
7
+ ## change PROP_NAME_RE to TEXT_II or TEXT_??? - why? why not?
8
+
9
+
10
+
11
+ ##
12
+ ##
13
+ ## FIX / FIX / FIX
14
+ ## support match for
15
+ ## K.-H.Förster
16
+
17
+
18
+
19
+
20
+ ## name different from text (does NOT allow number in name/text)
21
+ PROP_NAME_RE = %r{
22
+ (?<prop_name>
23
+ \b
24
+ (?<name>
25
+ \p{L}+
26
+ \.? ## optional dot
27
+ (?:
28
+ ## rule for space; only one single space allowed inline!!!
29
+ (?:
30
+ (?<![ ]) ## use negative lookbehind
31
+ [ ]
32
+ (?=\p{L}|['"]) ## use lookahead
33
+ )
34
+ ## support (inline) quoted name e.g. "Rodri" or such
35
+ |
36
+ (?:
37
+ (?<=[ ]) ## use positive lookbehind
38
+ " \p{L}+ "
39
+ ## require space here too - why? why not?
40
+ )
41
+ |
42
+ (?:
43
+ (?<= ## \p{L}\. | \p{L}
44
+ [\p{L}.]
45
+ ) ## use POSITIVE lookbehind
46
+ [-] ## must be preceded by a letter or dot (.) and followed by a letter
47
+ ## note - allow preceding dot (.) before the hyphen e.g. K.-H.Förster
48
+ ## short for Karl-Heinz Förster
49
+ ##
50
+ ## e.g. One-Two NOT
51
+ ## One- Two or One - Two or One -Two etc.
52
+ (?=\p{L}) ## use lookahead
53
+ )
54
+ |
55
+ (?: ## flex rule for quote - allow any
56
+ ## only check for double quotes e.g. cannot follow other ' for now - why? why not?
57
+ ## allows rodrigez 'rodri' for example
58
+ (?<!') ## use negative lookbehind
59
+ '
60
+ )
61
+ | ## standard case with letter(s) and optional dot
62
+ (?: \p{L}+
63
+ \.? ## optional dot
64
+ )
65
+ )*
66
+ )
67
+ ## add lookahead - must be non-alphanum
68
+ (?=[ ,;\]\)]|$)
69
+ )
70
+ }ix
71
+
72
+
73
+ end # class Lexer
74
+ end # module SportDb
@@ -0,0 +1,102 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+ ####
5
+ #
6
+ ## note - use \A (instead of ^) - \A strictly matches the start of the string.
7
+ ##
8
+ ## note - optional trailing markers (after a space) are supported e.g.
9
+ ## ▪ Round 1 ▪▪▪▪▪▪▪▪
10
+ ## :: Round 1 ::::::::::::
11
+ ##
12
+ ## check - allow without space (like in heading =Heading 1=) - why? why not?
13
+ ## ▪Round 1▪▪▪▪▪▪▪▪
14
+ ## ::Round 1::::::::::::
15
+
16
+ ROUND_OUTLINE_I_RE = %r{ \A
17
+ [ ]* ## ignore leading spaces (if any)
18
+ (?<round_marker>
19
+ [▪]{1,3} ## BLACK SMALL SQUARE e.g. ▪,▪▪,▪▪▪
20
+ )
21
+ [ ]+
22
+ (?<round_outline>
23
+ ## must start with letter - why? why not?
24
+ ### 1st round
25
+ ## allow numbers e.g. Group A - 1
26
+ ##
27
+ ## note - CANNOT incl. :| !!!
28
+ ## used for markers for defs/definitions
29
+ [^:|]+? ## use non-greedy
30
+ )
31
+ (?:
32
+ [ ]+
33
+ [▪]+
34
+ )?
35
+ [ ]* ## ignore trailing spaces (if any)
36
+ \z
37
+ }xi
38
+
39
+ ROUND_OUTLINE_II_RE = %r{ \A
40
+ [ ]* ## ignore leading spaces (if any)
41
+ (?<round_marker>
42
+ ::{1,3} ## e.g. ::,:::,::::
43
+ )
44
+ [ ]+
45
+ (?<round_outline>
46
+ ## must start with letter - why? why not?
47
+ ### 1st round
48
+ ## allow numbers e.g. Group A - 1
49
+ ##
50
+ ## note - CANNOT incl. :| !!!
51
+ ## used for markers for defs/definitions
52
+ [^:|]+? ## use non-greedy
53
+ )
54
+ (?:
55
+ [ ]+
56
+ ::+
57
+ )?
58
+ [ ]* ## ignore trailing spaces (if any)
59
+ \z
60
+ }xi
61
+
62
+ ROUND_OUTLINE_RE = Regexp.union( ROUND_OUTLINE_I_RE,
63
+ ROUND_OUTLINE_II_RE,
64
+ )
65
+
66
+
67
+ ###
68
+ # note - for def(initions) only one level support
69
+ # that is, no round outline additions possible (e.g. ▪▪ 1st leg etc.)
70
+ ROUND_DEF_OUTLINE_RE = %r{ \A
71
+ [ ]* ## ignore leading spaces (if any)
72
+ (?: [▪] ## BLACK SMALL SQUARE
73
+ |
74
+ :: )
75
+ [ ]+
76
+ (?<round_outline>
77
+ [^:|]+? ## use non-greedy
78
+ )
79
+ [ ]* ## ignore trailing spaces (if any)
80
+ ### positive lookahead MUST be : OR |
81
+ (?= [:|]
82
+ [ ]) ## note: requires space for now after [:|] - keep - why? why not?
83
+ }ix
84
+
85
+
86
+ ROUND_DEF_BASICS_RE = %r{
87
+ (?<spaces> [ ]{2,}) |
88
+ (?<space> [ ])
89
+ |
90
+ (?<sym> [:|,] ) ### note - add comma (,) as optional separator
91
+ }ix
92
+
93
+ ROUND_DEF_RE = Regexp.union( ROUND_DEF_BASICS_RE,
94
+ DURATION_RE, # note - duration MUST match before date
95
+ DATE_RE, ## note - date must go before time (e.g. 12.12. vs 12.12)
96
+ ANY_RE,
97
+ )
98
+
99
+
100
+
101
+ end # class Lexer
102
+ end # module SportDb