sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -1,205 +0,0 @@
1
-
2
- module SportDb
3
- class Lexer
4
-
5
- #
6
- # todo/check - move goal type regexes to goal or somewhere else?
7
- #
8
-
9
- ## goal types
10
- # (pen.) or (pen) or (p.) or (p)
11
- ## (o.g.) or (og)
12
- ## todo/check - keep case-insensitive
13
- ## or allow OG or P or PEN or
14
- ## only lower case - why? why not?
15
- GOAL_PEN_RE = %r{
16
- (?<pen> \(
17
- (?:pen|p)\.?
18
- \)
19
- )
20
- }ix
21
- GOAL_OG_RE = %r{
22
- (?<og> \(
23
- (?:og|o\.g\.)
24
- \)
25
- )
26
- }ix
27
-
28
-
29
- ## minute variant for N/A not/available
30
- ## todo/check - find a better syntax - why? why not?
31
- ##
32
- ## note "??".to_i(10) returns 0 or
33
- ## "__".to_i(10) returns 0
34
- ## quick hack - assume 0 for n/a for now
35
-
36
- MINUTE_NA_RE = %r{
37
- (?<minute>
38
- (?<=[ (]) # positive lookbehind for space or opening
39
- (?<value> \?{2} | _{2} )
40
- ' ## must have minute marker!!!!
41
- )
42
- }ix
43
-
44
- MINUTE_RE = %r{
45
- (?<minute>
46
- (?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
47
- # todo - add more lookbehinds e.g. ,) etc. - why? why not?
48
- (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
49
- (?: \+
50
- (?<value2>\d{1,3})
51
- )?
52
- ' ## must have minute marker!!!!
53
- )
54
- }ix
55
-
56
-
57
- #####
58
- # player with minute (top-level) regex
59
- # - starts new player/goal mode (until end of line)!!!
60
- # - note: allow one or more spaces between name and minute
61
- #
62
- # note - aaa bbb 40'
63
- # make sure anchor (^) - beginning of line - present!!!
64
- # note - will NOT work with ^ anchor!!
65
- # use special \G - Matches first matching position !!!!
66
- # otherwise you get matches such as >bbb 40'< skipping >aaa< etc.!!!
67
- #
68
- # regex question - check if in a regex union - space regex gets matches
69
- # or others with first matching position
70
- # or if chars get eaten-up?
71
- # let us know if \G is required here or not
72
- #
73
- ## note - use \A (instead of ^) - \A strictly matches the start of the string.
74
-
75
-
76
- PLAYER_WITH_MINUTE_RE = %r{
77
- \A ### note - MUST start line; leading spaces optional (eat-up)
78
- [ ]*
79
- (?: # optional open bracket ([) -- remove later
80
- (?<open_bracket> \[ )
81
- [ ]*
82
- )?
83
- (?: # optional none a.k.a. -; - what todo here?
84
- (?<none> - [ ]* ; [ ]* )
85
- )?
86
- (?<player_with_minute>
87
- (?<name>
88
- \p{L}+
89
- \.? ## optional dot
90
-
91
- (?:
92
- ## rule for space; only one single space allowed inline!!!
93
- (?:
94
- (?<![ ]) ## use negative lookbehind
95
- [ ]
96
- (?=\p{L}|') ## use lookahead
97
- )
98
- |
99
- (?:
100
- (?<=\p{L}) ## use lookbehind
101
- ['-] ## must be surrounded by letters
102
- ## e.g. One/Two NOT
103
- ## One/ Two or One / Two or One /Two etc.
104
- (?=\p{L}) ## use lookahead
105
- )
106
- |
107
- (?:
108
- (?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
109
- ['] ## must be surrounded by leading space and
110
- ## trailing letters (e.g. UDI 'Beter Bed)
111
- (?=\p{L}) ## use lookahead
112
- )
113
- |
114
- (?:
115
- (?<=\p{L}) ## use lookbehind
116
- ['] ## must be surrounded by leading letter and
117
- ## trailing space PLUS letter (e.g. UDI' Beter Bed)
118
- (?=[ ]\p{L}) ## use lookahead (space WITH letter)
119
- )
120
- | ## standard case with letter(s) and optional dot
121
- (?: \p{L}+
122
- \.? ## optional dot
123
- )
124
- )*
125
- )
126
- #### spaces
127
- (?: [ ]+)
128
- #### minute (see above)
129
- ##### use MINUTE_RE.source or such - for inline (reference) use? do not copy
130
- (?<minute>
131
- (?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
132
- # todo - add more lookbehinds e.g. ,) etc. - why? why not?
133
- (?:
134
- (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
135
- (?: \+
136
- (?<value2>\d{1,3})
137
- )?
138
- |
139
- (?<value> \?{2} | _{2} ) ## add support for n/a (not/available)
140
- )
141
- ' ## must have minute marker!!!!
142
- )
143
-
144
- )
145
- }ix
146
-
147
-
148
-
149
- ## note - use \A (instead of ^) - \A strictly matches the start of the string.
150
-
151
- PLAYER_WITH_SCORE_RE = %r{
152
- \A ### note - MUST start line; leading spaces optional (eat-up)
153
- [ ]*
154
- (?<player_with_score>
155
- (?<score>
156
- (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
157
- )
158
- [ ]+
159
- (?<name>
160
- \p{L}+
161
- \.? ## optional dot
162
-
163
- (?:
164
- ## rule for space; only one single space allowed inline!!!
165
- (?:
166
- (?<![ ]) ## use negative lookbehind
167
- [ ]
168
- (?=\p{L}|') ## use lookahead
169
- )
170
- |
171
- (?:
172
- (?<=\p{L}) ## use lookbehind
173
- ['-] ## must be surrounded by letters
174
- ## e.g. One/Two NOT
175
- ## One/ Two or One / Two or One /Two etc.
176
- (?=\p{L}) ## use lookahead
177
- )
178
- |
179
- (?:
180
- (?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
181
- ['] ## must be surrounded by leading space and
182
- ## trailing letters (e.g. UDI 'Beter Bed)
183
- (?=\p{L}) ## use lookahead
184
- )
185
- |
186
- (?:
187
- (?<=\p{L}) ## use lookbehind
188
- ['] ## must be surrounded by leading letter and
189
- ## trailing space PLUS letter (e.g. UDI' Beter Bed)
190
- (?=[ ]\p{L}) ## use lookahead (space WITH letter)
191
- )
192
- | ## standard case with letter(s) and optional dot
193
- (?: \p{L}+
194
- \.? ## optional dot
195
- )
196
- )*
197
- ) ## name
198
- ### check/todo - add lookahead (e.g. must be space or ,$) why? why not?
199
- ) ## player_with_score
200
- }ix
201
-
202
-
203
-
204
- end # class Lexer
205
- end # module SportDb