rsssf-parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +15 -0
- data/README.md +11 -0
- data/Rakefile +29 -0
- data/bin/rsssf +80 -0
- data/lib/rsssf/parser/linter.rb +84 -0
- data/lib/rsssf/parser/parser.rb +100 -0
- data/lib/rsssf/parser/token-date.rb +161 -0
- data/lib/rsssf/parser/token-goals.rb +68 -0
- data/lib/rsssf/parser/token-note.rb +113 -0
- data/lib/rsssf/parser/token-round.rb +102 -0
- data/lib/rsssf/parser/token-score.rb +103 -0
- data/lib/rsssf/parser/token-text.rb +162 -0
- data/lib/rsssf/parser/token.rb +230 -0
- data/lib/rsssf/parser.rb +21 -0
- metadata +113 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Rsssf
class Parser

  ##
  ## Token regexes for group, round and leg headings.
  ##
  ## All three are written in extended (x) mode, are case-insensitive (i)
  ## and expose exactly one named capture (group / round / leg).
  ## Note - in extended mode a literal space must be written as [ ].

  ## Group A-Z
  ## Group 1-99
  ## Group HEX    # used in concacaf world cup quali
  ## Group 1A or A1, B1 - used anywhere
  ##
  ## use "key" of group - why? why not?

  GROUP_RE = %r{(?<group>
                   \b
                   Group [ ]
                   [a-z0-9]+
                   \b)}ix


  ROUND_RE = %r{(?<round>
                  \b
                  (?:
                     # round - note - requires number e.g. round 1,2, etc.
                     (?: (?: Round    |
                             Matchday |
                             Week
                         )
                         [ ] [0-9]+
                     )
                     |
                     # more (knockout) rounds
                     # playoffs - playoff, play-off, play-offs
                     (?: Play-?offs?
                         (?: [ ]for[ ]quarter-?finals )?
                     )
                     |
                     # round32
                     (?: Round[ ]of[ ]32 |
                         Last[ ]32       |
                         16th[ ]finals   |
                         1/16[ ]finals
                     )
                     |
                     # round16
                     (?: Round[ ]of[ ]16 |
                         Last[ ]16       |
                         8th[ ]finals    |
                         1/8[ ]finals
                     )
                     |
                     # fifthplace
                     (?:
                        (?: (?: Fifth|5th )[ -]place
                            (?: [ ] (?: match|play-?off|final ))?
                        ) |
                        (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
                     )
                     |
                     # thirdplace
                     (?:
                        (?: (?: Third|3rd )[ -]place
                            (?: [ ] (?: match|play-?off|final ))?
                        ) |
                        (?: Match[ ]for[ ](?: third|3rd )[ -]place )
                     )
                     |
                     # quarterfinals
                     (?:
                        Quarter-?finals? |
                        Quarters         |
                        Last[ ]8
                     )
                     |
                     # semifinals
                     (?:
                        Semi-?finals? |
                        Semis         |
                        Last[ ]4
                     )
                     |
                     # final - note - keep last, all longer variants are tried first
                     Finals?
                  )
                  \b)}ix

  ##
  ## keep leg separate (from round) - why? why not?
  ##
  LEG_RE = %r{ (?<leg>
                  \b
                  (?:
                     # leg1
                     (?: 1st|First)[ ]legs?
                     |
                     # leg2
                     (?: 2nd|Second)[ ]legs?
                  )
                  \b)}ix


end  # class Parser
end  # module Rsssf
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module Rsssf
class Parser

  ##
  ## Token regexes for scores and score "stand-ins" (awd, abd, ppd, n/p, w/o).
  ##
  ## All regexes use extended (x) mode and are case-insensitive (i).
  ## The plain score and the stand-ins require a space on BOTH sides
  ## (checked with lookbehind/lookahead, so the spaces are not consumed).

  ######
  ## e.g. 2-1
  ##   captures - score (e.g. 2-1), score1 (e.g. 2), score2 (e.g. 1)
  SCORE_RE = %r{
                 (?<score>
                    (?<=[ ])    # positive lookbehind for space
                    (?<score1>\d{1,2}) - (?<score2>\d{1,2})
                    (?=[ ])     # positive lookahead for space
                 )
              }ix

  ## [aet]
  ## [aet, 3-2 pen]
  ## [aet; 3-2 pen]
  ## [3-2 pen]
  ## [3-2 pen.]
  ## [aet, 9-8 pen]
  ## [aet, 5-3 pen]
  ## [aet, 6-5 pen]
  ## [aet]
  ##
  ## - add dot (.) too ??
  ##    [aet. 3-2 pen]
  ##
  ## note - the enclosing brackets are matched but are NOT part
  ##          of the score_ext capture

  SCORE_EXT_RE = %r{ \[
                     (?<score_ext>
                        (?:    ## aet only  e.g. aet
                           aet
                           (?:   ## optional pen
                              [,;][ ]*
                              \d{1,2}-\d{1,2} [ ]? pen\.?
                           )?
                        )
                        |
                        (?:    ## penalty only  e.g. 3-2 pen
                           \d{1,2}-\d{1,2} [ ]? pen\.?
                        )
                     )
                     \]
                   }ix

  ### awd - awarded
  SCORE_AWD_RE = %r{   ## must be space before and after!!!
                     (?<score_awd>
                        (?<=[ ])    # positive lookbehind for space
                        awd
                        (?=[ ])     # positive lookahead for space
                     )
                   }ix

  ### abd - abandoned
  SCORE_ABD_RE = %r{   ## must be space before and after!!!
                     (?<score_abd>
                        (?<=[ ])    # positive lookbehind for space
                        abd
                        (?=[ ])     # positive lookahead for space
                     )
                   }ix

  ### ppd - postponed
  SCORE_PPD_RE = %r{   ## must be space before and after!!!
                     (?<score_ppd>
                        (?<=[ ])    # positive lookbehind for space
                        ppd
                        (?=[ ])     # positive lookahead for space
                     )
                   }ix


  ### n/p - not played
  SCORE_NP_RE = %r{   ## must be space before and after!!!
                    (?<score_np>
                       (?<=[ ])    # positive lookbehind for space
                       n/p
                       (?=[ ])     # positive lookahead for space
                    )
                  }ix

  ## A walkover, also W.O. or w/o (originally two words: "walk over"),
  ##  is awarded to the opposing team/player etc,
  ##  if there are no other players available,
  ##  or they have been disqualified,
  ##  because the other contestants have forfeited or
  ##  the other contestants have withdrawn from the contest.
  ##
  ## w/o - walk over
  SCORE_WO_RE = %r{   ## must be space before and after!!!
                    (?<score_wo>
                       (?<=[ ])    # positive lookbehind for space
                       w/o
                       (?=[ ])     # positive lookahead for space
                    )
                  }ix



end  # class Parser
end  # module Rsssf
|
103
|
+
|
@@ -0,0 +1,162 @@
|
|
1
|
+
module Rsssf
class Parser

  ##
  ## Token regexes for (free) text e.g. team names, player names, etc.
  ##
  ##   TEXT_RE        - "standard" text; may start with a number (1. FC),
  ##                     may contain numbers (Schalke 04) and may end
  ##                     with a bracketed suffix e.g. (Hamburg)
  ##   TEXT_STRICT_RE - stricter variant (letters only, optional quoted parts);
  ##                     meant for text inside brackets
  ##
  ## Both expose the match via the named capture text.

  ## note - do NOT allow single alpha text for now
  ##   add later??  A - B    C - D  - why?
  ## opt 1) one alpha
  ##  (?<text_i> [a-z])   # only allow single letter text (not numbers!!)

  ## opt 2) more than one alphanum


  ### allow special case - starting text with number e.g.
  ##    number must be followed by space or dot ()
  # 1 FC       ## allow 1-FC or 1FC   - why? why not?
  # 1. FC
  # 1.FC   - XXXX  - not allowed for now, parse error
  # 1FC    - XXXX  - not allowed for now, parse error
  # 1890 Munich
  #


  ##
  #  allow Cote'd Ivoir or such
  ##   e.g. add '


  ## note - use a more strict text re(gex)
  ##          if inside brackets !!!!

  ###
  ## "simple" strict text regex
  ###  no numbers (or & or such inside)
  ##     allows dash/hyphen (-)
  ##       and dot (.) and apostrophe (') for now


  ##  simple (double) quoted text
  ##   only supports a-z (unicode) PLUS (single) inline space
  ##    add more chars - why? why not?
  ##
  ##  note - this is a regex source snippet (string) that gets
  ##           interpolated into TEXT_STRICT_RE (extended mode)
  TEXT_QUOTED = '(?: " ' +
                '   \p{L}+ ' +
                '   (?: [ ] ' +
                '       \p{L}+ )* ' +
                '  " ) '


  ###  might start with "" !!!
  ## e.g.
  ##  "Tiago" Cardoso Mendes 80
  ##  "Cristiano Ronaldo" dos Santos Aveiro 74
  ##  "Zé Castro" José Eduardo Rosa Vale Castro 60og


  TEXT_STRICT_RE = %r{
      (?<text>
          (?: \b | #{TEXT_QUOTED} [ ]   ## note - leading quoted text must be followed by space!!
          )
          \p{L}+    ## all unicode letters (e.g. [a-z])

          (?:
              (?:[ ]
                   |     # only single spaces allowed inline!!!
                 [-]
              )?
              (?:
                 \p{L}+ |
                 ['.]   |
                 (?:
                    (?<= [ ])
                    #{TEXT_QUOTED}
                    (?= [ ]|$)    ### must be followed by space
                                  ## todo/fix - add all end of text lookaheads to (see below)
                 )
              )
          )*
          ## must NOT end with space or dash(-)
          ##  todo/fix - possible in regex here
          ##    only end in alphanum a-z0-9 (not dot or & ???)

          ## positive lookahead
          ##  cannot use \b if text ends in dot (.) or other non-alphnum
          ##    than \b will not work
          (?=[ ,;@|\[\]]
               |$
          )
      )
  }ix



  TEXT_RE = %r{
      ## must start with alpha (allow unicode letters!!)
      (?<text>
          \b     ## use/require word boundary
          (?:
              # opt 1 - start with alpha
              \p{L}+    ## all unicode letters (e.g. [a-z])
              |

              # opt 2 - start with num!! - allow special case (e.g. 1. FC)
              \d+   # check for num lookahead (MUST be space or dot)
                    ## MUST be followed by (optional dot) and
                    ##                     required space !!!
                    ## MUST be followed by a to z!!!!
              \.?     ## optional dot
              [ ]?    ## make space optional too - why? why not?
                      ##   yes - eg. 1st, 2nd, 5th etc.
              \p{L}+
          )

          (?:(?:  (?:[ ]
                    (?! (?:awd|abd|ppd|n/p|w/o)[ ])   ## note - exclude (awd[ ]/abd[ ]/n/p[ ])
                  )
                   |     # only single spaces allowed inline!!!
                 [-]
             )?
             (?:
                \p{L}+ | [&/'.]
                   |
                (?:
                  \d+
                  (?![0-9.:'/+-])
                  ## negative lookahead for numbers
                  ##   note - include digits itself!!!
                )
             )
          )*   ## must NOT end with space or dash(-)
               ##  todo/fix - possible in regex here
               ##    only end in alphanum a-z0-9 (not dot or & ???)

          ## support (Hamburg) or such at the end (only)
          ##   note - no numbers allowed inside () for now!!
          (?:
             [ ]\(\p{L}+
                 (?:
                    (?: [ ] |
                        [-]
                    )?
                    ## note - the alternatives MUST be grouped; without the group
                    ##   the (optional) separator only binds to the letters and
                    ##   a symbol after a space, as in (A & B), never matches
                    (?: \p{L}+ | [&/'.] )
                 )*
              \)
          )?


          ## add lookahead/lookbehind
          ##    must be space!!!
          ##   (or comma or start/end of string)
          ##   kind of \b !!!
          ## positive lookahead
          ##  note - added : too - why? why not?
          (?=[ ,;@|:\[\]]
               |$
          )
      )
  }ix



end  # class Parser
end  # module Rsssf
|
162
|
+
|
@@ -0,0 +1,230 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Rsssf
class Parser

  ##
  ## Line tokenizer.
  ##
  ## note - GROUP_RE, ROUND_RE, LEG_RE, SCORE_*_RE, TEXT_RE, TEXT_STRICT_RE
  ##   and DATE_RE, NOTE_RE, GOAL_OG_RE, GOAL_PEN_RE, MINUTE_RE are NOT
  ##   defined here; they must be loaded before this file
  ##   (presumably from the sibling token-* files - confirm).


  ## whitespace (one space / run of spaces) and single-char symbols
  BASICS_RE = %r{
      (?<spaces> [ ]{2,}) |
      (?<space>  [ ])
         |
      (?<sym>[;,@|\[\]])
  }ix



  ## versus (-) e.g. Arsenal - Chelsea
  VS_RE = %r{   ## must be space before and after!!!
              (?<vs>
                 (?<=[ ])    # positive lookbehind for space
                 -
                 (?=[ ])     # positive lookahead for space
              )
           }ix





  ## "standard" match mode (outside of brackets)
  ##   note - order matters; at the same position earlier regexes win
  RE = Regexp.union( GROUP_RE, ROUND_RE, LEG_RE,
                     DATE_RE,
                     VS_RE,
                     SCORE_RE,
                     SCORE_AWD_RE, SCORE_ABD_RE, SCORE_PPD_RE, SCORE_NP_RE,
                     SCORE_WO_RE,
                     SCORE_EXT_RE,
                     NOTE_RE,
                     BASICS_RE,
                     TEXT_RE )


  ## "strict" text match mode inside brackets
  ##    ]
  INSIDE_RE = Regexp.union( GOAL_OG_RE, GOAL_PEN_RE,
                            BASICS_RE,
                            TEXT_STRICT_RE,
                            MINUTE_RE,
                          )

  ## append msg (plus newline) to ./logs.txt
  ##   (relative to the current working directory)
  def log( msg )
     ## use ./errors.txt - why? why not?
     File.open( './logs.txt', 'a:utf-8' ) do |f|
       f.write( msg )
       f.write( "\n" )
     end
  end



  ##
  ## Split line into tokens.
  ##
  ##   line  - text to tokenize
  ##   debug - if true, print the match data / positions
  ##             and the match mode switches to stdout
  ##
  ## Returns [tokens, errors]
  ##   tokens - array of [type, value] e.g. [:text, "Arsenal"] or,
  ##             for the symbols , ; @ |, of [type] only e.g. [:',']
  ##   errors - array of messages, one for every skipped (unmatched) text run
  ##
  ## Skipped text runs are also printed (always) and appended to the log.
  ##
  ## note - the match mode (RE or INSIDE_RE) is kept in @re and
  ##   survives between calls; an opening bracket ([) switches to the
  ##   inside mode and a closing bracket (]) switches back.
  def tokenize_with_errors( line, debug: false )
    tokens = []
    errors = []   ## keep a list of errors - why? why not?

    puts ">#{line}<"    if debug

    pos = 0
    ## track last offsets - to report error on no match
    ##   or no match in end of string
    offsets = [0,0]

    ####
    ## quick hack - keep re state/mode between tokenize calls!!!
    @re ||= RE     ## note - switch between RE & INSIDE_RE


    while (m = @re.match( line, pos ))
      if debug
        pp m
        puts "pos: #{pos}"
      end
      offsets = [m.begin(0), m.end(0)]

      if offsets[0] != pos
        ## match NOT starting at start/begin position!!!
        ##  report parse error!!!
        err = "parse error - skipping >#{line[pos...offsets[0]]}< @#{offsets[0]},#{offsets[1]}"
        msg = "!! WARN - #{err} in line >#{line}<"
        puts msg

        errors << err
        log( msg )
      end

      ##
      ## todo/fix - also check if possible
      ##   if no match but not yet end off string!!!!
      ##    report skipped text run too!!!

      pos = offsets[1]

      pp offsets   if debug

      ## note - only ask for named captures that exist in the
      ##          regex of the current mode
      t = if @re == INSIDE_RE
              if m[:space]
                 nil   ## skip space
              elsif m[:spaces]
                 nil   ## skip spaces
              elsif m[:text]
                [:text, m[:text]]   ## keep pos - why? why not?
              elsif m[:minute]
                [:minute, m[:minute]]
              elsif m[:og]
                [:og, m[:og]]    ## for typed drop - string version/variants
              elsif m[:pen]
                [:pen, m[:pen]]
              elsif m[:sym]
                sym = m[:sym]
                ## return symbols "inline" as is - why? why not?
                case sym
                when ',', ';', '@', '|' then [sym.to_sym]
                when '['
                  ## report error - already in inside mode!!!
                  nil
                when ']'
                  puts "  leave inside match mode"   if debug
                  @re = RE
                  nil
                else
                  nil  ## ignore others (e.g. brackets [])
                end
              else
                ## report error - why? why not?
                nil
              end
          else  ## assume standard mode/ctx
              if m[:space]
                 nil   ## skip space
              elsif m[:spaces]
                 nil   ## skip spaces
              elsif m[:text]
                [:text, m[:text]]   ## keep pos - why? why not?
              elsif m[:note]
                [:note, m[:note]]
              elsif m[:group]
                [:group, m[:group]]
              elsif m[:round]
                [:round, m[:round]]
              elsif m[:leg]
                [:leg, m[:leg]]
              elsif m[:date]
                [:date, m[:date]]
              elsif m[:vs]
                [:vs, m[:vs]]
              elsif m[:score]
                [:score, m[:score]]
              elsif m[:score_awd]    # awarded (awd)
                [:score_awd, m[:score_awd]]
              elsif m[:score_abd]    # abandoned (abd)
                [:score_abd, m[:score_abd]]
              elsif m[:score_ppd]    # postponed (ppd)
                [:score_ppd, m[:score_ppd]]
              elsif m[:score_np]     # not played (n/p)
                [:score_np, m[:score_np]]
              elsif m[:score_wo]     # walk over (w/o)
                [:score_wo, m[:score_wo]]
              elsif m[:score_ext]
                [:score_ext, m[:score_ext]]
              elsif m[:sym]
                sym = m[:sym]
                ## return symbols "inline" as is - why? why not?
                case sym
                when ',', ';', '@', '|' then [sym.to_sym]
                when '['
                  ## switch to inside mode!!!
                  puts "  enter inside match mode"   if debug
                  @re = INSIDE_RE
                  nil
                when ']'
                  ## already in standard mode/ctx
                  ##  report warn/error - why? why not?
                  nil
                else
                  nil  ## ignore others (e.g. brackets [])
                end
              else
                ## report error - why? why not?
                nil
              end
          end


      tokens << t    if t

      if debug
        print ">"
        print "*" * pos
        puts "#{line[pos..-1]}<"
      end
    end

    ## check if no match in end of string
    if offsets[1] != line.size
      err = "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
      msg = "!! WARN - #{err} in line >#{line}<"
      puts msg
      log( msg )

      errors << err
    end


    [tokens,errors]
  end


  ### convenience helper - ignore errors by default
  ##    returns the tokens only (see tokenize_with_errors)
  def tokenize( line, debug: false )
    tokens, _ = tokenize_with_errors( line, debug: debug )
    tokens
  end


end  # class Parser
end  # module Rsssf
|
230
|
+
|
data/lib/rsssf/parser.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
####
|
3
|
+
## build on "standard" parse
|
4
|
+
require 'sportdb/parser'
|
5
|
+
|
6
|
+
|
7
|
+
## our own code
|
8
|
+
require_relative 'parser/token-text'
|
9
|
+
require_relative 'parser/token-note'
|
10
|
+
require_relative 'parser/token-round' ## round (& group)
|
11
|
+
require_relative 'parser/token-date'
|
12
|
+
require_relative 'parser/token-score'
|
13
|
+
require_relative 'parser/token-goals'
|
14
|
+
require_relative 'parser/token'
|
15
|
+
|
16
|
+
require_relative 'parser/parser'
|
17
|
+
|
18
|
+
require_relative 'parser/linter'
|
19
|
+
|
20
|
+
|
21
|
+
|