rsssf-parser 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +1 -0
- data/Rakefile +2 -1
- data/bin/rsssf +7 -5
- data/lib/rsssf/parser/linter.rb +90 -26
- data/lib/rsssf/parser/token-date.rb +6 -59
- data/lib/rsssf/parser/token-goals.rb +3 -3
- data/lib/rsssf/parser/token-note.rb +32 -4
- data/lib/rsssf/parser/token-text.rb +3 -2
- data/lib/rsssf/parser/token.rb +75 -16
- data/lib/rsssf/parser/version.rb +24 -0
- data/lib/rsssf/parser.rb +10 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 52ee31bde399793292f0978a0f3109be47f2df56de7e35fb013f6f47d33a5ff0
|
4
|
+
data.tar.gz: d2ecbbb9f5935d97a9520c65e30a4a4ce0fab6b6817e97e70932c4f73e02f269
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 403573de54f0dba9155ec8efa264eb5f69dc3785cb7411cbf97d4fbbf033734370bdfe61d6eaff5d1a5939a2972bd12ea9a2fcf9fb4abdd14077e85b9d9a3d3a
|
7
|
+
data.tar.gz: 54414cabdff9a1804f9ce5256dd34d5d0b70b7df7e9d3d22fb2fa3191712436679763370c019b78b8ccc051196feed31edfd5e2807b42c845789e848bf3e5e50
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'hoe'
|
2
|
+
require './lib/rsssf/parser/version.rb'
|
2
3
|
|
3
4
|
|
4
5
|
Hoe.spec 'rsssf-parser' do
|
5
6
|
|
6
|
-
self.version =
|
7
|
+
self.version = SportDb::Module::RsssfParser::VERSION
|
7
8
|
|
8
9
|
self.summary = "rsssf-parser - football match schedule & results parser (& tokenizer) for the rsssf format / conventions"
|
9
10
|
self.description = summary
|
data/bin/rsssf
CHANGED
@@ -45,8 +45,7 @@ paths = if args.empty?
|
|
45
45
|
'../../../rsssf/austria/2010-11/cup.txt',
|
46
46
|
]
|
47
47
|
else
|
48
|
-
|
49
|
-
args
|
48
|
+
SportDb::Parser::Opts.expand_args( args )
|
50
49
|
end
|
51
50
|
|
52
51
|
|
@@ -60,15 +59,18 @@ Rsssf::Parser::Linter.debug = true if opts[:debug]
|
|
60
59
|
linter = Rsssf::Parser::Linter.new
|
61
60
|
|
62
61
|
|
62
|
+
errors = []
|
63
|
+
|
63
64
|
paths.each_with_index do |path,i|
|
64
65
|
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
65
66
|
linter.read( path, parse: !opts[:metal] )
|
67
|
+
errors += linter.errors if linter.errors?
|
66
68
|
end
|
67
69
|
|
68
|
-
if
|
70
|
+
if errors.size > 0
|
69
71
|
puts
|
70
|
-
pp
|
71
|
-
puts "!! #{
|
72
|
+
pp errors
|
73
|
+
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
72
74
|
else
|
73
75
|
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
74
76
|
end
|
data/lib/rsssf/parser/linter.rb
CHANGED
@@ -25,45 +25,78 @@ def errors?() @errors.size > 0; end
|
|
25
25
|
|
26
26
|
|
27
27
|
|
28
|
+
def read( path, parse: false )
|
29
|
+
parse( read_text( path ), parse: parse,
|
30
|
+
path: path )
|
31
|
+
end
|
32
|
+
|
28
33
|
#########
|
29
34
|
## parse - false (default) - tokenize (only)
|
30
35
|
## - true - tokenize & parse
|
31
|
-
|
32
|
-
|
33
|
-
## fix - (re)use outline reader later!!!
|
34
|
-
## plus check for headings etc.
|
35
|
-
|
36
|
-
text = File.open( path, 'r:utf-8' ) { |f| f.read }
|
37
|
-
lines = text.split( "\n" )
|
38
|
-
|
36
|
+
##
|
37
|
+
## todo/fix - change path to file or such - why? why not?
|
39
38
|
|
40
|
-
## process lines
|
41
|
-
tree = []
|
42
|
-
lines.each do |line|
|
43
39
|
|
44
|
-
|
45
|
-
next if line.strip.empty? || line.strip.start_with?('#')
|
40
|
+
MAX_ERRORS = 13 ## stop after 13 errors
|
46
41
|
|
47
|
-
|
48
|
-
|
42
|
+
def parse( txt, parse: false,
|
43
|
+
path: 'path/to/filename/here' )
|
44
|
+
## note: every (new) read call - resets errors list to empty
|
45
|
+
@errors = []
|
49
46
|
|
47
|
+
nodes = SportDb::OutlineReader.parse( txt )
|
50
48
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
49
|
+
## process nodes
|
50
|
+
h1 = nil
|
51
|
+
orphans = 0 ## track paragraphs with no heading
|
52
|
+
paragraphs = 0 ## track paragraphs with heading
|
55
53
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
54
|
+
nodes.each do |node|
|
55
|
+
type = node[0]
|
56
|
+
|
57
|
+
if type == :h1
|
58
|
+
h1 = node[1] ## get heading text
|
59
|
+
## puts
|
60
|
+
puts " = Heading 1 >#{node[1]}<"
|
61
|
+
elsif type == :p
|
62
|
+
|
63
|
+
if h1.nil?
|
64
|
+
orphans += 1 ## only warn once (at the end; see below)
|
65
|
+
next
|
66
|
+
end
|
67
|
+
|
68
|
+
paragraphs += 1
|
69
|
+
|
70
|
+
lines = node[1]
|
71
|
+
|
72
|
+
tree = []
|
73
|
+
lines.each_with_index do |line,i|
|
74
|
+
|
75
|
+
if debug?
|
76
|
+
puts
|
77
|
+
puts "line >#{line}<"
|
78
|
+
end
|
79
|
+
|
80
|
+
t, error_messages = if parse
|
81
|
+
@parser.parse_with_errors( line )
|
82
|
+
else
|
83
|
+
@parser.tokenize_with_errors( line )
|
84
|
+
end
|
61
85
|
|
62
86
|
|
63
87
|
if error_messages.size > 0
|
64
88
|
## add to "global" error list
|
65
89
|
## make a triplet tuple (file / msg / line text)
|
66
90
|
error_messages.each do |msg|
|
91
|
+
|
92
|
+
## note - stop processing / adding errors if hit MAX ERRORS
|
93
|
+
if @errors.size >= MAX_ERRORS
|
94
|
+
@errors << [ path,
|
95
|
+
"stop after #{MAX_ERRORS} errors",
|
96
|
+
'']
|
97
|
+
return
|
98
|
+
end
|
99
|
+
|
67
100
|
@errors << [ path,
|
68
101
|
msg,
|
69
102
|
line
|
@@ -74,9 +107,40 @@ def read( path, parse: false )
|
|
74
107
|
pp t if debug?
|
75
108
|
|
76
109
|
tree << t
|
77
|
-
end
|
110
|
+
end
|
78
111
|
## pp tree
|
79
|
-
|
112
|
+
else
|
113
|
+
pp node
|
114
|
+
raise ArgumentError, "unsupported (node) type >#{type}<"
|
115
|
+
end
|
116
|
+
end # each node
|
117
|
+
|
118
|
+
## no heading and no orphans => assume empty file (comments only)!!!
|
119
|
+
if h1.nil? && orphans == 0
|
120
|
+
puts " !! WARN - no heading(s) and paragraph(s) found"
|
121
|
+
@errors << [ path,
|
122
|
+
"warn - no heading(s) and paragraph(s) found",
|
123
|
+
"" ## pass along empty line
|
124
|
+
]
|
125
|
+
end
|
126
|
+
|
127
|
+
if orphans > 0
|
128
|
+
puts " !! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
129
|
+
@errors << [ path,
|
130
|
+
"warn - no heading for #{orphans} text paragraph(s); skipping parse",
|
131
|
+
"" ## pass along empty line
|
132
|
+
]
|
133
|
+
end
|
134
|
+
|
135
|
+
if h1 && paragraphs == 0
|
136
|
+
puts " !! WARN - heading with no text paragraph(s)"
|
137
|
+
@errors << [ path,
|
138
|
+
"warn - heading with no text paragraph(s)",
|
139
|
+
"" ## pass along empty line
|
140
|
+
]
|
141
|
+
end
|
142
|
+
|
143
|
+
end # parse
|
80
144
|
end # class Linter
|
81
145
|
|
82
146
|
|
@@ -2,62 +2,9 @@ module Rsssf
|
|
2
2
|
class Parser
|
3
3
|
|
4
4
|
|
5
|
-
|
6
|
-
def self.parse_names( txt )
|
7
|
-
lines = [] # array of lines (with words)
|
8
|
-
|
9
|
-
txt.each_line do |line|
|
10
|
-
line = line.strip
|
11
|
-
|
12
|
-
next if line.empty?
|
13
|
-
next if line.start_with?( '#' ) ## skip comments too
|
14
|
-
|
15
|
-
## strip inline (until end-of-line) comments too
|
16
|
-
## e.g. Janvier Janv Jan ## check janv in use??
|
17
|
-
## => Janvier Janv Jan
|
18
|
-
|
19
|
-
line = line.sub( /#.*/, '' ).strip
|
20
|
-
## pp line
|
21
|
-
|
22
|
-
values = line.split( /[ \t]+/ )
|
23
|
-
## pp values
|
24
|
-
|
25
|
-
## todo/fix -- add check for duplicates
|
26
|
-
lines << values
|
27
|
-
end
|
28
|
-
lines
|
29
|
-
|
30
|
-
end # method parse
|
31
|
-
|
32
|
-
|
33
|
-
def self.build_names( lines )
|
34
|
-
## join all words together into a single string e.g.
|
35
|
-
## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
|
36
|
-
lines.map { |line| line.join('|') }.join('|')
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
## add normalize option (for downcase) - why? why not?
|
42
|
-
def self.build_map( lines )
|
43
|
-
## note: downcase name!!!
|
44
|
-
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
45
|
-
## {"january" => 1, "jan" => 1,
|
46
|
-
## "february" => 2, "feb" => 2,
|
47
|
-
## "march" => 3, "mar" => 3,
|
48
|
-
## "april" => 4, "apr" => 4,
|
49
|
-
## "may" => 5,
|
50
|
-
## "june" => 6, "jun" => 6, ...
|
51
|
-
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
52
|
-
line.each { |name| h[ name.downcase ] = i+1 } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
53
|
-
h
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
5
|
## note - support only 5 letter max for now
|
59
6
|
## now January|February|August etc.
|
60
|
-
MONTH_LINES = parse_names( <<TXT )
|
7
|
+
MONTH_LINES = SportDb::Parser.parse_names( <<TXT )
|
61
8
|
Jan
|
62
9
|
Feb
|
63
10
|
March Mar
|
@@ -72,15 +19,15 @@ Nov
|
|
72
19
|
Dec
|
73
20
|
TXT
|
74
21
|
|
75
|
-
MONTH_NAMES = build_names( MONTH_LINES )
|
22
|
+
MONTH_NAMES = SportDb::Parser.build_names( MONTH_LINES )
|
76
23
|
# pp MONTH_NAMES
|
77
|
-
MONTH_MAP = build_map( MONTH_LINES )
|
24
|
+
MONTH_MAP = SportDb::Parser.build_map( MONTH_LINES, downcase: true )
|
78
25
|
# pp MONTH_MAP
|
79
26
|
|
80
27
|
|
81
28
|
### nnote - only support two or three letters
|
82
29
|
## no Tues | Thur | Thurs | Sunday etc.
|
83
|
-
DAY_LINES = parse_names( <<TXT )
|
30
|
+
DAY_LINES = SportDb::Parser.parse_names( <<TXT )
|
84
31
|
Mon Mo
|
85
32
|
Tue Tu
|
86
33
|
Wed We
|
@@ -91,9 +38,9 @@ Sun Su
|
|
91
38
|
TXT
|
92
39
|
|
93
40
|
|
94
|
-
DAY_NAMES = build_names( DAY_LINES )
|
41
|
+
DAY_NAMES = SportDb::Parser.build_names( DAY_LINES )
|
95
42
|
# pp DAY_NAMES
|
96
|
-
DAY_MAP = build_map( DAY_LINES )
|
43
|
+
DAY_MAP = SportDb::Parser.build_map( DAY_LINES, downcase: true )
|
97
44
|
# pp DAY_MAP
|
98
45
|
|
99
46
|
|
@@ -29,12 +29,12 @@ MINUTE_RE = %r{
|
|
29
29
|
(?:
|
30
30
|
\d{1,3}
|
31
31
|
'? ## optional minute quote (')
|
32
|
-
(?= (og|pen|p)? ([ ;,\]]|$))
|
32
|
+
(?= (og|o|pen|p)? ([ ;,\]\)]|$))
|
33
33
|
)
|
34
34
|
)
|
35
35
|
)
|
36
36
|
|
|
37
|
-
(?= (og|pen|p)? ([ ;,\]]|$)) # note - break can be og|pen|p too
|
37
|
+
(?= (og|o|pen|p)? ([ ;,\]\)]|$)) # note - break can be og|pen|p too
|
38
38
|
)
|
39
39
|
)}ix
|
40
40
|
### note - word boundary (\b) will NOT work for quoet (')
|
@@ -55,7 +55,7 @@ GOAL_PEN_RE = %r{
|
|
55
55
|
GOAL_OG_RE = %r{
|
56
56
|
(?<og>
|
57
57
|
(?<=\d|\+|[ ]|') ## must follow a number or plus (e.g. 45og / 45+og / 45 og) or space
|
58
|
-
og
|
58
|
+
(?: og|o )
|
59
59
|
\b
|
60
60
|
)
|
61
61
|
}ix
|
@@ -6,8 +6,8 @@ class Parser
|
|
6
6
|
## move to token-note(s) file !!!!
|
7
7
|
##
|
8
8
|
|
9
|
-
|
10
|
-
\[
|
9
|
+
NOTE_BASICS_RE = %r{
|
10
|
+
(?<note_open> \[ )
|
11
11
|
(?<note>
|
12
12
|
(?: ## starting with ___ PLUS requiring more text
|
13
13
|
(?:
|
@@ -97,16 +97,44 @@ NOTE_RE = %r{
|
|
97
97
|
|
|
98
98
|
replay
|
99
99
|
## e.g. [replay]
|
100
|
+
|
|
101
|
+
verified
|
102
|
+
## e.g. [verified 2:0 wo.]
|
100
103
|
)
|
101
104
|
([ ] ## note - optional text
|
102
105
|
[^\]]+?
|
103
106
|
)? ## slurp all to next ] - (use non-greedy)
|
104
107
|
)
|
105
|
-
) # note capture
|
106
|
-
|
108
|
+
) # note capture
|
109
|
+
|
110
|
+
(?:
|
111
|
+
(?<note_close> \] )
|
112
|
+
| $ ## note - allow open notes (that continue on next line)
|
113
|
+
)
|
114
|
+
}ix
|
115
|
+
|
116
|
+
|
117
|
+
NOTE_MORE_RE = %r{
|
118
|
+
(?<=[ ]) ## one (leading) space min. required
|
119
|
+
(?<note_cont>
|
120
|
+
[⮑…] |
|
121
|
+
\.{2,3} ### .. or ...
|
122
|
+
)
|
123
|
+
[ ]*
|
124
|
+
(?<note>
|
125
|
+
[^\]]+? ## non-greeedy
|
126
|
+
)
|
127
|
+
(?:
|
128
|
+
(?<note_close> \] )
|
129
|
+
| $ ## note - allow open notes (that continue on next line)
|
130
|
+
)
|
107
131
|
}ix
|
108
132
|
|
109
133
|
|
134
|
+
NOTE_RE = Regexp.union( NOTE_BASICS_RE,
|
135
|
+
NOTE_MORE_RE,
|
136
|
+
)
|
137
|
+
|
110
138
|
|
111
139
|
end # class Parser
|
112
140
|
end # module Rsssf
|
@@ -80,8 +80,9 @@ TEXT_STRICT_RE = %r{
|
|
80
80
|
|
81
81
|
## positive lookahead
|
82
82
|
## cannot use \b if text ends in dot (.) or other non-alphnum
|
83
|
-
## than \b will not work
|
84
|
-
|
83
|
+
## than \b will not work
|
84
|
+
## not - add () too for now - why? why not?
|
85
|
+
(?=[ ,;@|\[\]\(\)]
|
85
86
|
|$
|
86
87
|
)
|
87
88
|
)
|
data/lib/rsssf/parser/token.rb
CHANGED
@@ -9,7 +9,7 @@ BASICS_RE = %r{
|
|
9
9
|
(?<spaces> [ ]{2,}) |
|
10
10
|
(?<space> [ ])
|
11
11
|
|
|
12
|
-
(?<sym>[;,@|\[\]])
|
12
|
+
(?<sym>[;,@|\[\]\(\)]) ## note - add () too - why? why not?
|
13
13
|
}ix
|
14
14
|
|
15
15
|
|
@@ -38,10 +38,37 @@ RE = Regexp.union( GROUP_RE, ROUND_RE, LEG_RE,
|
|
38
38
|
TEXT_RE )
|
39
39
|
|
40
40
|
|
41
|
+
|
42
|
+
### rename to dash or to ???
|
43
|
+
#### used to add/allow hyphen/dash (-) in INSIDE_RE
|
44
|
+
HYPHEN_RE = %r{ ## must be space before and after (or end of line)!!!
|
45
|
+
## note - uses SYM capture
|
46
|
+
(?<sym>
|
47
|
+
(?<=[ ]) # Positive lookbehind for space
|
48
|
+
-
|
49
|
+
(?=[ ]|$) # positive lookahead for space
|
50
|
+
)
|
51
|
+
}ix
|
52
|
+
|
53
|
+
### rename to ?? use SCORE_AT for now - why? why not?
|
54
|
+
## add support for score at/score points/markers
|
55
|
+
### e.g. [1-0 Andrei 08, 1-1 Rydlewicz 24, 1-2 Prica 85, 2-2 Bella 88,
|
56
|
+
## 2-3 Arvidsson 102]
|
57
|
+
|
58
|
+
SCORE_AT_RE = %r{ (?<score_at>
|
59
|
+
\b
|
60
|
+
\d{1,2}-\d{1,2}
|
61
|
+
\b
|
62
|
+
)
|
63
|
+
}ix
|
64
|
+
|
65
|
+
|
66
|
+
|
41
67
|
## "strict" text match mode inside brackets
|
42
68
|
## ]
|
43
|
-
INSIDE_RE = Regexp.union(
|
44
|
-
|
69
|
+
INSIDE_RE = Regexp.union( SCORE_AT_RE,
|
70
|
+
GOAL_OG_RE, GOAL_PEN_RE,
|
71
|
+
BASICS_RE, HYPHEN_RE,
|
45
72
|
TEXT_STRICT_RE,
|
46
73
|
MINUTE_RE,
|
47
74
|
)
|
@@ -56,6 +83,11 @@ def log( msg )
|
|
56
83
|
end
|
57
84
|
|
58
85
|
|
86
|
+
## open/close pairs - lookup close (by open char)
|
87
|
+
SYM_CLOSE = {
|
88
|
+
'(' => ')',
|
89
|
+
'[' => ']',
|
90
|
+
}
|
59
91
|
|
60
92
|
def tokenize_with_errors( line, debug: false )
|
61
93
|
tokens = []
|
@@ -72,6 +104,7 @@ def tokenize_with_errors( line, debug: false )
|
|
72
104
|
####
|
73
105
|
## quick hack - keep re state/mode between tokenize calls!!!
|
74
106
|
@re ||= RE ## note - switch between RE & INSIDE_RE
|
107
|
+
|
75
108
|
|
76
109
|
|
77
110
|
while m = @re.match( line, pos )
|
@@ -84,10 +117,14 @@ def tokenize_with_errors( line, debug: false )
|
|
84
117
|
if offsets[0] != pos
|
85
118
|
## match NOT starting at start/begin position!!!
|
86
119
|
## report parse error!!!
|
87
|
-
|
120
|
+
|
121
|
+
ctx = @re == INSIDE_RE ? 'INSIDE_RE' : 'RE' ## assume RE
|
122
|
+
## fix/change - use str.inspect to show tabs (\t)
|
123
|
+
## and possibly other special characters causing trouble
|
124
|
+
msg = " !! WARN - parse error (#{ctx}) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
88
125
|
puts msg
|
89
126
|
|
90
|
-
errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
|
127
|
+
errors << "parse error (#{ctx}) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
|
91
128
|
log( msg )
|
92
129
|
end
|
93
130
|
|
@@ -109,7 +146,9 @@ def tokenize_with_errors( line, debug: false )
|
|
109
146
|
[:text, m[:text]] ## keep pos - why? why not?
|
110
147
|
elsif m[:minute]
|
111
148
|
[:minute, m[:minute]]
|
112
|
-
elsif m[:og]
|
149
|
+
elsif m[:score_at]
|
150
|
+
[:score_at, m[:score_at]]
|
151
|
+
elsif m[:og]
|
113
152
|
[:og, m[:og]] ## for typed drop - string version/variants
|
114
153
|
elsif m[:pen]
|
115
154
|
[:pen, m[:pen]]
|
@@ -121,12 +160,21 @@ def tokenize_with_errors( line, debug: false )
|
|
121
160
|
when ';' then [:';']
|
122
161
|
when '@' then [:'@']
|
123
162
|
when '|' then [:'|']
|
124
|
-
when '['
|
125
|
-
|
163
|
+
when '-' then [:'-']
|
164
|
+
when '[', '('
|
165
|
+
if sym == @sym_open
|
166
|
+
## report error - already in inside mode!!!
|
167
|
+
## e.g. another [ in [] or ( in ()
|
168
|
+
log( "warn - unexpected (opening) #{sym} in inside (goal) mode in line >#{line}<" )
|
169
|
+
end
|
126
170
|
nil
|
127
|
-
when ']'
|
128
|
-
puts " leave inside match mode"
|
129
|
-
@re = RE
|
171
|
+
when ']', ')' ## allow [] AND () for inside mode
|
172
|
+
## puts " leave inside match mode"
|
173
|
+
if sym == @sym_close
|
174
|
+
@re = RE
|
175
|
+
@sym_open = nil ## reset sym_open/close
|
176
|
+
@sym_close = nil
|
177
|
+
end
|
130
178
|
nil
|
131
179
|
else
|
132
180
|
nil ## ignore others (e.g. brackets [])
|
@@ -176,12 +224,15 @@ def tokenize_with_errors( line, debug: false )
|
|
176
224
|
when ';' then [:';']
|
177
225
|
when '@' then [:'@']
|
178
226
|
when '|' then [:'|']
|
179
|
-
when '['
|
227
|
+
when '[', '('
|
180
228
|
## switch to inside mode!!!
|
181
|
-
puts " enter inside match mode"
|
229
|
+
## puts " enter inside match mode"
|
182
230
|
@re = INSIDE_RE
|
231
|
+
@sym_open = sym ## record open/close style - why? why not?
|
232
|
+
@sym_close = SYM_CLOSE[sym]
|
183
233
|
nil
|
184
|
-
when ']'
|
234
|
+
when ']', ')'
|
235
|
+
log( "warn - unexpected (closing) #{sym} in standard mode in line >#{line}<" )
|
185
236
|
## already in standard mode/ctx
|
186
237
|
## report warn/error - why? why not?
|
187
238
|
nil
|
@@ -204,13 +255,21 @@ def tokenize_with_errors( line, debug: false )
|
|
204
255
|
end
|
205
256
|
end
|
206
257
|
|
258
|
+
|
207
259
|
## check if no match in end of string
|
208
260
|
if offsets[1] != line.size
|
209
|
-
|
261
|
+
|
262
|
+
## note - report regex context
|
263
|
+
## e.g. RE or INSIDE_RE to help debugging/troubleshooting format errors
|
264
|
+
ctx = @re == INSIDE_RE ? 'INSIDE_RE' : 'RE' ## assume RE
|
265
|
+
## fix/change - use str.inspect to show tabs (\t)
|
266
|
+
## and possibly other special characters causing trouble
|
267
|
+
|
268
|
+
msg = " !! WARN - parse error (#{ctx}) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
|
210
269
|
puts msg
|
211
270
|
log( msg )
|
212
271
|
|
213
|
-
errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
|
272
|
+
errors << "parse error (#{ctx}) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
|
214
273
|
end
|
215
274
|
|
216
275
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Module
|
4
|
+
module RsssfParser
|
5
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
+
MINOR = 1
|
7
|
+
PATCH = 0
|
8
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
+
|
10
|
+
def self.version
|
11
|
+
VERSION
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.banner
|
15
|
+
"rsssf-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.root
|
19
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
|
20
|
+
end
|
21
|
+
|
22
|
+
end # module RsssfParser
|
23
|
+
end
|
24
|
+
end
|
data/lib/rsssf/parser.rb
CHANGED
@@ -2,9 +2,17 @@
|
|
2
2
|
####
|
3
3
|
## build on "standard" parse
|
4
4
|
require 'sportdb/parser'
|
5
|
+
## pulled in for/uses only
|
6
|
+
## - SportDb::Parser::Tokens !!!
|
7
|
+
##
|
8
|
+
## plus in the future pull in SportDb::OutlineReader
|
9
|
+
##
|
10
|
+
## note - pulls in more deps e.g. cococs AND season-formats
|
11
|
+
|
5
12
|
|
6
13
|
|
7
14
|
## our own code
|
15
|
+
require_relative 'parser/version'
|
8
16
|
require_relative 'parser/token-text'
|
9
17
|
require_relative 'parser/token-note'
|
10
18
|
require_relative 'parser/token-round' ## round (& group)
|
@@ -19,3 +27,5 @@ require_relative 'parser/linter'
|
|
19
27
|
|
20
28
|
|
21
29
|
|
30
|
+
# say hello
|
31
|
+
puts SportDb::Module::RsssfParser.banner
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rsssf-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-parser
|
@@ -84,6 +84,7 @@ files:
|
|
84
84
|
- lib/rsssf/parser/token-score.rb
|
85
85
|
- lib/rsssf/parser/token-text.rb
|
86
86
|
- lib/rsssf/parser/token.rb
|
87
|
+
- lib/rsssf/parser/version.rb
|
87
88
|
homepage: https://github.com/sportdb/sport.db
|
88
89
|
licenses:
|
89
90
|
- Public Domain
|