rsssf-parser 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +1 -0
- data/Rakefile +2 -1
- data/bin/rsssf +7 -5
- data/lib/rsssf/parser/linter.rb +90 -26
- data/lib/rsssf/parser/token-date.rb +6 -59
- data/lib/rsssf/parser/token-goals.rb +3 -3
- data/lib/rsssf/parser/token-note.rb +32 -4
- data/lib/rsssf/parser/token-text.rb +3 -2
- data/lib/rsssf/parser/token.rb +75 -16
- data/lib/rsssf/parser/version.rb +24 -0
- data/lib/rsssf/parser.rb +10 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 52ee31bde399793292f0978a0f3109be47f2df56de7e35fb013f6f47d33a5ff0
+  data.tar.gz: d2ecbbb9f5935d97a9520c65e30a4a4ce0fab6b6817e97e70932c4f73e02f269
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 403573de54f0dba9155ec8efa264eb5f69dc3785cb7411cbf97d4fbbf033734370bdfe61d6eaff5d1a5939a2972bd12ea9a2fcf9fb4abdd14077e85b9d9a3d3a
+  data.tar.gz: 54414cabdff9a1804f9ce5256dd34d5d0b70b7df7e9d3d22fb2fa3191712436679763370c019b78b8ccc051196feed31edfd5e2807b42c845789e848bf3e5e50
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -1,9 +1,10 @@
 require 'hoe'
+require './lib/rsssf/parser/version.rb'
 
 
 Hoe.spec 'rsssf-parser' do
 
-  self.version =
+  self.version = SportDb::Module::RsssfParser::VERSION
 
   self.summary = "rsssf-parser - football match schedule & results parser (& tokenizer) for the rsssf format / conventions"
   self.description = summary
data/bin/rsssf
CHANGED
@@ -45,8 +45,7 @@ paths = if args.empty?
            '../../../rsssf/austria/2010-11/cup.txt',
           ]
         else
-
-          args
+          SportDb::Parser::Opts.expand_args( args )
         end
 
 
@@ -60,15 +59,18 @@ Rsssf::Parser::Linter.debug = true if opts[:debug]
 linter = Rsssf::Parser::Linter.new
 
 
+errors = []
+
 paths.each_with_index do |path,i|
   puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
   linter.read( path, parse: !opts[:metal] )
+  errors += linter.errors if linter.errors?
 end
 
-if
+if errors.size > 0
   puts
-  pp
-  puts "!! #{
+  pp errors
+  puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
 else
   puts "OK no parse errors found in #{paths.size} datafile(s)"
 end
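The script now aggregates parse errors across all datafiles instead of inspecting a single run. A minimal sketch of driving the same linter API from plain Ruby, assuming only the calls visible in this diff (read, errors?, errors); the datafile path is hypothetical:

    require 'rsssf/parser'

    paths  = ['austria/2010-11/cup.txt']      # hypothetical datafile(s)
    errors = []

    linter = Rsssf::Parser::Linter.new
    paths.each do |path|
      linter.read( path, parse: true )        # tokenize & parse
      errors += linter.errors  if linter.errors?
    end

    puts errors.empty? ? "OK - no parse errors" : "#{errors.size} parse error(s)"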
data/lib/rsssf/parser/linter.rb
CHANGED
@@ -25,45 +25,78 @@ def errors?() @errors.size > 0; end
 
 
 
+def read( path, parse: false )
+  parse( read_text( path ), parse: parse,
+         path: path )
+end
+
 #########
 ## parse - false (default) - tokenize (only)
 ##         - true - tokenize & parse
-
-
-## fix - (re)use outline reader later!!!
-##        plus check for headings etc.
-
-  text = File.open( path, 'r:utf-8' ) { |f| f.read }
-  lines = text.split( "\n" )
-
+##
+## todo/fix - change path to file or such - why? why not?
 
-  ## process lines
-  tree = []
-  lines.each do |line|
 
-
-    next if line.strip.empty? || line.strip.start_with?('#')
+MAX_ERRORS = 13 ## stop after 13 errors
 
-
-
+def parse( txt, parse: false,
+           path: 'path/to/filename/here' )
+  ## note: every (new) read call - resets errors list to empty
+  @errors = []
 
+  nodes = SportDb::OutlineReader.parse( txt )
 
-
-
-
-
+  ## process nodes
+  h1 = nil
+  orphans = 0 ## track paragraphs with no heading
+  paragraphs = 0 ## track paragraphs with heading
 
-
-
-
-
-
+  nodes.each do |node|
+    type = node[0]
+
+    if type == :h1
+      h1 = node[1] ## get heading text
+      ## puts
+      puts " = Heading 1 >#{node[1]}<"
+    elsif type == :p
+
+      if h1.nil?
+        orphans += 1 ## only warn once (at the end; see below)
+        next
+      end
+
+      paragraphs += 1
+
+      lines = node[1]
+
+      tree = []
+      lines.each_with_index do |line,i|
+
+        if debug?
+          puts
+          puts "line >#{line}<"
+        end
+
+        t, error_messages = if parse
+                              @parser.parse_with_errors( line )
+                            else
+                              @parser.tokenize_with_errors( line )
+                            end
 
 
        if error_messages.size > 0
          ## add to "global" error list
          ## make a triplet tuple (file / msg / line text)
          error_messages.each do |msg|
+
+            ## note - stop processing / adding errors if hit MAX ERRORS
+            if @errors.size >= MAX_ERRORS
+              @errors << [ path,
+                           "stop after #{MAX_ERRORS} errors",
+                           '']
+              return
+            end
+
            @errors << [ path,
                         msg,
                         line
@@ -74,9 +107,40 @@ def read( path, parse: false )
        pp t if debug?
 
        tree << t
-  end
+      end
       ## pp tree
-
+    else
+      pp node
+      raise ArgumentError, "unsupported (node) type >#{type}<"
+    end
+  end # each node
+
+  ## no heading and no orphans => assume empty file (comments only)!!!
+  if h1.nil? && orphans == 0
+    puts " !! WARN - no heading(s) and paragraph(s) found"
+    @errors << [ path,
+                 "warn - no heading(s) and paragraph(s) found",
+                 "" ## pass along empty line
+               ]
+  end
+
+  if orphans > 0
+    puts " !! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
+    @errors << [ path,
+                 "warn - no heading for #{orphans} text paragraph(s); skipping parse",
+                 "" ## pass along empty line
+               ]
+  end
+
+  if h1 && paragraphs == 0
+    puts " !! WARN - heading with no text paragraph(s)"
+    @errors << [ path,
+                 "warn - heading with no text paragraph(s)",
+                 "" ## pass along empty line
+               ]
+  end
+
+end # parse
 end # class Linter
 
 
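Each recorded error is a (file, message, line) triplet, and collection stops once MAX_ERRORS (13) is hit. A small consumption sketch, again assuming only the API shown in this diff; the datafile name is hypothetical:

    linter = Rsssf::Parser::Linter.new
    linter.read( 'cup.txt', parse: true )    # hypothetical datafile

    linter.errors.each do |path, msg, line|
      puts "#{path}: #{msg}"
      puts "  >#{line}<" unless line.empty?
    end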
data/lib/rsssf/parser/token-date.rb
CHANGED
@@ -2,62 +2,9 @@ module Rsssf
 class Parser
 
 
-
-def self.parse_names( txt )
-  lines = [] # array of lines (with words)
-
-  txt.each_line do |line|
-    line = line.strip
-
-    next if line.empty?
-    next if line.start_with?( '#' ) ## skip comments too
-
-    ## strip inline (until end-of-line) comments too
-    ## e.g. Janvier Janv Jan ## check janv in use??
-    ## => Janvier Janv Jan
-
-    line = line.sub( /#.*/, '' ).strip
-    ## pp line
-
-    values = line.split( /[ \t]+/ )
-    ## pp values
-
-    ## todo/fix -- add check for duplicates
-    lines << values
-  end
-  lines
-
-end # method parse
-
-
-def self.build_names( lines )
-  ## join all words together into a single string e.g.
-  ## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
-  lines.map { |line| line.join('|') }.join('|')
-end
-
-
-
-## add normalize option (for downcase) - why? why not?
-def self.build_map( lines )
-  ## note: downcase name!!!
-  ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
-  ## {"january" => 1, "jan" => 1,
-  ## "february" => 2, "feb" => 2,
-  ## "march" => 3, "mar" => 3,
-  ## "april" => 4, "apr" => 4,
-  ## "may" => 5,
-  ## "june" => 6, "jun" => 6, ...
-  lines.each_with_index.reduce( {} ) do |h,(line,i)|
-    line.each { |name| h[ name.downcase ] = i+1 } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
-    h
-  end
-end
-
-
 ## note - support only 5 letter max for now
 ## now January|February|August etc.
-MONTH_LINES = parse_names( <<TXT )
+MONTH_LINES = SportDb::Parser.parse_names( <<TXT )
 Jan
 Feb
 March Mar
@@ -72,15 +19,15 @@ Nov
 Dec
 TXT
 
-MONTH_NAMES = build_names( MONTH_LINES )
+MONTH_NAMES = SportDb::Parser.build_names( MONTH_LINES )
 # pp MONTH_NAMES
-MONTH_MAP = build_map( MONTH_LINES )
+MONTH_MAP = SportDb::Parser.build_map( MONTH_LINES, downcase: true )
 # pp MONTH_MAP
 
 
 ### nnote - only support two or three letters
 ## no Tues | Thur | Thurs | Sunday etc.
-DAY_LINES = parse_names( <<TXT )
+DAY_LINES = SportDb::Parser.parse_names( <<TXT )
 Mon Mo
 Tue Tu
 Wed We
@@ -91,9 +38,9 @@ Sun Su
 TXT
 
 
-DAY_NAMES = build_names( DAY_LINES )
+DAY_NAMES = SportDb::Parser.build_names( DAY_LINES )
 # pp DAY_NAMES
-DAY_MAP = build_map( DAY_LINES )
+DAY_MAP = SportDb::Parser.build_map( DAY_LINES, downcase: true )
 # pp DAY_MAP
 
 
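The month/day name helpers (parse_names, build_names, build_map) now come from the upstream sportdb-parser gem, with an explicit downcase: true for the lookup maps. A small sketch of what the helpers return, assuming they behave like the removed inline versions documented in the comments above:

    lines = SportDb::Parser.parse_names( "Jan\nFeb\nMarch Mar\n" )
    #=> [["Jan"], ["Feb"], ["March", "Mar"]]

    SportDb::Parser.build_names( lines )
    #=> "Jan|Feb|March|Mar"                          ## alternatives for a regex

    SportDb::Parser.build_map( lines, downcase: true )
    #=> {"jan"=>1, "feb"=>2, "march"=>3, "mar"=>3}   ## 1-based line index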
data/lib/rsssf/parser/token-goals.rb
CHANGED
@@ -29,12 +29,12 @@ MINUTE_RE = %r{
     (?:
       \d{1,3}
       '? ## optional minute quote (')
-      (?= (og|pen|p)? ([ ;,\]]|$))
+      (?= (og|o|pen|p)? ([ ;,\]\)]|$))
     )
   )
 )
 |
-  (?= (og|pen|p)? ([ ;,\]]|$)) # note - break can be og|pen|p too
+  (?= (og|o|pen|p)? ([ ;,\]\)]|$)) # note - break can be og|pen|p too
 )
 )}ix
 ### note - word boundary (\b) will NOT work for quoet (')
@@ -55,7 +55,7 @@ GOAL_PEN_RE = %r{
 GOAL_OG_RE = %r{
   (?<og>
     (?<=\d|\+|[ ]|') ## must follow a number or plus (e.g. 45og / 45+og / 45 og) or space
-    og
+    (?: og|o )
    \b
   )
 }ix
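The minute and own-goal patterns now also accept a bare "o" abbreviation and treat a closing ")" as a valid terminator, matching the new parenthesis support in token.rb. A standalone approximation of the widened own-goal match, written as a plain regex rather than the GOAL_OG_RE constant itself:

    og_re = /(?<=\d|\+|[ ]|')(?:og|o)\b/i

    '45og'.match?( og_re )    #=> true
    '45+o'.match?( og_re )    #=> true
    '45 og'.match?( og_re )   #=> true
    '45pen'.match?( og_re )   #=> false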
data/lib/rsssf/parser/token-note.rb
CHANGED
@@ -6,8 +6,8 @@ class Parser
 ## move to token-note(s) file !!!!
 ##
 
-
-  \[
+NOTE_BASICS_RE = %r{
+  (?<note_open> \[ )
   (?<note>
     (?: ## starting with ___ PLUS requiring more text
      (?:
@@ -97,16 +97,44 @@ NOTE_RE = %r{
       |
       replay
       ## e.g. [replay]
+      |
+      verified
+      ## e.g. [verified 2:0 wo.]
     )
     ([ ] ## note - optional text
      [^\]]+?
     )? ## slurp all to next ] - (use non-greedy)
    )
-  ) # note capture
-
+  ) # note capture
+
+  (?:
+    (?<note_close> \] )
+    | $ ## note - allow open notes (that continue on next line)
+  )
+}ix
+
+
+NOTE_MORE_RE = %r{
+  (?<=[ ]) ## one (leading) space min. required
+  (?<note_cont>
+    [⮑…] |
+    \.{2,3} ### .. or ...
+  )
+  [ ]*
+  (?<note>
+    [^\]]+? ## non-greeedy
+  )
+  (?:
+    (?<note_close> \] )
+    | $ ## note - allow open notes (that continue on next line)
+  )
 }ix
 
 
+NOTE_RE = Regexp.union( NOTE_BASICS_RE,
+                        NOTE_MORE_RE,
+                      )
+
 
 end # class Parser
 end # module Rsssf
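Notes may now stay open at the end of a line and continue on the next line, introduced by a continuation marker (⮑, …, or two/three dots); NOTE_RE becomes a union of the basic and the continuation pattern. A standalone approximation of the continuation match, shown as a plain regex rather than the NOTE_MORE_RE constant:

    note_more = /(?<=[ ])(?<note_cont>[⮑…]|\.{2,3})[ ]*(?<note>[^\]]+?)(?:(?<note_close>\])|$)/

    m = ' … awarded 2:0; match abandoned]'.match( note_more )
    m[:note_cont]    #=> "…"
    m[:note]         #=> "awarded 2:0; match abandoned"
    m[:note_close]   #=> "]"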
data/lib/rsssf/parser/token-text.rb
CHANGED
@@ -80,8 +80,9 @@ TEXT_STRICT_RE = %r{
 
   ## positive lookahead
   ## cannot use \b if text ends in dot (.) or other non-alphnum
-  ## than \b will not work
-
+  ## than \b will not work
+  ## not - add () too for now - why? why not?
+  (?=[ ,;@|\[\]\(\)]
      |$
   )
 )
data/lib/rsssf/parser/token.rb
CHANGED
@@ -9,7 +9,7 @@ BASICS_RE = %r{
   (?<spaces> [ ]{2,}) |
   (?<space> [ ])
 |
-  (?<sym>[;,@|\[\]])
+  (?<sym>[;,@|\[\]\(\)]) ## note - add () too - why? why not?
 }ix
 
 
@@ -38,10 +38,37 @@ RE = Regexp.union( GROUP_RE, ROUND_RE, LEG_RE,
                    TEXT_RE )
 
 
+
+### rename to dash or to ???
+#### used to add/allow hyphen/dash (-) in INSIDE_RE
+HYPHEN_RE = %r{ ## must be space before and after (or end of line)!!!
+    ## note - uses SYM capture
+    (?<sym>
+      (?<=[ ]) # Positive lookbehind for space
+      -
+      (?=[ ]|$) # positive lookahead for space
+    )
+}ix
+
+### rename to ?? use SCORE_AT for now - why? why not?
+## add support for score at/score points/markers
+### e.g. [1-0 Andrei 08, 1-1 Rydlewicz 24, 1-2 Prica 85, 2-2 Bella 88,
+##        2-3 Arvidsson 102]
+
+SCORE_AT_RE = %r{ (?<score_at>
+                    \b
+                    \d{1,2}-\d{1,2}
+                    \b
+                  )
+}ix
+
+
+
 ## "strict" text match mode inside brackets
 ## ]
-INSIDE_RE = Regexp.union(
-
+INSIDE_RE = Regexp.union( SCORE_AT_RE,
+                          GOAL_OG_RE, GOAL_PEN_RE,
+                          BASICS_RE, HYPHEN_RE,
                           TEXT_STRICT_RE,
                           MINUTE_RE,
                         )
@@ -56,6 +83,11 @@ def log( msg )
 end
 
 
+## open/close pairs - lookup close (by open char)
+SYM_CLOSE = {
+  '(' => ')',
+  '[' => ']',
+}
 
 def tokenize_with_errors( line, debug: false )
   tokens = []
@@ -72,6 +104,7 @@ def tokenize_with_errors( line, debug: false )
   ####
   ## quick hack - keep re state/mode between tokenize calls!!!
   @re ||= RE ## note - switch between RE & INSIDE_RE
+
 
 
   while m = @re.match( line, pos )
@@ -84,10 +117,14 @@ def tokenize_with_errors( line, debug: false )
     if offsets[0] != pos
       ## match NOT starting at start/begin position!!!
       ## report parse error!!!
-
+
+      ctx = @re == INSIDE_RE ? 'INSIDE_RE' : 'RE' ## assume RE
+      ## fix/change - use str.inspect to show tabs (\t)
+      ## and possibly other special characters causing trouble
+      msg = " !! WARN - parse error (#{ctx}) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
       puts msg
 
-      errors << "parse error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
+      errors << "parse error (#{ctx}) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
      log( msg )
     end
 
@@ -109,7 +146,9 @@ def tokenize_with_errors( line, debug: false )
                 [:text, m[:text]] ## keep pos - why? why not?
               elsif m[:minute]
                 [:minute, m[:minute]]
-              elsif m[:
+              elsif m[:score_at]
+                [:score_at, m[:score_at]]
+              elsif m[:og]
                 [:og, m[:og]] ## for typed drop - string version/variants
               elsif m[:pen]
                 [:pen, m[:pen]]
@@ -121,12 +160,21 @@ def tokenize_with_errors( line, debug: false )
               when ';' then [:';']
               when '@' then [:'@']
               when '|' then [:'|']
-              when '['
-
+              when '-' then [:'-']
+              when '[', '('
+                if sym == @sym_open
+                  ## report error - already in inside mode!!!
+                  ## e.g. another [ in [] or ( in ()
+                  log( "warn - unexpected (opening) #{sym} in inside (goal) mode in line >#{line}<" )
+                end
                nil
-              when ']'
-                puts " leave inside match mode"
-                @
+              when ']', ')' ## allow [] AND () for inside mode
+                ## puts " leave inside match mode"
+                if sym == @sym_close
+                  @re = RE
+                  @sym_open = nil ## reset sym_open/close
+                  @sym_close = nil
+                end
                nil
              else
                nil ## ignore others (e.g. brackets [])
@@ -176,12 +224,15 @@ def tokenize_with_errors( line, debug: false )
             when ';' then [:';']
             when '@' then [:'@']
             when '|' then [:'|']
-            when '['
+            when '[', '('
               ## switch to inside mode!!!
-              puts " enter inside match mode"
+              ## puts " enter inside match mode"
               @re = INSIDE_RE
+              @sym_open = sym ## record open/close style - why? why not?
+              @sym_close = SYM_CLOSE[sym]
               nil
-            when ']'
+            when ']', ')'
+              log( "warn - unexpected (closing) #{sym} in standard mode in line >#{line}<" )
               ## already in standard mode/ctx
               ## report warn/error - why? why not?
               nil
@@ -204,13 +255,21 @@ def tokenize_with_errors( line, debug: false )
     end
   end
 
+
   ## check if no match in end of string
   if offsets[1] != line.size
-
+
+    ## note - report regex context
+    ## e.g. RE or INSIDE_RE to help debugging/troubleshooting format errors
+    ctx = @re == INSIDE_RE ? 'INSIDE_RE' : 'RE' ## assume RE
+    ## fix/change - use str.inspect to show tabs (\t)
+    ## and possibly other special characters causing trouble
+
+    msg = " !! WARN - parse error (#{ctx}) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
     puts msg
     log( msg )
 
-    errors << "parse error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
+    errors << "parse error (#{ctx}) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
   end
 
 
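Bracketed goal details can now also be wrapped in parentheses (the tokenizer remembers the expected closer via SYM_CLOSE), and inside mode gains a score-at marker for lines like [1-0 Andrei 08, 1-1 Rydlewicz 24, ...]. A standalone approximation of that marker, as a plain regex rather than the SCORE_AT_RE constant, which only applies in inside (bracket) mode:

    score_at = /\b\d{1,2}-\d{1,2}\b/

    '[1-0 Andrei 08, 1-1 Rydlewicz 24]'.scan( score_at )
    #=> ["1-0", "1-1"]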
data/lib/rsssf/parser/version.rb
ADDED
@@ -0,0 +1,24 @@
+
+module SportDb
+module Module
+module RsssfParser
+  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
+  MINOR = 1
+  PATCH = 0
+  VERSION = [MAJOR,MINOR,PATCH].join('.')
+
+  def self.version
+    VERSION
+  end
+
+  def self.banner
+    "rsssf-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
+  end
+
+  def self.root
+    File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
+  end
+
+end # module RsssfParser
+end
+end
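Gem version and build info now live in a dedicated module, and parser.rb prints the banner once on require (see the "say hello" line below). Usage sketch; the printed Ruby version and platform depend on the host:

    require 'rsssf/parser'

    SportDb::Module::RsssfParser::VERSION   #=> "0.1.0"
    SportDb::Module::RsssfParser.version    #=> "0.1.0"
    puts SportDb::Module::RsssfParser.banner
    # e.g. rsssf-parser/0.1.0 on Ruby 3.3.4 (2024-07-09) [x86_64-linux] in (...)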
data/lib/rsssf/parser.rb
CHANGED
@@ -2,9 +2,17 @@
 ####
 ## build on "standard" parse
 require 'sportdb/parser'
+## pulled in for/uses only
+## - SportDb::Parser::Tokens !!!
+##
+## plus in the future pull in SportDb::OutlineReader
+##
+## note - pulls in more deps e.g. cococs AND season-formats
+
 
 
 ## our own code
+require_relative 'parser/version'
 require_relative 'parser/token-text'
 require_relative 'parser/token-note'
 require_relative 'parser/token-round' ## round (& group)
@@ -19,3 +27,5 @@ require_relative 'parser/linter'
 
 
 
+# say hello
+puts SportDb::Module::RsssfParser.banner
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rsssf-parser
 version: !ruby/object:Gem::Version
-  version: 0.0
+  version: 0.1.0
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-07-
+date: 2024-07-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: sportdb-parser
@@ -84,6 +84,7 @@ files:
 - lib/rsssf/parser/token-score.rb
 - lib/rsssf/parser/token-text.rb
 - lib/rsssf/parser/token.rb
+- lib/rsssf/parser/version.rb
 homepage: https://github.com/sportdb/sport.db
 licenses:
 - Public Domain