sportdb-parser 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -1
- data/lib/sportdb/parser/{tokenizer.rb → lexer.rb} +38 -29
- data/lib/sportdb/parser/parser.rb +340 -320
- data/lib/sportdb/parser/racc_parser.rb +40 -12
- data/lib/sportdb/parser/racc_tree.rb +1 -1
- data/lib/sportdb/parser/token-date.rb +2 -2
- data/lib/sportdb/parser/token-score.rb +2 -2
- data/lib/sportdb/parser/token-status.rb +2 -2
- data/lib/sportdb/parser/token-text.rb +2 -2
- data/lib/sportdb/parser/token.rb +2 -2
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +31 -12
- metadata +3 -3
@@ -5,12 +5,18 @@
|
|
5
5
|
class RaccMatchParser
|
6
6
|
|
7
7
|
|
8
|
-
def initialize( txt )
|
8
|
+
def initialize( txt, debug: false )
|
9
9
|
## puts "==> txt:"
|
10
10
|
## puts txt
|
11
|
-
|
12
|
-
|
13
|
-
@
|
11
|
+
|
12
|
+
@tree = []
|
13
|
+
@errors = []
|
14
|
+
|
15
|
+
### todo:
|
16
|
+
## - pass along debug flag
|
17
|
+
lexer = SportDb::Lexer.new( txt )
|
18
|
+
## note - use tokenize_with_errors and add/collect tokenize errors
|
19
|
+
@tokens, @errors = lexer.tokenize_with_errors
|
14
20
|
## pp @tokens
|
15
21
|
|
16
22
|
## quick hack - convert to racc format single char literal tokens e.g. '@' etc.
|
@@ -22,11 +28,22 @@ def initialize( txt )
|
|
22
28
|
end
|
23
29
|
end
|
24
30
|
end
|
25
|
-
|
31
|
+
|
32
|
+
|
33
|
+
def debug( value ) @debug = value; end
|
34
|
+
def debug?() @debug == true; end
|
35
|
+
|
36
|
+
## debug - trace / print message
|
37
|
+
def trace( msg )
|
38
|
+
puts " [parse] " + msg if debug?
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
|
26
43
|
|
27
44
|
def next_token
|
28
45
|
tok = @tokens.shift
|
29
|
-
|
46
|
+
trace( "next_token => #{tok.pretty_inspect}" )
|
30
47
|
tok
|
31
48
|
end
|
32
49
|
|
@@ -34,19 +51,30 @@ def initialize( txt )
|
|
34
51
|
# puts "Parse error on token: #{error_token_id}, value: #{error_value}"
|
35
52
|
# end
|
36
53
|
|
37
|
-
def
|
38
|
-
|
39
|
-
@tree = []
|
54
|
+
def parse_with_errors
|
55
|
+
trace( "start parse:" )
|
40
56
|
do_parse
|
41
|
-
@tree
|
57
|
+
[@tree, @errors]
|
58
|
+
end
|
59
|
+
|
60
|
+
def parse ## convenience shortcut (ignores errors)
|
61
|
+
tree, _ = parse_with_errors
|
62
|
+
tree
|
42
63
|
end
|
43
64
|
|
44
65
|
|
45
|
-
|
66
|
+
attr_reader :errors
|
67
|
+
def errors?() @errors.size > 0; end
|
68
|
+
|
69
|
+
|
70
|
+
def on_error(error_token_id, error_value, value_stack)
|
71
|
+
args = [error_token_id, error_value, value_stack]
|
46
72
|
puts
|
47
73
|
puts "!! on parse error:"
|
48
74
|
puts "args=#{args.pretty_inspect}"
|
49
|
-
|
75
|
+
|
76
|
+
@errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
|
77
|
+
## exit 1 ## exit for now - get and print more info about context etc.!!
|
50
78
|
end
|
51
79
|
|
52
80
|
|
@@ -66,7 +66,7 @@ RoundDef = Struct.new( :name, :date, :duration ) do
|
|
66
66
|
printer.text( "<RoundDef " )
|
67
67
|
printer.text( self.name )
|
68
68
|
printer.text( " date=" + self.date.pretty_inspect ) if date
|
69
|
-
printer.text( "
|
69
|
+
printer.text( " duration=" + self.duration.pretty_inspect ) if duration
|
70
70
|
printer.text( ">" )
|
71
71
|
end
|
72
72
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module SportDb
|
2
|
-
class
|
2
|
+
class Lexer
|
3
3
|
|
4
4
|
|
5
5
|
## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
|
@@ -130,5 +130,5 @@ SCORE_RE = Regexp.union(
|
|
130
130
|
SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
|
131
131
|
)
|
132
132
|
|
133
|
-
end # class
|
133
|
+
end # class Lexer
|
134
134
|
end # module SportDb
|
data/lib/sportdb/parser/token.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
|
3
3
|
module SportDb
|
4
|
-
class
|
4
|
+
class Lexer
|
5
5
|
|
6
6
|
|
7
7
|
##
|
@@ -265,5 +265,5 @@ RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should swit
|
|
265
265
|
TEXT_RE )
|
266
266
|
|
267
267
|
|
268
|
-
end # class
|
268
|
+
end # class Lexer
|
269
269
|
end # module SportDb
|
data/lib/sportdb/parser.rb
CHANGED
@@ -22,7 +22,7 @@ require_relative 'parser/token-date'
|
|
22
22
|
require_relative 'parser/token-text'
|
23
23
|
require_relative 'parser/token-status'
|
24
24
|
require_relative 'parser/token'
|
25
|
-
require_relative 'parser/tokenizer'
|
25
|
+
require_relative 'parser/lexer'
|
26
26
|
|
27
27
|
require_relative 'parser/parser' ## auto-generated by racc (from parser.y)
|
28
28
|
require_relative 'parser/racc_parser'
|
@@ -46,18 +46,37 @@ end # module SportDb
|
|
46
46
|
|
47
47
|
|
48
48
|
module SportDb
|
49
|
-
###
|
50
|
-
## todo/fix - use LangHelper or such
|
51
|
-
## e.g. class Parser
|
52
|
-
## include LangHelper
|
53
|
-
## end
|
54
49
|
class Parser
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
def
|
59
|
-
|
60
|
-
|
50
|
+
####################
|
51
|
+
# "default" lexer & parser (wraps RaccMatchParser)
|
52
|
+
|
53
|
+
def tokenize_with_errors( lines, debug: false )
|
54
|
+
lexer = Lexer.new( lines )
|
55
|
+
tokens, errors = lexer.tokenize_with_errors
|
56
|
+
[tokens, errors]
|
57
|
+
end
|
58
|
+
|
59
|
+
### convenience helper - ignore errors by default
|
60
|
+
def tokenize( lines, debug: false )
|
61
|
+
tokens, _ = tokenize_with_errors( lines, debug: debug )
|
62
|
+
tokens
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
def parse_with_errors( lines, debug: false )
|
67
|
+
## todo/check - if lines needs to check for array of lines and such
|
68
|
+
## or handled by tokenizer???
|
69
|
+
parser = RaccMatchParser.new( lines )
|
70
|
+
tree, errors = parser.parse_with_errors
|
71
|
+
[tree, errors]
|
72
|
+
end
|
73
|
+
|
74
|
+
### convenience helper - ignore errors by default
|
75
|
+
def parse( lines, debug: false )
|
76
|
+
tree, _ = parse_with_errors( lines, debug: debug )
|
77
|
+
tree
|
78
|
+
end
|
79
|
+
end # class Parser
|
61
80
|
end # module SportDb
|
62
81
|
|
63
82
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.6
|
4
|
+
version: 0.5.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -97,6 +97,7 @@ files:
|
|
97
97
|
- config/rounds_pt.txt
|
98
98
|
- lib/sportdb/parser.rb
|
99
99
|
- lib/sportdb/parser/lang.rb
|
100
|
+
- lib/sportdb/parser/lexer.rb
|
100
101
|
- lib/sportdb/parser/parser.rb
|
101
102
|
- lib/sportdb/parser/racc_parser.rb
|
102
103
|
- lib/sportdb/parser/racc_tree.rb
|
@@ -105,7 +106,6 @@ files:
|
|
105
106
|
- lib/sportdb/parser/token-status.rb
|
106
107
|
- lib/sportdb/parser/token-text.rb
|
107
108
|
- lib/sportdb/parser/token.rb
|
108
|
-
- lib/sportdb/parser/tokenizer.rb
|
109
109
|
- lib/sportdb/parser/version.rb
|
110
110
|
homepage: https://github.com/sportdb/sport.db
|
111
111
|
licenses:
|