sportdb-parser 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,12 +5,18 @@
5
5
  class RaccMatchParser
6
6
 
7
7
 
8
- def initialize( txt )
8
+ def initialize( txt, debug: false )
9
9
  ## puts "==> txt:"
10
10
  ## puts txt
11
-
12
- parser = SportDb::Parser.new
13
- @tokens = parser.tokenize( txt )
11
+
12
+ @tree = []
13
+ @errors = []
14
+
15
+ ### todo:
16
+ ## - pass along debug flag
17
+ lexer = SportDb::Lexer.new( txt )
18
+ ## note - use tokenize_with_errors and add/collect tokenize errors
19
+ @tokens, @errors = lexer.tokenize_with_errors
14
20
  ## pp @tokens
15
21
 
16
22
  ## quick hack - convert to racc format single char literal tokens e.g. '@' etc.
@@ -22,11 +28,22 @@ def initialize( txt )
22
28
  end
23
29
  end
24
30
  end
25
-
31
+
32
+
33
+ def debug( value ) @debug = value; end
34
+ def debug?() @debug == true; end
35
+
36
+ ## debug - trace / print message
37
+ def trace( msg )
38
+ puts " [parse] " + msg if debug?
39
+ end
40
+
41
+
42
+
26
43
 
27
44
  def next_token
28
45
  tok = @tokens.shift
29
- puts "next_token => #{tok.pretty_inspect}"
46
+ trace( "next_token => #{tok.pretty_inspect}" )
30
47
  tok
31
48
  end
32
49
 
@@ -34,19 +51,30 @@ def initialize( txt )
34
51
  # puts "Parse error on token: #{error_token_id}, value: #{error_value}"
35
52
  # end
36
53
 
37
- def parse
38
- puts "parse:"
39
- @tree = []
54
+ def parse_with_errors
55
+ trace( "start parse:" )
40
56
  do_parse
41
- @tree
57
+ [@tree, @errors]
58
+ end
59
+
60
+ def parse ## convenience shortcut (ignores errors)
61
+ tree, _ = parse_with_errors
62
+ tree
42
63
  end
43
64
 
44
65
 
45
- def on_error(*args)
66
+ attr_reader :errors
67
+ def errors?() @errors.size > 0; end
68
+
69
+
70
+ def on_error(error_token_id, error_value, value_stack)
71
+ args = [error_token_id, error_value, value_stack]
46
72
  puts
47
73
  puts "!! on parse error:"
48
74
  puts "args=#{args.pretty_inspect}"
49
- exit 1 ## exit for now - get and print more info about context etc.!!
75
+
76
+ @errors << "parse error on token: #{error_token_id} with value: #{error_value}, stack: #{value_stack.pretty_inspect}"
77
+ ## exit 1 ## exit for now - get and print more info about context etc.!!
50
78
  end
51
79
 
52
80
 
@@ -66,7 +66,7 @@ RoundDef = Struct.new( :name, :date, :duration ) do
66
66
  printer.text( "<RoundDef " )
67
67
  printer.text( self.name )
68
68
  printer.text( " date=" + self.date.pretty_inspect ) if date
69
- printer.text( " durattion=" + self.duration.pretty_inspect ) if duration
69
+ printer.text( " duration=" + self.duration.pretty_inspect ) if duration
70
70
  printer.text( ">" )
71
71
  end
72
72
  end
@@ -1,5 +1,5 @@
1
1
  module SportDb
2
- class Parser
2
+ class Lexer
3
3
 
4
4
 
5
5
 
@@ -297,6 +297,6 @@ DURATION_RE = Regexp.union(
297
297
 
298
298
 
299
299
 
300
- end # class Parser
300
+ end # class Lexer
301
301
  end # module SportDb
302
302
 
@@ -1,5 +1,5 @@
1
1
  module SportDb
2
- class Parser
2
+ class Lexer
3
3
 
4
4
 
5
5
  ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
@@ -130,5 +130,5 @@ SCORE_RE = Regexp.union(
130
130
  SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
131
131
  )
132
132
 
133
- end # class Parser
133
+ end # class Lexer
134
134
  end # module SportDb
@@ -1,5 +1,5 @@
1
1
  module SportDb
2
- class Parser
2
+ class Lexer
3
3
 
4
4
  ## (match) status
5
5
  ## note: english usage - cancelled (in UK), canceled (in US)
@@ -40,6 +40,6 @@ STATUS_RE = %r{
40
40
  }ix
41
41
 
42
42
 
43
- end # class Parser
43
+ end # class Lexer
44
44
  end # module SportDb
45
45
 
@@ -1,5 +1,5 @@
1
1
  module SportDb
2
- class Parser
2
+ class Lexer
3
3
 
4
4
 
5
5
  ## note - do NOT allow single alpha text for now
@@ -129,5 +129,5 @@ TEXT_RE = %r{
129
129
  }ix
130
130
 
131
131
 
132
- end # class Parser
132
+ end # class Lexer
133
133
  end # module SportDb
@@ -1,7 +1,7 @@
1
1
 
2
2
 
3
3
  module SportDb
4
- class Parser
4
+ class Lexer
5
5
 
6
6
 
7
7
  ##
@@ -265,5 +265,5 @@ RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should swit
265
265
  TEXT_RE )
266
266
 
267
267
 
268
- end # class Parser
268
+ end # class Lexer
269
269
  end # module SportDb
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 5
7
- PATCH = 6
7
+ PATCH = 8
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -22,7 +22,7 @@ require_relative 'parser/token-date'
22
22
  require_relative 'parser/token-text'
23
23
  require_relative 'parser/token-status'
24
24
  require_relative 'parser/token'
25
- require_relative 'parser/tokenizer'
25
+ require_relative 'parser/lexer'
26
26
 
27
27
  require_relative 'parser/parser' ## auto-generated by racc (from parser.y)
28
28
  require_relative 'parser/racc_parser'
@@ -46,18 +46,37 @@ end # module SportDb
46
46
 
47
47
 
48
48
  module SportDb
49
- ###
50
- ## todo/fix - use LangHelper or such
51
- ## e.g. class Parser
52
- ## include LangHelper
53
- ## end
54
49
  class Parser
55
- ## keep "old" access to checking for group, round & friends
56
- ## for now for compatibility
57
- def is_group?( text ) Lang.is_group?( text ); end
58
- def is_round?( text ) Lang.is_round?( text ); end
59
- def is_leg?( text ) Lang.is_leg?( text ); end
60
- end
50
+ ####################
51
+ # "default" lexer & parser (wraps RaccMatchParser)
52
+
53
+ def tokenize_with_errors( lines, debug: false )
54
+ lexer = Lexer.new( lines )
55
+ tokens, errors = lexer.tokenize_with_errors
56
+ [tokens, errors]
57
+ end
58
+
59
+ ### convenience helper - ignore errors by default
60
+ def tokenize( lines, debug: false )
61
+ tokens, _ = tokenize_with_errors( lines, debug: debug )
62
+ tokens
63
+ end
64
+
65
+
66
+ def parse_with_errors( lines, debug: false )
67
+ ## todo/check - if lines needs to check for array of lines and such
68
+ ## or handled by tokenizer???
69
+ parser = RaccMatchParser.new( lines )
70
+ tree, errors = parser.parse_with_errors
71
+ [tree, errors]
72
+ end
73
+
74
+ ### convenience helper - ignore errors by default
75
+ def parse( lines, debug: false )
76
+ tree, _ = parse_with_errors( lines, debug: debug )
77
+ tree
78
+ end
79
+ end # class Parser
61
80
  end # module SportDb
62
81
 
63
82
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.6
4
+ version: 0.5.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-20 00:00:00.000000000 Z
11
+ date: 2025-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -97,6 +97,7 @@ files:
97
97
  - config/rounds_pt.txt
98
98
  - lib/sportdb/parser.rb
99
99
  - lib/sportdb/parser/lang.rb
100
+ - lib/sportdb/parser/lexer.rb
100
101
  - lib/sportdb/parser/parser.rb
101
102
  - lib/sportdb/parser/racc_parser.rb
102
103
  - lib/sportdb/parser/racc_tree.rb
@@ -105,7 +106,6 @@ files:
105
106
  - lib/sportdb/parser/token-status.rb
106
107
  - lib/sportdb/parser/token-text.rb
107
108
  - lib/sportdb/parser/token.rb
108
- - lib/sportdb/parser/tokenizer.rb
109
109
  - lib/sportdb/parser/version.rb
110
110
  homepage: https://github.com/sportdb/sport.db
111
111
  licenses: