collie 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +28 -1
  3. data/README.md +55 -258
  4. data/lib/collie/analyzer/reachability.rb +17 -20
  5. data/lib/collie/analyzer/recursion.rb +28 -9
  6. data/lib/collie/analyzer/symbol_resolver.rb +51 -0
  7. data/lib/collie/ast.rb +18 -4
  8. data/lib/collie/cli.rb +388 -50
  9. data/lib/collie/config/schema.rb +117 -0
  10. data/lib/collie/config.rb +106 -22
  11. data/lib/collie/formatter/formatter.rb +95 -50
  12. data/lib/collie/formatter/options.rb +17 -5
  13. data/lib/collie/formatter/signature.rb +72 -0
  14. data/lib/collie/linter/base.rb +49 -0
  15. data/lib/collie/linter/rules/ambiguous_precedence.rb +5 -2
  16. data/lib/collie/linter/rules/circular_reference.rb +96 -38
  17. data/lib/collie/linter/rules/consistent_tag_naming.rb +13 -13
  18. data/lib/collie/linter/rules/empty_action.rb +42 -11
  19. data/lib/collie/linter/rules/factorizable_rules.rb +2 -2
  20. data/lib/collie/linter/rules/left_recursion.rb +5 -4
  21. data/lib/collie/linter/rules/long_rule.rb +3 -3
  22. data/lib/collie/linter/rules/nonterminal_naming.rb +6 -4
  23. data/lib/collie/linter/rules/prec_improvement.rb +1 -1
  24. data/lib/collie/linter/rules/redundant_epsilon.rb +11 -11
  25. data/lib/collie/linter/rules/right_recursion.rb +4 -1
  26. data/lib/collie/linter/rules/symbol_conflict.rb +130 -0
  27. data/lib/collie/linter/rules/token_naming.rb +2 -1
  28. data/lib/collie/linter/rules/trailing_whitespace.rb +7 -1
  29. data/lib/collie/linter/rules/undefined_symbol.rb +50 -8
  30. data/lib/collie/linter/rules/unused_nonterminal.rb +36 -1
  31. data/lib/collie/linter/rules/unused_token.rb +34 -9
  32. data/lib/collie/parser/debug_serializer.rb +205 -0
  33. data/lib/collie/parser/lexer.rb +182 -11
  34. data/lib/collie/parser/parser.rb +73 -13
  35. data/lib/collie/reporter/github.rb +15 -2
  36. data/lib/collie/reporter/json.rb +4 -1
  37. data/lib/collie/reporter/sarif.rb +81 -0
  38. data/lib/collie/version.rb +1 -1
  39. data/lib/collie.rb +6 -1
  40. metadata +8 -2
@@ -15,29 +15,65 @@ module Collie
15
15
  def check(ast, context = {})
16
16
  symbol_table = context[:symbol_table] || build_symbol_table(ast)
17
17
 
18
- ast.rules.each do |rule|
18
+ each_rule_like(ast) do |rule|
19
+ allowed_symbols = rule_like_parameters(rule)
19
20
  rule.alternatives.each do |alt|
20
- alt.symbols.each do |symbol|
21
- next if symbol_table.declared?(symbol.name)
22
-
23
- add_offense(symbol,
24
- message: "Undefined symbol '#{symbol.name}'")
25
- end
21
+ alt.symbols.each { |symbol| check_symbol(symbol_table, symbol, allowed_symbols: allowed_symbols) }
26
22
  end
27
23
  end
28
24
 
25
+ check_declarations(ast, symbol_table)
26
+
29
27
  @offenses
30
28
  end
31
29
 
32
30
  private
33
31
 
32
+ def check_declarations(ast, symbol_table)
33
+ ast.declarations.each do |decl|
34
+ case decl
35
+ when AST::StartDeclaration
36
+ next if symbol_table.nonterminal?(decl.symbol)
37
+
38
+ add_offense(decl, message: "Undefined start symbol '#{decl.symbol}'")
39
+ when AST::TypeDeclaration
40
+ decl.names.each do |name|
41
+ next if symbol_table.declared?(name)
42
+
43
+ add_offense(decl, message: "%type references undefined symbol '#{name}'")
44
+ end
45
+ end
46
+ end
47
+ end
48
+
49
+ def check_symbol(symbol_table, symbol, allowed_symbols: [])
50
+ unless allowed_symbols.include?(symbol.name) || symbol_table.declared?(symbol.name)
51
+ add_offense(symbol,
52
+ message: "Undefined symbol '#{symbol.name}'")
53
+ end
54
+
55
+ symbol.arguments&.each do |argument|
56
+ check_symbol(symbol_table, argument, allowed_symbols: allowed_symbols)
57
+ end
58
+ end
59
+
34
60
  def build_symbol_table(ast)
35
61
  table = Analyzer::SymbolTable.new
36
62
 
37
63
  ast.declarations.each do |decl|
38
64
  case decl
39
65
  when AST::TokenDeclaration
40
- decl.names.each { |name| table.add_token(name, type_tag: decl.type_tag, location: decl.location) }
66
+ decl.names.each do |name|
67
+ add_token(table, name, type_tag: decl.type_tag, location: decl.location)
68
+ end
69
+ when AST::PrecedenceDeclaration
70
+ decl.tokens.each do |name|
71
+ add_token(table, name, location: decl.location)
72
+ end
73
+ when AST::ParameterizedRule
74
+ table.add_nonterminal(decl.name, location: decl.location)
75
+ when AST::InlineRule
76
+ table.add_nonterminal(decl.rule, location: decl.location)
41
77
  end
42
78
  end
43
79
 
@@ -47,6 +83,12 @@ module Collie
47
83
 
48
84
  table
49
85
  end
86
+
87
+ def add_token(table, name, type_tag: nil, location: nil)
88
+ table.add_token(name, type_tag: type_tag, location: location)
89
+ rescue Error
90
+ # Ignore duplicates while building the resolver table.
91
+ end
50
92
  end
51
93
  end
52
94
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  require_relative "../base"
4
6
 
5
7
  module Collie
@@ -20,6 +22,29 @@ module Collie
20
22
  symbol_table.add_nonterminal(rule.name, location: rule.location)
21
23
  end
22
24
 
25
+ ast.declarations.each do |decl|
26
+ case decl
27
+ when AST::TokenDeclaration
28
+ decl.names.each do |name|
29
+ symbol_table.add_token(name, type_tag: decl.type_tag, location: decl.location)
30
+ rescue Error
31
+ # Ignore duplicates while building the resolver table.
32
+ end
33
+ when AST::PrecedenceDeclaration
34
+ decl.tokens.each do |name|
35
+ symbol_table.add_token(name, location: decl.location)
36
+ rescue Error
37
+ # Ignore duplicates while building the resolver table.
38
+ end
39
+ when AST::ParameterizedRule
40
+ symbol_table.add_nonterminal(decl.name, location: decl.location)
41
+ when AST::InlineRule
42
+ symbol_table.add_nonterminal(decl.rule, location: decl.location)
43
+ end
44
+ end
45
+
46
+ Analyzer::SymbolResolver.resolve(ast, symbol_table)
47
+
23
48
  # Find start symbol
24
49
  start_symbol = find_start_symbol(ast)
25
50
 
@@ -42,7 +67,7 @@ module Collie
42
67
 
43
68
  # Track nonterminal usage in parameterized rules (%rule)
44
69
  ast.declarations.each do |decl|
45
- next unless decl.is_a?(AST::ParameterizedRule)
70
+ next unless decl.is_a?(AST::ParameterizedRule) || decl.is_a?(AST::InlineRule)
46
71
 
47
72
  decl.alternatives.each do |alt|
48
73
  alt.symbols.each do |symbol|
@@ -60,11 +85,13 @@ module Collie
60
85
 
61
86
  # Mark start symbol as used
62
87
  symbol_table.use_nonterminal(start_symbol) if start_symbol
88
+ unreachable_rules = unreachable_rules(ast, start_symbol)
63
89
 
64
90
  # Find unused nonterminals
65
91
  symbol_table.unused_nonterminals.each do |nonterminal_name|
66
92
  # Skip start symbol
67
93
  next if nonterminal_name == start_symbol
94
+ next if unreachable_rules.include?(nonterminal_name)
68
95
 
69
96
  rule = ast.rules.find { |r| r.name == nonterminal_name }
70
97
  next unless rule
@@ -85,6 +112,14 @@ module Collie
85
112
  # Default to first rule
86
113
  ast.rules.first&.name
87
114
  end
115
+
116
+ def unreachable_rules(ast, start_symbol)
117
+ return Set.new unless start_symbol
118
+
119
+ analyzer = Analyzer::Reachability.new(ast)
120
+ analyzer.analyze(start_symbol)
121
+ analyzer.unreachable_rules
122
+ end
88
123
  end
89
124
  end
90
125
  end
@@ -14,24 +14,23 @@ module Collie
14
14
 
15
15
  def check(ast, context = {})
16
16
  symbol_table = context[:symbol_table] || build_symbol_table(ast)
17
+ Analyzer::SymbolResolver.resolve(ast, symbol_table)
17
18
 
18
19
  # Track token usage in normal rules
19
20
  ast.rules.each do |rule|
20
21
  rule.alternatives.each do |alt|
21
- alt.symbols.each do |symbol|
22
- symbol_table.use_token(symbol.name) if symbol.terminal?
23
- end
22
+ alt.symbols.each { |symbol| mark_token_usage(symbol_table, symbol) }
23
+ mark_precedence_usage(symbol_table, alt)
24
24
  end
25
25
  end
26
26
 
27
27
  # Track token usage in parameterized rules (%rule)
28
28
  ast.declarations.each do |decl|
29
- next unless decl.is_a?(AST::ParameterizedRule)
29
+ next unless decl.is_a?(AST::ParameterizedRule) || decl.is_a?(AST::InlineRule)
30
30
 
31
31
  decl.alternatives.each do |alt|
32
- alt.symbols.each do |symbol|
33
- symbol_table.use_token(symbol.name) if symbol.terminal?
34
- end
32
+ alt.symbols.each { |symbol| mark_token_usage(symbol_table, symbol) }
33
+ mark_precedence_usage(symbol_table, alt)
35
34
  end
36
35
  end
37
36
 
@@ -46,6 +45,15 @@ module Collie
46
45
 
47
46
  private
48
47
 
48
+ def mark_token_usage(symbol_table, symbol)
49
+ symbol_table.use_token(symbol.name) if symbol.terminal?
50
+ symbol.arguments&.each { |argument| mark_token_usage(symbol_table, argument) }
51
+ end
52
+
53
+ def mark_precedence_usage(symbol_table, alternative)
54
+ symbol_table.use_token(alternative.prec) if alternative.prec
55
+ end
56
+
49
57
  def build_symbol_table(ast)
50
58
  table = Analyzer::SymbolTable.new
51
59
 
@@ -55,7 +63,13 @@ module Collie
55
63
  decl.names.each do |name|
56
64
  table.add_token(name, type_tag: decl.type_tag, location: decl.location)
57
65
  rescue Error
58
- # Ignore duplicates
66
+ # Ignore duplicates while building the resolver table.
67
+ end
68
+ when AST::PrecedenceDeclaration
69
+ decl.tokens.each do |name|
70
+ table.add_token(name, location: decl.location)
71
+ rescue Error
72
+ # Ignore duplicates while building the resolver table.
59
73
  end
60
74
  end
61
75
  end
@@ -66,7 +80,7 @@ module Collie
66
80
  def add_offense_for_declaration(ast, token_name, _location)
67
81
  # Find the declaration node
68
82
  decl = ast.declarations.find do |d|
69
- d.is_a?(AST::TokenDeclaration) && d.names.include?(token_name)
83
+ token_declares?(d, token_name)
70
84
  end
71
85
 
72
86
  return unless decl
@@ -74,6 +88,17 @@ module Collie
74
88
  add_offense(decl,
75
89
  message: "Token '#{token_name}' is declared but never used")
76
90
  end
91
+
92
+ def token_declares?(declaration, token_name)
93
+ case declaration
94
+ when AST::TokenDeclaration
95
+ declaration.names.include?(token_name)
96
+ when AST::PrecedenceDeclaration
97
+ declaration.tokens.include?(token_name)
98
+ else
99
+ false
100
+ end
101
+ end
77
102
  end
78
103
  end
79
104
  end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../ast"
4
+
5
+ module Collie
6
+ module Parser
7
+ # Serializes parser internals for CLI debug commands.
8
+ module DebugSerializer
9
+ class << self
10
+ def token(token)
11
+ {
12
+ type: token.type,
13
+ value: token.value,
14
+ raw_value: token.raw_value,
15
+ location: location(token.location)
16
+ }
17
+ end
18
+
19
+ def ast(node)
20
+ return nil unless node
21
+
22
+ case node
23
+ when AST::GrammarFile
24
+ grammar_file(node)
25
+ when AST::Prologue, AST::Epilogue
26
+ code_node(node)
27
+ when AST::TokenDeclaration
28
+ token_declaration(node)
29
+ when AST::TypeDeclaration
30
+ type_declaration(node)
31
+ when AST::PrecedenceDeclaration
32
+ precedence_declaration(node)
33
+ when AST::StartDeclaration
34
+ start_declaration(node)
35
+ when AST::UnionDeclaration
36
+ union_declaration(node)
37
+ when AST::UnknownDeclaration
38
+ unknown_declaration(node)
39
+ when AST::Rule
40
+ rule(node)
41
+ when AST::ParameterizedRule
42
+ parameterized_rule(node)
43
+ when AST::InlineRule
44
+ inline_rule(node)
45
+ when AST::Alternative
46
+ alternative(node)
47
+ when AST::Symbol
48
+ symbol(node)
49
+ when AST::Action
50
+ action(node)
51
+ else
52
+ { type: node.class.name }
53
+ end
54
+ end
55
+
56
+ private
57
+
58
+ def grammar_file(node)
59
+ {
60
+ type: "GrammarFile",
61
+ prologue: ast(node.prologue),
62
+ declarations: node.declarations.map { |declaration| ast(declaration) },
63
+ rules: node.rules.map { |rule| ast(rule) },
64
+ epilogue: ast(node.epilogue),
65
+ location: location(node.location)
66
+ }
67
+ end
68
+
69
+ def code_node(node)
70
+ {
71
+ type: node_type(node),
72
+ code: node.code,
73
+ location: location(node.location)
74
+ }
75
+ end
76
+
77
+ def token_declaration(node)
78
+ {
79
+ type: "TokenDeclaration",
80
+ names: node.names,
81
+ type_tag: node.type_tag,
82
+ location: location(node.location)
83
+ }
84
+ end
85
+
86
+ def type_declaration(node)
87
+ {
88
+ type: "TypeDeclaration",
89
+ names: node.names,
90
+ type_tag: node.type_tag,
91
+ location: location(node.location)
92
+ }
93
+ end
94
+
95
+ def precedence_declaration(node)
96
+ {
97
+ type: "PrecedenceDeclaration",
98
+ associativity: node.associativity,
99
+ tokens: node.tokens,
100
+ location: location(node.location)
101
+ }
102
+ end
103
+
104
+ def start_declaration(node)
105
+ {
106
+ type: "StartDeclaration",
107
+ symbol: node.symbol,
108
+ location: location(node.location)
109
+ }
110
+ end
111
+
112
+ def union_declaration(node)
113
+ {
114
+ type: "UnionDeclaration",
115
+ body: node.body,
116
+ location: location(node.location)
117
+ }
118
+ end
119
+
120
+ def unknown_declaration(node)
121
+ {
122
+ type: "UnknownDeclaration",
123
+ source: node.source,
124
+ location: location(node.location)
125
+ }
126
+ end
127
+
128
+ def rule(node)
129
+ {
130
+ type: "Rule",
131
+ name: node.name,
132
+ alternatives: node.alternatives.map { |alternative| ast(alternative) },
133
+ location: location(node.location)
134
+ }
135
+ end
136
+
137
+ def parameterized_rule(node)
138
+ {
139
+ type: "ParameterizedRule",
140
+ name: node.name,
141
+ parameters: node.parameters,
142
+ alternatives: node.alternatives.map { |alternative| ast(alternative) },
143
+ location: location(node.location)
144
+ }
145
+ end
146
+
147
+ def inline_rule(node)
148
+ {
149
+ type: "InlineRule",
150
+ rule: node.rule,
151
+ parameters: node.parameters,
152
+ alternatives: node.alternatives.map { |alternative| ast(alternative) },
153
+ location: location(node.location)
154
+ }
155
+ end
156
+
157
+ def alternative(node)
158
+ {
159
+ type: "Alternative",
160
+ symbols: node.symbols.map { |symbol| ast(symbol) },
161
+ action: ast(node.action),
162
+ prec: node.prec,
163
+ explicit_empty: node.explicit_empty,
164
+ empty_marker: node.empty_marker,
165
+ location: location(node.location)
166
+ }
167
+ end
168
+
169
+ def symbol(node)
170
+ {
171
+ type: "Symbol",
172
+ name: node.name,
173
+ kind: node.kind,
174
+ alias_name: node.alias_name,
175
+ arguments: node.arguments&.map { |argument| ast(argument) },
176
+ location: location(node.location)
177
+ }
178
+ end
179
+
180
+ def action(node)
181
+ {
182
+ type: "Action",
183
+ code: node.code,
184
+ location: location(node.location)
185
+ }
186
+ end
187
+
188
+ def node_type(node)
189
+ node.class.name.split("::").last
190
+ end
191
+
192
+ def location(location)
193
+ return nil unless location
194
+
195
+ {
196
+ file: location.file,
197
+ line: location.line,
198
+ column: location.column,
199
+ length: location.length
200
+ }
201
+ end
202
+ end
203
+ end
204
+ end
205
+ end