collie 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -1
- data/README.md +55 -258
- data/lib/collie/analyzer/reachability.rb +17 -20
- data/lib/collie/analyzer/recursion.rb +28 -9
- data/lib/collie/analyzer/symbol_resolver.rb +51 -0
- data/lib/collie/ast.rb +18 -4
- data/lib/collie/cli.rb +388 -50
- data/lib/collie/config/schema.rb +117 -0
- data/lib/collie/config.rb +106 -22
- data/lib/collie/formatter/formatter.rb +95 -50
- data/lib/collie/formatter/options.rb +17 -5
- data/lib/collie/formatter/signature.rb +72 -0
- data/lib/collie/linter/base.rb +49 -0
- data/lib/collie/linter/rules/ambiguous_precedence.rb +5 -2
- data/lib/collie/linter/rules/circular_reference.rb +96 -38
- data/lib/collie/linter/rules/consistent_tag_naming.rb +13 -13
- data/lib/collie/linter/rules/empty_action.rb +42 -11
- data/lib/collie/linter/rules/factorizable_rules.rb +2 -2
- data/lib/collie/linter/rules/left_recursion.rb +5 -4
- data/lib/collie/linter/rules/long_rule.rb +3 -3
- data/lib/collie/linter/rules/nonterminal_naming.rb +6 -4
- data/lib/collie/linter/rules/prec_improvement.rb +1 -1
- data/lib/collie/linter/rules/redundant_epsilon.rb +11 -11
- data/lib/collie/linter/rules/right_recursion.rb +4 -1
- data/lib/collie/linter/rules/symbol_conflict.rb +130 -0
- data/lib/collie/linter/rules/token_naming.rb +2 -1
- data/lib/collie/linter/rules/trailing_whitespace.rb +7 -1
- data/lib/collie/linter/rules/undefined_symbol.rb +50 -8
- data/lib/collie/linter/rules/unused_nonterminal.rb +36 -1
- data/lib/collie/linter/rules/unused_token.rb +34 -9
- data/lib/collie/parser/debug_serializer.rb +205 -0
- data/lib/collie/parser/lexer.rb +182 -11
- data/lib/collie/parser/parser.rb +73 -13
- data/lib/collie/reporter/github.rb +15 -2
- data/lib/collie/reporter/json.rb +4 -1
- data/lib/collie/reporter/sarif.rb +81 -0
- data/lib/collie/version.rb +1 -1
- data/lib/collie.rb +6 -1
- metadata +8 -2
|
@@ -15,29 +15,65 @@ module Collie
|
|
|
15
15
|
def check(ast, context = {})
|
|
16
16
|
symbol_table = context[:symbol_table] || build_symbol_table(ast)
|
|
17
17
|
|
|
18
|
-
ast
|
|
18
|
+
each_rule_like(ast) do |rule|
|
|
19
|
+
allowed_symbols = rule_like_parameters(rule)
|
|
19
20
|
rule.alternatives.each do |alt|
|
|
20
|
-
alt.symbols.each
|
|
21
|
-
next if symbol_table.declared?(symbol.name)
|
|
22
|
-
|
|
23
|
-
add_offense(symbol,
|
|
24
|
-
message: "Undefined symbol '#{symbol.name}'")
|
|
25
|
-
end
|
|
21
|
+
alt.symbols.each { |symbol| check_symbol(symbol_table, symbol, allowed_symbols: allowed_symbols) }
|
|
26
22
|
end
|
|
27
23
|
end
|
|
28
24
|
|
|
25
|
+
check_declarations(ast, symbol_table)
|
|
26
|
+
|
|
29
27
|
@offenses
|
|
30
28
|
end
|
|
31
29
|
|
|
32
30
|
private
|
|
33
31
|
|
|
32
|
+
def check_declarations(ast, symbol_table)
|
|
33
|
+
ast.declarations.each do |decl|
|
|
34
|
+
case decl
|
|
35
|
+
when AST::StartDeclaration
|
|
36
|
+
next if symbol_table.nonterminal?(decl.symbol)
|
|
37
|
+
|
|
38
|
+
add_offense(decl, message: "Undefined start symbol '#{decl.symbol}'")
|
|
39
|
+
when AST::TypeDeclaration
|
|
40
|
+
decl.names.each do |name|
|
|
41
|
+
next if symbol_table.declared?(name)
|
|
42
|
+
|
|
43
|
+
add_offense(decl, message: "%type references undefined symbol '#{name}'")
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def check_symbol(symbol_table, symbol, allowed_symbols: [])
|
|
50
|
+
unless allowed_symbols.include?(symbol.name) || symbol_table.declared?(symbol.name)
|
|
51
|
+
add_offense(symbol,
|
|
52
|
+
message: "Undefined symbol '#{symbol.name}'")
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
symbol.arguments&.each do |argument|
|
|
56
|
+
check_symbol(symbol_table, argument, allowed_symbols: allowed_symbols)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
34
60
|
def build_symbol_table(ast)
|
|
35
61
|
table = Analyzer::SymbolTable.new
|
|
36
62
|
|
|
37
63
|
ast.declarations.each do |decl|
|
|
38
64
|
case decl
|
|
39
65
|
when AST::TokenDeclaration
|
|
40
|
-
decl.names.each
|
|
66
|
+
decl.names.each do |name|
|
|
67
|
+
add_token(table, name, type_tag: decl.type_tag, location: decl.location)
|
|
68
|
+
end
|
|
69
|
+
when AST::PrecedenceDeclaration
|
|
70
|
+
decl.tokens.each do |name|
|
|
71
|
+
add_token(table, name, location: decl.location)
|
|
72
|
+
end
|
|
73
|
+
when AST::ParameterizedRule
|
|
74
|
+
table.add_nonterminal(decl.name, location: decl.location)
|
|
75
|
+
when AST::InlineRule
|
|
76
|
+
table.add_nonterminal(decl.rule, location: decl.location)
|
|
41
77
|
end
|
|
42
78
|
end
|
|
43
79
|
|
|
@@ -47,6 +83,12 @@ module Collie
|
|
|
47
83
|
|
|
48
84
|
table
|
|
49
85
|
end
|
|
86
|
+
|
|
87
|
+
def add_token(table, name, type_tag: nil, location: nil)
|
|
88
|
+
table.add_token(name, type_tag: type_tag, location: location)
|
|
89
|
+
rescue Error
|
|
90
|
+
# Ignore duplicates while building the resolver table.
|
|
91
|
+
end
|
|
50
92
|
end
|
|
51
93
|
end
|
|
52
94
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
3
5
|
require_relative "../base"
|
|
4
6
|
|
|
5
7
|
module Collie
|
|
@@ -20,6 +22,29 @@ module Collie
|
|
|
20
22
|
symbol_table.add_nonterminal(rule.name, location: rule.location)
|
|
21
23
|
end
|
|
22
24
|
|
|
25
|
+
ast.declarations.each do |decl|
|
|
26
|
+
case decl
|
|
27
|
+
when AST::TokenDeclaration
|
|
28
|
+
decl.names.each do |name|
|
|
29
|
+
symbol_table.add_token(name, type_tag: decl.type_tag, location: decl.location)
|
|
30
|
+
rescue Error
|
|
31
|
+
# Ignore duplicates while building the resolver table.
|
|
32
|
+
end
|
|
33
|
+
when AST::PrecedenceDeclaration
|
|
34
|
+
decl.tokens.each do |name|
|
|
35
|
+
symbol_table.add_token(name, location: decl.location)
|
|
36
|
+
rescue Error
|
|
37
|
+
# Ignore duplicates while building the resolver table.
|
|
38
|
+
end
|
|
39
|
+
when AST::ParameterizedRule
|
|
40
|
+
symbol_table.add_nonterminal(decl.name, location: decl.location)
|
|
41
|
+
when AST::InlineRule
|
|
42
|
+
symbol_table.add_nonterminal(decl.rule, location: decl.location)
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
Analyzer::SymbolResolver.resolve(ast, symbol_table)
|
|
47
|
+
|
|
23
48
|
# Find start symbol
|
|
24
49
|
start_symbol = find_start_symbol(ast)
|
|
25
50
|
|
|
@@ -42,7 +67,7 @@ module Collie
|
|
|
42
67
|
|
|
43
68
|
# Track nonterminal usage in parameterized rules (%rule)
|
|
44
69
|
ast.declarations.each do |decl|
|
|
45
|
-
next unless decl.is_a?(AST::ParameterizedRule)
|
|
70
|
+
next unless decl.is_a?(AST::ParameterizedRule) || decl.is_a?(AST::InlineRule)
|
|
46
71
|
|
|
47
72
|
decl.alternatives.each do |alt|
|
|
48
73
|
alt.symbols.each do |symbol|
|
|
@@ -60,11 +85,13 @@ module Collie
|
|
|
60
85
|
|
|
61
86
|
# Mark start symbol as used
|
|
62
87
|
symbol_table.use_nonterminal(start_symbol) if start_symbol
|
|
88
|
+
unreachable_rules = unreachable_rules(ast, start_symbol)
|
|
63
89
|
|
|
64
90
|
# Find unused nonterminals
|
|
65
91
|
symbol_table.unused_nonterminals.each do |nonterminal_name|
|
|
66
92
|
# Skip start symbol
|
|
67
93
|
next if nonterminal_name == start_symbol
|
|
94
|
+
next if unreachable_rules.include?(nonterminal_name)
|
|
68
95
|
|
|
69
96
|
rule = ast.rules.find { |r| r.name == nonterminal_name }
|
|
70
97
|
next unless rule
|
|
@@ -85,6 +112,14 @@ module Collie
|
|
|
85
112
|
# Default to first rule
|
|
86
113
|
ast.rules.first&.name
|
|
87
114
|
end
|
|
115
|
+
|
|
116
|
+
def unreachable_rules(ast, start_symbol)
|
|
117
|
+
return Set.new unless start_symbol
|
|
118
|
+
|
|
119
|
+
analyzer = Analyzer::Reachability.new(ast)
|
|
120
|
+
analyzer.analyze(start_symbol)
|
|
121
|
+
analyzer.unreachable_rules
|
|
122
|
+
end
|
|
88
123
|
end
|
|
89
124
|
end
|
|
90
125
|
end
|
|
@@ -14,24 +14,23 @@ module Collie
|
|
|
14
14
|
|
|
15
15
|
def check(ast, context = {})
|
|
16
16
|
symbol_table = context[:symbol_table] || build_symbol_table(ast)
|
|
17
|
+
Analyzer::SymbolResolver.resolve(ast, symbol_table)
|
|
17
18
|
|
|
18
19
|
# Track token usage in normal rules
|
|
19
20
|
ast.rules.each do |rule|
|
|
20
21
|
rule.alternatives.each do |alt|
|
|
21
|
-
alt.symbols.each
|
|
22
|
-
|
|
23
|
-
end
|
|
22
|
+
alt.symbols.each { |symbol| mark_token_usage(symbol_table, symbol) }
|
|
23
|
+
mark_precedence_usage(symbol_table, alt)
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
# Track token usage in parameterized rules (%rule)
|
|
28
28
|
ast.declarations.each do |decl|
|
|
29
|
-
next unless decl.is_a?(AST::ParameterizedRule)
|
|
29
|
+
next unless decl.is_a?(AST::ParameterizedRule) || decl.is_a?(AST::InlineRule)
|
|
30
30
|
|
|
31
31
|
decl.alternatives.each do |alt|
|
|
32
|
-
alt.symbols.each
|
|
33
|
-
|
|
34
|
-
end
|
|
32
|
+
alt.symbols.each { |symbol| mark_token_usage(symbol_table, symbol) }
|
|
33
|
+
mark_precedence_usage(symbol_table, alt)
|
|
35
34
|
end
|
|
36
35
|
end
|
|
37
36
|
|
|
@@ -46,6 +45,15 @@ module Collie
|
|
|
46
45
|
|
|
47
46
|
private
|
|
48
47
|
|
|
48
|
+
def mark_token_usage(symbol_table, symbol)
|
|
49
|
+
symbol_table.use_token(symbol.name) if symbol.terminal?
|
|
50
|
+
symbol.arguments&.each { |argument| mark_token_usage(symbol_table, argument) }
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def mark_precedence_usage(symbol_table, alternative)
|
|
54
|
+
symbol_table.use_token(alternative.prec) if alternative.prec
|
|
55
|
+
end
|
|
56
|
+
|
|
49
57
|
def build_symbol_table(ast)
|
|
50
58
|
table = Analyzer::SymbolTable.new
|
|
51
59
|
|
|
@@ -55,7 +63,13 @@ module Collie
|
|
|
55
63
|
decl.names.each do |name|
|
|
56
64
|
table.add_token(name, type_tag: decl.type_tag, location: decl.location)
|
|
57
65
|
rescue Error
|
|
58
|
-
# Ignore duplicates
|
|
66
|
+
# Ignore duplicates while building the resolver table.
|
|
67
|
+
end
|
|
68
|
+
when AST::PrecedenceDeclaration
|
|
69
|
+
decl.tokens.each do |name|
|
|
70
|
+
table.add_token(name, location: decl.location)
|
|
71
|
+
rescue Error
|
|
72
|
+
# Ignore duplicates while building the resolver table.
|
|
59
73
|
end
|
|
60
74
|
end
|
|
61
75
|
end
|
|
@@ -66,7 +80,7 @@ module Collie
|
|
|
66
80
|
def add_offense_for_declaration(ast, token_name, _location)
|
|
67
81
|
# Find the declaration node
|
|
68
82
|
decl = ast.declarations.find do |d|
|
|
69
|
-
|
|
83
|
+
token_declares?(d, token_name)
|
|
70
84
|
end
|
|
71
85
|
|
|
72
86
|
return unless decl
|
|
@@ -74,6 +88,17 @@ module Collie
|
|
|
74
88
|
add_offense(decl,
|
|
75
89
|
message: "Token '#{token_name}' is declared but never used")
|
|
76
90
|
end
|
|
91
|
+
|
|
92
|
+
def token_declares?(declaration, token_name)
|
|
93
|
+
case declaration
|
|
94
|
+
when AST::TokenDeclaration
|
|
95
|
+
declaration.names.include?(token_name)
|
|
96
|
+
when AST::PrecedenceDeclaration
|
|
97
|
+
declaration.tokens.include?(token_name)
|
|
98
|
+
else
|
|
99
|
+
false
|
|
100
|
+
end
|
|
101
|
+
end
|
|
77
102
|
end
|
|
78
103
|
end
|
|
79
104
|
end
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../ast"
|
|
4
|
+
|
|
5
|
+
module Collie
|
|
6
|
+
module Parser
|
|
7
|
+
# Serializes parser internals for CLI debug commands.
|
|
8
|
+
module DebugSerializer
|
|
9
|
+
class << self
|
|
10
|
+
def token(token)
|
|
11
|
+
{
|
|
12
|
+
type: token.type,
|
|
13
|
+
value: token.value,
|
|
14
|
+
raw_value: token.raw_value,
|
|
15
|
+
location: location(token.location)
|
|
16
|
+
}
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def ast(node)
|
|
20
|
+
return nil unless node
|
|
21
|
+
|
|
22
|
+
case node
|
|
23
|
+
when AST::GrammarFile
|
|
24
|
+
grammar_file(node)
|
|
25
|
+
when AST::Prologue, AST::Epilogue
|
|
26
|
+
code_node(node)
|
|
27
|
+
when AST::TokenDeclaration
|
|
28
|
+
token_declaration(node)
|
|
29
|
+
when AST::TypeDeclaration
|
|
30
|
+
type_declaration(node)
|
|
31
|
+
when AST::PrecedenceDeclaration
|
|
32
|
+
precedence_declaration(node)
|
|
33
|
+
when AST::StartDeclaration
|
|
34
|
+
start_declaration(node)
|
|
35
|
+
when AST::UnionDeclaration
|
|
36
|
+
union_declaration(node)
|
|
37
|
+
when AST::UnknownDeclaration
|
|
38
|
+
unknown_declaration(node)
|
|
39
|
+
when AST::Rule
|
|
40
|
+
rule(node)
|
|
41
|
+
when AST::ParameterizedRule
|
|
42
|
+
parameterized_rule(node)
|
|
43
|
+
when AST::InlineRule
|
|
44
|
+
inline_rule(node)
|
|
45
|
+
when AST::Alternative
|
|
46
|
+
alternative(node)
|
|
47
|
+
when AST::Symbol
|
|
48
|
+
symbol(node)
|
|
49
|
+
when AST::Action
|
|
50
|
+
action(node)
|
|
51
|
+
else
|
|
52
|
+
{ type: node.class.name }
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
private
|
|
57
|
+
|
|
58
|
+
def grammar_file(node)
|
|
59
|
+
{
|
|
60
|
+
type: "GrammarFile",
|
|
61
|
+
prologue: ast(node.prologue),
|
|
62
|
+
declarations: node.declarations.map { |declaration| ast(declaration) },
|
|
63
|
+
rules: node.rules.map { |rule| ast(rule) },
|
|
64
|
+
epilogue: ast(node.epilogue),
|
|
65
|
+
location: location(node.location)
|
|
66
|
+
}
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def code_node(node)
|
|
70
|
+
{
|
|
71
|
+
type: node_type(node),
|
|
72
|
+
code: node.code,
|
|
73
|
+
location: location(node.location)
|
|
74
|
+
}
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def token_declaration(node)
|
|
78
|
+
{
|
|
79
|
+
type: "TokenDeclaration",
|
|
80
|
+
names: node.names,
|
|
81
|
+
type_tag: node.type_tag,
|
|
82
|
+
location: location(node.location)
|
|
83
|
+
}
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def type_declaration(node)
|
|
87
|
+
{
|
|
88
|
+
type: "TypeDeclaration",
|
|
89
|
+
names: node.names,
|
|
90
|
+
type_tag: node.type_tag,
|
|
91
|
+
location: location(node.location)
|
|
92
|
+
}
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def precedence_declaration(node)
|
|
96
|
+
{
|
|
97
|
+
type: "PrecedenceDeclaration",
|
|
98
|
+
associativity: node.associativity,
|
|
99
|
+
tokens: node.tokens,
|
|
100
|
+
location: location(node.location)
|
|
101
|
+
}
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def start_declaration(node)
|
|
105
|
+
{
|
|
106
|
+
type: "StartDeclaration",
|
|
107
|
+
symbol: node.symbol,
|
|
108
|
+
location: location(node.location)
|
|
109
|
+
}
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def union_declaration(node)
|
|
113
|
+
{
|
|
114
|
+
type: "UnionDeclaration",
|
|
115
|
+
body: node.body,
|
|
116
|
+
location: location(node.location)
|
|
117
|
+
}
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def unknown_declaration(node)
|
|
121
|
+
{
|
|
122
|
+
type: "UnknownDeclaration",
|
|
123
|
+
source: node.source,
|
|
124
|
+
location: location(node.location)
|
|
125
|
+
}
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def rule(node)
|
|
129
|
+
{
|
|
130
|
+
type: "Rule",
|
|
131
|
+
name: node.name,
|
|
132
|
+
alternatives: node.alternatives.map { |alternative| ast(alternative) },
|
|
133
|
+
location: location(node.location)
|
|
134
|
+
}
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def parameterized_rule(node)
|
|
138
|
+
{
|
|
139
|
+
type: "ParameterizedRule",
|
|
140
|
+
name: node.name,
|
|
141
|
+
parameters: node.parameters,
|
|
142
|
+
alternatives: node.alternatives.map { |alternative| ast(alternative) },
|
|
143
|
+
location: location(node.location)
|
|
144
|
+
}
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def inline_rule(node)
|
|
148
|
+
{
|
|
149
|
+
type: "InlineRule",
|
|
150
|
+
rule: node.rule,
|
|
151
|
+
parameters: node.parameters,
|
|
152
|
+
alternatives: node.alternatives.map { |alternative| ast(alternative) },
|
|
153
|
+
location: location(node.location)
|
|
154
|
+
}
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def alternative(node)
|
|
158
|
+
{
|
|
159
|
+
type: "Alternative",
|
|
160
|
+
symbols: node.symbols.map { |symbol| ast(symbol) },
|
|
161
|
+
action: ast(node.action),
|
|
162
|
+
prec: node.prec,
|
|
163
|
+
explicit_empty: node.explicit_empty,
|
|
164
|
+
empty_marker: node.empty_marker,
|
|
165
|
+
location: location(node.location)
|
|
166
|
+
}
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
def symbol(node)
|
|
170
|
+
{
|
|
171
|
+
type: "Symbol",
|
|
172
|
+
name: node.name,
|
|
173
|
+
kind: node.kind,
|
|
174
|
+
alias_name: node.alias_name,
|
|
175
|
+
arguments: node.arguments&.map { |argument| ast(argument) },
|
|
176
|
+
location: location(node.location)
|
|
177
|
+
}
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def action(node)
|
|
181
|
+
{
|
|
182
|
+
type: "Action",
|
|
183
|
+
code: node.code,
|
|
184
|
+
location: location(node.location)
|
|
185
|
+
}
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def node_type(node)
|
|
189
|
+
node.class.name.split("::").last
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def location(location)
|
|
193
|
+
return nil unless location
|
|
194
|
+
|
|
195
|
+
{
|
|
196
|
+
file: location.file,
|
|
197
|
+
line: location.line,
|
|
198
|
+
column: location.column,
|
|
199
|
+
length: location.length
|
|
200
|
+
}
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
end
|