collie 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +12 -0
  3. data/Gemfile +10 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +333 -0
  6. data/Rakefile +9 -0
  7. data/collie.gemspec +37 -0
  8. data/docs/TUTORIAL.md +588 -0
  9. data/docs/index.html +56 -0
  10. data/docs/playground/README.md +134 -0
  11. data/docs/playground/build-collie-bundle.rb +85 -0
  12. data/docs/playground/css/styles.css +402 -0
  13. data/docs/playground/index.html +146 -0
  14. data/docs/playground/js/app.js +231 -0
  15. data/docs/playground/js/collie-bridge.js +186 -0
  16. data/docs/playground/js/editor.js +129 -0
  17. data/docs/playground/js/examples.js +80 -0
  18. data/docs/playground/js/ruby-runner.js +75 -0
  19. data/docs/playground/test-server.sh +18 -0
  20. data/exe/collie +15 -0
  21. data/lib/collie/analyzer/conflict.rb +114 -0
  22. data/lib/collie/analyzer/reachability.rb +83 -0
  23. data/lib/collie/analyzer/recursion.rb +96 -0
  24. data/lib/collie/analyzer/symbol_table.rb +67 -0
  25. data/lib/collie/ast.rb +183 -0
  26. data/lib/collie/cli.rb +249 -0
  27. data/lib/collie/config.rb +91 -0
  28. data/lib/collie/formatter/formatter.rb +196 -0
  29. data/lib/collie/formatter/options.rb +23 -0
  30. data/lib/collie/linter/base.rb +62 -0
  31. data/lib/collie/linter/registry.rb +34 -0
  32. data/lib/collie/linter/rules/ambiguous_precedence.rb +87 -0
  33. data/lib/collie/linter/rules/circular_reference.rb +89 -0
  34. data/lib/collie/linter/rules/consistent_tag_naming.rb +69 -0
  35. data/lib/collie/linter/rules/duplicate_token.rb +38 -0
  36. data/lib/collie/linter/rules/empty_action.rb +52 -0
  37. data/lib/collie/linter/rules/factorizable_rules.rb +67 -0
  38. data/lib/collie/linter/rules/left_recursion.rb +34 -0
  39. data/lib/collie/linter/rules/long_rule.rb +37 -0
  40. data/lib/collie/linter/rules/missing_start_symbol.rb +38 -0
  41. data/lib/collie/linter/rules/nonterminal_naming.rb +34 -0
  42. data/lib/collie/linter/rules/prec_improvement.rb +54 -0
  43. data/lib/collie/linter/rules/redundant_epsilon.rb +44 -0
  44. data/lib/collie/linter/rules/right_recursion.rb +35 -0
  45. data/lib/collie/linter/rules/token_naming.rb +39 -0
  46. data/lib/collie/linter/rules/trailing_whitespace.rb +46 -0
  47. data/lib/collie/linter/rules/undefined_symbol.rb +55 -0
  48. data/lib/collie/linter/rules/unreachable_rule.rb +49 -0
  49. data/lib/collie/linter/rules/unused_nonterminal.rb +93 -0
  50. data/lib/collie/linter/rules/unused_token.rb +82 -0
  51. data/lib/collie/parser/lexer.rb +349 -0
  52. data/lib/collie/parser/parser.rb +416 -0
  53. data/lib/collie/reporter/github.rb +35 -0
  54. data/lib/collie/reporter/json.rb +52 -0
  55. data/lib/collie/reporter/text.rb +97 -0
  56. data/lib/collie/version.rb +5 -0
  57. data/lib/collie.rb +52 -0
  58. metadata +145 -0
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Collie
  module Linter
    module Rules
      # Reports alternatives whose %prec token does not appear in any
      # precedence declaration of the grammar.
      class PrecImprovement < Base
        self.rule_name = "PrecImprovement"
        self.description = "Suggests improvements for %prec directive usage"
        self.severity = :info
        self.autocorrectable = false

        # Checks every rule of the grammar for undeclared %prec tokens.
        #
        # @param ast [Object] grammar AST responding to #declarations and #rules
        # @param _context [Hash] accepted for interface compatibility; unused here
        # @return [Object] the @offenses collection (presumably set up by Base)
        def check(ast, _context = {})
          precedence_tokens = collect_precedence_tokens(ast)
          ast.rules.each { |rule| check_rule(rule, precedence_tokens) }
          @offenses
        end

        private

        # Gathers the tokens of every AST::PrecedenceDeclaration into one flat list.
        def collect_precedence_tokens(ast)
          ast.declarations
             .select { |decl| decl.is_a?(AST::PrecedenceDeclaration) }
             .flat_map(&:tokens)
        end

        # Adds an offense for each alternative of +rule+ whose %prec token is
        # missing from +precedence_tokens+. Alternatives without %prec are skipped.
        def check_rule(rule, precedence_tokens)
          rule.alternatives.each do |alt|
            next unless alt.prec
            next if precedence_tokens.include?(alt.prec)

            # This is a plain interpolated string, not a format string, so the
            # percent sign must not be doubled: "%%prec" would be reported with
            # both percent signs (assuming add_offense stores the message as-is).
            add_offense(
              alt,
              message: "%prec token '#{alt.prec}' is not declared in precedence directives. " \
                       "Consider adding it to %left, %right, or %nonassoc."
            )
          end
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::PrecImprovement)
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Collie
  module Linter
    module Rules
      # Points out empty (epsilon) alternatives in rules that also have at
      # least one non-empty alternative.
      class RedundantEpsilon < Base
        self.rule_name = "RedundantEpsilon"
        self.description = "Detects potentially redundant epsilon (empty) productions"
        self.severity = :info
        self.autocorrectable = false

        # Inspects each rule of the grammar.
        #
        # @param ast [Object] grammar AST responding to #rules
        # @param _context [Hash] accepted for interface compatibility; unused here
        # @return [Object] the @offenses collection
        def check(ast, _context = {})
          ast.rules.each { |rule| check_rule(rule) }
          @offenses
        end

        private

        # Reports every empty alternative of +rule+, but only when the rule
        # mixes empty and non-empty alternatives.
        def check_rule(rule)
          empty_alts, filled_alts = rule.alternatives.partition { |alt| alt.symbols.empty? }
          return if empty_alts.empty? || filled_alts.empty?

          note = "Rule '#{rule.name}' has an epsilon production. " \
                 "Verify if it's necessary or if the rule can be made optional elsewhere."
          empty_alts.each { |alt| add_offense(alt, message: note) }
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::RedundantEpsilon)
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module Collie
  module Linter
    module Rules
      # Warns about rules that Analyzer::Recursion classifies as right recursive.
      class RightRecursion < Base
        self.rule_name = "RightRecursion"
        self.description = "Detects right recursion (consider converting to left recursion for LR parsers)"
        self.severity = :warning
        self.autocorrectable = false

        # Runs the recursion analysis and reports each right-recursive rule
        # that can be found by name in the AST.
        #
        # @param ast [Object] grammar AST responding to #rules
        # @param _context [Hash] accepted for interface compatibility; unused here
        # @return [Object] the @offenses collection
        def check(ast, _context = {})
          recursion = Analyzer::Recursion.new(ast).analyze

          recursion[:right_recursive].each do |rule_name|
            # Names without a matching rule node are silently ignored.
            offender = ast.rules.find { |candidate| candidate.name == rule_name }
            if offender
              add_offense(
                offender,
                message: "Rule '#{rule_name}' uses right recursion " \
                         "(consider left recursion for better LR parser performance)"
              )
            end
          end

          @offenses
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::RightRecursion)
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../base"
4
+
5
module Collie
  module Linter
    module Rules
      # Enforces a naming pattern on declared tokens (UPPER_CASE by default).
      class TokenNaming < Base
        self.rule_name = "TokenNaming"
        self.description = "Tokens should follow UPPER_CASE naming convention"
        self.severity = :convention
        self.autocorrectable = false

        # Pattern applied when the rule configuration supplies no :pattern.
        DEFAULT_PATTERN = /^[A-Z][A-Z0-9_]*$/

        # Reports each token name that neither matches the pattern nor starts
        # with a quote character (quoted names are treated as literals).
        #
        # @param ast [Object] grammar AST responding to #declarations
        # @param _context [Hash] accepted for interface compatibility; unused here
        # @return [Object] the @offenses collection
        def check(ast, _context = {})
          configured = @config[:pattern]
          pattern = configured ? Regexp.new(configured) : DEFAULT_PATTERN

          token_declarations = ast.declarations.select { |decl| decl.is_a?(AST::TokenDeclaration) }
          token_declarations.each do |decl|
            decl.names.each do |name|
              next if name.match?(pattern) || name.start_with?('"', "'")

              add_offense(decl, message: "Token '#{name}' should match pattern #{pattern.inspect}")
            end
          end

          @offenses
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::TokenNaming)
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Collie
  module Linter
    module Rules
      # Flags lines of the raw source that end in spaces or tabs.
      class TrailingWhitespace < Base
        self.rule_name = "TrailingWhitespace"
        self.description = "Detects trailing whitespace at the end of lines"
        self.severity = :convention
        self.autocorrectable = true

        # Minimal stand-in for an AST node: it only carries a location.
        Node = Struct.new(:location)

        # Scans context[:source] line by line; the AST itself is not consulted.
        #
        # @param _ast [Object] unused
        # @param context [Hash] reads :source and :file; the autocorrect
        #   callback writes the cleaned text back to :source
        # @return [Object] the @offenses collection (unchanged when no source is given)
        def check(_ast, context = {})
          source = context[:source]
          return @offenses unless source

          source.lines.each.with_index(1) do |line, line_number|
            next unless line.match?(/[ \t]+\n$|[ \t]+$/)

            # Column is one past the length of the right-stripped line.
            offense_location = AST::Location.new(
              file: context[:file] || "grammar",
              line: line_number,
              column: line.rstrip.length + 1
            )

            # Every offense carries a callback that strips trailing blanks from
            # the whole captured source, not only from the offending line.
            strip_all = lambda {
              context[:source] = source.gsub(/[ \t]+\n/, "\n").gsub(/[ \t]+$/, "")
            }

            add_offense(
              Node.new(offense_location),
              message: "Trailing whitespace detected",
              autocorrect: strip_all
            )
          end

          @offenses
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::TrailingWhitespace)
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../base"
4
+
5
module Collie
  module Linter
    module Rules
      # Detects references to undeclared tokens or nonterminals
      class UndefinedSymbol < Base
        self.rule_name = "UndefinedSymbol"
        self.description = "Detects references to undeclared tokens or nonterminals"
        self.severity = :error
        self.autocorrectable = false

        # Reports every symbol used in a rule alternative that the symbol
        # table does not know as declared.
        #
        # @param ast [Object] grammar AST responding to #declarations and #rules
        # @param context [Hash] may carry a prebuilt :symbol_table; otherwise one
        #   is built from the AST
        # @return [Object] the @offenses collection
        def check(ast, context = {})
          symbol_table = context[:symbol_table] || build_symbol_table(ast)

          ast.rules.each do |rule|
            rule.alternatives.each do |alt|
              alt.symbols.each do |symbol|
                next if symbol_table.declared?(symbol.name)

                add_offense(symbol, message: "Undefined symbol '#{symbol.name}'")
              end
            end
          end

          @offenses
        end

        private

        # Builds a symbol table holding every declared token name and every
        # rule name (registered as a nonterminal).
        def build_symbol_table(ast)
          table = Analyzer::SymbolTable.new

          ast.declarations.each do |decl|
            next unless decl.is_a?(AST::TokenDeclaration)

            decl.names.each do |name|
              table.add_token(name, type_tag: decl.type_tag, location: decl.location)
            rescue Error
              # Ignore duplicates, as UnusedToken#build_symbol_table does.
              # Presumably add_token raises Error for an already registered
              # name (TODO confirm); a repeated declaration must not abort
              # this rule, since the first registration already makes the
              # name count as declared.
            end
          end

          ast.rules.each do |rule|
            table.add_nonterminal(rule.name, location: rule.location)
          end

          table
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::UndefinedSymbol)
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../base"
4
+
5
module Collie
  module Linter
    module Rules
      # Warns about rules that Analyzer::Reachability cannot reach from the
      # grammar's start symbol.
      class UnreachableRule < Base
        self.rule_name = "UnreachableRule"
        self.description = "Detects rules that are not reachable from the start symbol"
        self.severity = :warning
        self.autocorrectable = false

        # Runs the reachability analysis and reports each unreachable rule
        # that can be found by name in the AST. A grammar without rules
        # produces no offenses.
        #
        # @param ast [Object] grammar AST responding to #declarations and #rules
        # @param _context [Hash] accepted for interface compatibility; unused here
        # @return [Object] the @offenses collection
        def check(ast, _context = {})
          return @offenses if ast.rules.empty?

          reachability = Analyzer::Reachability.new(ast)
          start_symbol = find_start_symbol(ast)
          reachability.analyze(start_symbol)

          reachability.unreachable_rules.each do |rule_name|
            orphan = ast.rules.find { |candidate| candidate.name == rule_name }
            if orphan
              add_offense(
                orphan,
                message: "Rule '#{rule_name}' is not reachable from start symbol '#{start_symbol}'"
              )
            end
          end

          @offenses
        end

        private

        # Returns the symbol of the first start declaration, or, when there is
        # none, the name of the first rule (nil for an empty rule list).
        def find_start_symbol(ast)
          declared = ast.declarations.find { |decl| decl.is_a?(AST::StartDeclaration) }
          declared ? declared.symbol : ast.rules.first&.name
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::UnreachableRule)
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../base"
4
+
5
module Collie
  module Linter
    module Rules
      # Detects nonterminals that are defined but never referenced
      class UnusedNonterminal < Base
        self.rule_name = "UnusedNonterminal"
        self.description = "Detects nonterminals that are defined but never referenced"
        self.severity = :warning
        self.autocorrectable = false

        # Registers every rule name as a nonterminal, marks the ones referenced
        # from rule bodies, parameterized rules (%rule) and the start symbol,
        # and reports the rest.
        #
        # @param ast [Object] grammar AST responding to #declarations and #rules
        # @param _context [Hash] accepted for interface compatibility; unused here
        # @return [Object] the @offenses collection
        def check(ast, _context = {})
          symbol_table = Analyzer::SymbolTable.new

          ast.rules.each do |rule|
            symbol_table.add_nonterminal(rule.name, location: rule.location)
          end

          start_symbol = find_start_symbol(ast)

          # References from normal rules
          ast.rules.each do |rule|
            mark_referenced_nonterminals(symbol_table, rule.alternatives)
          end

          # References from parameterized rules (%rule)
          ast.declarations.each do |decl|
            next unless decl.is_a?(AST::ParameterizedRule)

            mark_referenced_nonterminals(symbol_table, decl.alternatives)
          end

          # The start symbol counts as used even when nothing references it
          symbol_table.use_nonterminal(start_symbol) if start_symbol

          symbol_table.unused_nonterminals.each do |nonterminal_name|
            next if nonterminal_name == start_symbol

            rule = ast.rules.find { |r| r.name == nonterminal_name }
            next unless rule

            add_offense(rule,
                        message: "Nonterminal '#{nonterminal_name}' is defined but never used")
          end

          @offenses
        end

        private

        # Marks as used every nonterminal symbol in +alternatives+, together
        # with the nonterminal arguments of parameterized calls such as
        # list(expr). Only direct arguments are inspected.
        def mark_referenced_nonterminals(symbol_table, alternatives)
          alternatives.each do |alt|
            alt.symbols.each do |symbol|
              next unless symbol.nonterminal?

              symbol_table.use_nonterminal(symbol.name)
              next unless symbol.arguments

              symbol.arguments.each do |arg|
                symbol_table.use_nonterminal(arg.name) if arg.nonterminal?
              end
            end
          end
        end

        # Returns the symbol of the first start declaration, or, when there is
        # none, the name of the first rule (nil for an empty rule list).
        def find_start_symbol(ast)
          start_decl = ast.declarations.find { |d| d.is_a?(AST::StartDeclaration) }
          return start_decl.symbol if start_decl

          # Default to first rule
          ast.rules.first&.name
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::UnusedNonterminal)
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../base"
4
+
5
module Collie
  module Linter
    module Rules
      # Detects tokens that are declared but never used
      class UnusedToken < Base
        self.rule_name = "UnusedToken"
        self.description = "Detects tokens that are declared but never used in rules"
        self.severity = :warning
        self.autocorrectable = false

        # Marks every token referenced from rule bodies and parameterized
        # rules (%rule) as used, then reports the tokens left unused.
        #
        # @param ast [Object] grammar AST responding to #declarations and #rules
        # @param context [Hash] may carry a prebuilt :symbol_table (it is updated
        #   in place through #use_token); otherwise one is built from the AST
        # @return [Object] the @offenses collection
        def check(ast, context = {})
          symbol_table = context[:symbol_table] || build_symbol_table(ast)

          # Token usage in normal rules
          ast.rules.each do |rule|
            mark_used_tokens(symbol_table, rule.alternatives)
          end

          # Token usage in parameterized rules (%rule)
          ast.declarations.each do |decl|
            next unless decl.is_a?(AST::ParameterizedRule)

            mark_used_tokens(symbol_table, decl.alternatives)
          end

          symbol_table.unused_tokens.each do |token_name|
            add_offense_for_declaration(ast, token_name)
          end

          @offenses
        end

        private

        # Marks as used every terminal symbol in +alternatives+. Arguments of
        # parameterized calls such as list(NUMBER) are inspected as well, the
        # same way UnusedNonterminal inspects them for nonterminals; without
        # this, a token passed only as an argument would be reported as unused.
        def mark_used_tokens(symbol_table, alternatives)
          alternatives.each do |alt|
            alt.symbols.each do |symbol|
              if symbol.terminal?
                symbol_table.use_token(symbol.name)
              elsif symbol.nonterminal? && symbol.arguments
                symbol.arguments.each do |arg|
                  # NOTE(review): any argument that is not a nonterminal is
                  # treated as a token reference; confirm against the parser.
                  symbol_table.use_token(arg.name) unless arg.nonterminal?
                end
              end
            end
          end
        end

        # Builds a symbol table containing every declared token name.
        def build_symbol_table(ast)
          table = Analyzer::SymbolTable.new

          ast.declarations.each do |decl|
            next unless decl.is_a?(AST::TokenDeclaration)

            decl.names.each do |name|
              table.add_token(name, type_tag: decl.type_tag, location: decl.location)
            rescue Error
              # Ignore duplicates
            end
          end

          table
        end

        # Attaches the offense to the first token declaration that lists
        # +token_name+; nothing is reported when no such declaration exists.
        # The third parameter is unused and kept only so that existing
        # three-argument calls keep working.
        def add_offense_for_declaration(ast, token_name, _location = nil)
          decl = ast.declarations.find do |d|
            d.is_a?(AST::TokenDeclaration) && d.names.include?(token_name)
          end
          return unless decl

          add_offense(decl,
                      message: "Token '#{token_name}' is declared but never used")
        end
      end
    end
  end
end

Collie::Linter::Registry.register(Collie::Linter::Rules::UnusedToken)