collie 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +12 -0
  3. data/Gemfile +10 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +333 -0
  6. data/Rakefile +9 -0
  7. data/collie.gemspec +37 -0
  8. data/docs/TUTORIAL.md +588 -0
  9. data/docs/index.html +56 -0
  10. data/docs/playground/README.md +134 -0
  11. data/docs/playground/build-collie-bundle.rb +85 -0
  12. data/docs/playground/css/styles.css +402 -0
  13. data/docs/playground/index.html +146 -0
  14. data/docs/playground/js/app.js +231 -0
  15. data/docs/playground/js/collie-bridge.js +186 -0
  16. data/docs/playground/js/editor.js +129 -0
  17. data/docs/playground/js/examples.js +80 -0
  18. data/docs/playground/js/ruby-runner.js +75 -0
  19. data/docs/playground/test-server.sh +18 -0
  20. data/exe/collie +15 -0
  21. data/lib/collie/analyzer/conflict.rb +114 -0
  22. data/lib/collie/analyzer/reachability.rb +83 -0
  23. data/lib/collie/analyzer/recursion.rb +96 -0
  24. data/lib/collie/analyzer/symbol_table.rb +67 -0
  25. data/lib/collie/ast.rb +183 -0
  26. data/lib/collie/cli.rb +249 -0
  27. data/lib/collie/config.rb +91 -0
  28. data/lib/collie/formatter/formatter.rb +196 -0
  29. data/lib/collie/formatter/options.rb +23 -0
  30. data/lib/collie/linter/base.rb +62 -0
  31. data/lib/collie/linter/registry.rb +34 -0
  32. data/lib/collie/linter/rules/ambiguous_precedence.rb +87 -0
  33. data/lib/collie/linter/rules/circular_reference.rb +89 -0
  34. data/lib/collie/linter/rules/consistent_tag_naming.rb +69 -0
  35. data/lib/collie/linter/rules/duplicate_token.rb +38 -0
  36. data/lib/collie/linter/rules/empty_action.rb +52 -0
  37. data/lib/collie/linter/rules/factorizable_rules.rb +67 -0
  38. data/lib/collie/linter/rules/left_recursion.rb +34 -0
  39. data/lib/collie/linter/rules/long_rule.rb +37 -0
  40. data/lib/collie/linter/rules/missing_start_symbol.rb +38 -0
  41. data/lib/collie/linter/rules/nonterminal_naming.rb +34 -0
  42. data/lib/collie/linter/rules/prec_improvement.rb +54 -0
  43. data/lib/collie/linter/rules/redundant_epsilon.rb +44 -0
  44. data/lib/collie/linter/rules/right_recursion.rb +35 -0
  45. data/lib/collie/linter/rules/token_naming.rb +39 -0
  46. data/lib/collie/linter/rules/trailing_whitespace.rb +46 -0
  47. data/lib/collie/linter/rules/undefined_symbol.rb +55 -0
  48. data/lib/collie/linter/rules/unreachable_rule.rb +49 -0
  49. data/lib/collie/linter/rules/unused_nonterminal.rb +93 -0
  50. data/lib/collie/linter/rules/unused_token.rb +82 -0
  51. data/lib/collie/parser/lexer.rb +349 -0
  52. data/lib/collie/parser/parser.rb +416 -0
  53. data/lib/collie/reporter/github.rb +35 -0
  54. data/lib/collie/reporter/json.rb +52 -0
  55. data/lib/collie/reporter/text.rb +97 -0
  56. data/lib/collie/version.rb +5 -0
  57. data/lib/collie.rb +52 -0
  58. metadata +145 -0
@@ -0,0 +1,80 @@
1
+ // Example grammars offered by the playground, keyed by example id.
+ // Each entry has a display `name` and the grammar source text in `code`.
2
+
3
+ const EXAMPLES = {
4
+ // Arithmetic expression grammar using %token and %left declarations.
+ simple: {
5
+ name: 'Simple Calculator',
6
+ code: `%token NUMBER
7
+ %token PLUS MINUS TIMES DIVIDE
8
+ %token LPAREN RPAREN
9
+
10
+ %left PLUS MINUS
11
+ %left TIMES DIVIDE
12
+
13
+ %%
14
+
15
+ program
16
+ : expr
17
+ ;
18
+
19
+ expr
20
+ : expr PLUS expr { $$ = $1 + $3; }
21
+ | expr MINUS expr { $$ = $1 - $3; }
22
+ | expr TIMES expr { $$ = $1 * $3; }
23
+ | expr DIVIDE expr { $$ = $1 / $3; }
24
+ | LPAREN expr RPAREN { $$ = $2; }
25
+ | NUMBER { $$ = $1; }
26
+ ;
27
+
28
+ %%
29
+ `
30
+ },
31
+
32
+ // Grammar exercising %rule parameterized rules (pair, list) and named references such as [func].
+ lrama: {
33
+ name: 'Lrama Features Demo',
34
+ code: `%token NUMBER IDENTIFIER
35
+ %token LPAREN RPAREN COMMA
36
+
37
+ %rule pair(X, Y): X COMMA Y ;
38
+ %rule list(X): X | list(X) COMMA X ;
39
+
40
+ %%
41
+
42
+ program
43
+ : function_call
44
+ ;
45
+
46
+ function_call
47
+ : IDENTIFIER[func] LPAREN argument_list RPAREN
48
+ { call_function($func, $3); }
49
+ ;
50
+
51
+ argument_list
52
+ : list(expr)
53
+ | /* empty */ { $$ = empty_list(); }
54
+ ;
55
+
56
+ expr
57
+ : NUMBER[n] { $$ = make_number($n); }
58
+ | IDENTIFIER[id] { $$ = make_variable($id); }
59
+ | pair(NUMBER, NUMBER)
60
+ ;
61
+
62
+ %%
63
+ `
64
+ },
65
+
66
+ // Deliberately flawed grammar: NUMBER is declared twice, UNDEFINED_TOKEN is never declared,
+ // and unused_rule is not referenced by any other rule. Presumably meant to show linter output.
+ invalid: {
67
+ name: 'Invalid Grammar (Demo)',
68
+ code: `%token NUMBER
69
+ %token NUMBER
70
+
71
+ %%
72
+
73
+ expr: UNDEFINED_TOKEN ;
74
+
75
+ unused_rule: NUMBER ;
76
+
77
+ %%
78
+ `
79
+ }
80
+ };
@@ -0,0 +1,75 @@
1
+ // Ruby.wasm runner for the playground
2
+
3
/**
 * Wrapper around a Ruby.wasm VM that also loads the Collie Ruby bundle.
 *
 * Usage: `const runner = new RubyRunner(); await runner.initialize();`
 * then `await runner.eval(rubySource)`.
 */
class RubyRunner {
  constructor() {
    // Set by initialize(); null until a VM has been obtained.
    this.vm = null;
    // Flipped to true only after the VM exists AND the Collie bundle has been evaluated.
    this.isReady = false;
  }

  /**
   * Locates or boots the Ruby VM, then evaluates the Collie bundle in it.
   *
   * @returns {Promise<boolean>} resolves to true once the runner is ready.
   * @throws {Error} if Ruby.wasm never appears, exposes an unknown API shape,
   *   or any download/evaluation step fails (the error is logged, then rethrown).
   */
  async initialize() {
    try {
      await this.waitForRubyWasm();

      if (typeof window.rubyWasm === 'object' && window.rubyWasm.eval) {
        // The global already behaves like a VM (it exposes eval): use it directly.
        this.vm = window.rubyWasm;
      } else if (window.rubyWasm && window.rubyWasm.DefaultRubyVM) {
        // The global is a module exposing a VM factory: download and compile the interpreter.
        const { DefaultRubyVM } = window.rubyWasm;
        const response = await fetch(
          'https://cdn.jsdelivr.net/npm/@ruby/3.3-wasm-wasi@2.6.2/dist/ruby+stdlib.wasm'
        );
        if (!response.ok) {
          throw new Error(`Failed to download Ruby.wasm binary (HTTP ${response.status})`);
        }
        const module = await WebAssembly.compileStreaming(response);
        const { vm } = await DefaultRubyVM(module);
        this.vm = vm;
      } else {
        throw new Error('Unexpected Ruby.wasm API structure');
      }

      await this.loadCollieBundle();

      this.isReady = true;
      return true;
    } catch (error) {
      console.error('Failed to initialize Ruby.wasm:', error);
      throw error;
    }
  }

  /**
   * Polls for a Ruby.wasm global under any of several candidate names and
   * aliases the first one found to `window.rubyWasm`.
   *
   * Polls every 100 ms for at most 50 attempts.
   *
   * @returns {Promise<void>}
   * @throws {Error} if no candidate global appears within the attempt budget.
   */
  async waitForRubyWasm() {
    const maxAttempts = 50;
    const possibleNames = ['rubyVM', 'rubyWasm', 'RubyWasm', 'ruby', 'Ruby'];

    for (let attempts = 0; attempts < maxAttempts; attempts++) {
      const found = possibleNames.find(name => window[name]);
      if (found) {
        window.rubyWasm = window[found];
        return;
      }

      await new Promise(resolve => setTimeout(resolve, 100));
    }

    throw new Error('Ruby.wasm failed to load. Please refresh the page.');
  }

  /**
   * Downloads `collie-bundle.rb` (cache-busted with a timestamp query) and
   * evaluates it in the VM.
   *
   * @returns {Promise<void>}
   * @throws {Error} if the download fails or the bundle does not evaluate
   *   (the error is logged, then rethrown).
   */
  async loadCollieBundle() {
    try {
      const response = await fetch(`collie-bundle.rb?v=${Date.now()}`);
      // Without this check an HTTP error page would be handed to the Ruby VM as source code.
      if (!response.ok) {
        throw new Error(`Failed to fetch Collie bundle (HTTP ${response.status})`);
      }
      const code = await response.text();
      await this.eval(code);
    } catch (error) {
      console.error('Failed to load Collie bundle:', error);
      throw error;
    }
  }

  /**
   * Evaluates Ruby source in the VM.
   *
   * Before the runner is ready, only source containing `module Collie` is
   * accepted; that is how the bundle itself gets loaded during initialize().
   *
   * @param {string} code Ruby source to evaluate.
   * @returns {Promise<*>} whatever the VM's eval returns.
   * @throws {Error} 'Ruby VM is not ready' if there is no VM yet, or the runner
   *   is not ready and `code` is not the bundle.
   */
  async eval(code) {
    const isBundle = code.includes('module Collie');
    if (!this.vm || (!this.isReady && !isBundle)) {
      throw new Error('Ruby VM is not ready');
    }

    return this.vm.eval(code);
  }
}
@@ -0,0 +1,18 @@
1
#!/bin/bash
# Simple HTTP server for testing the playground locally.
#
# Usage: ./test-server.sh [port]
#   port  TCP port to listen on (default: 8000)

# Serve from the directory containing this script. Abort if it cannot be
# entered, otherwise the server would expose whatever the current directory is.
cd "$(dirname "$0")" || exit 1

PORT="${1:-8000}"

echo "Starting HTTP server at http://localhost:${PORT}"
echo "Open http://localhost:${PORT} in your browser"
echo "Press Ctrl+C to stop"
echo ""

# Try Python 3 first, then whatever `python` is
if command -v python3 &> /dev/null; then
  python3 -m http.server "$PORT"
elif command -v python &> /dev/null; then
  # `python` may be Python 2 or Python 3 depending on the system; the HTTP
  # server module has a different name in each.
  if python -c 'import sys; sys.exit(0 if sys.version_info[0] >= 3 else 1)' &> /dev/null; then
    python -m http.server "$PORT"
  else
    python -m SimpleHTTPServer "$PORT"
  fi
else
  echo "Error: Python not found. Please install Python to run the test server." >&2
  exit 1
fi
data/exe/collie ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "../lib/collie"
5
+
6
# Run the CLI with the process arguments and translate failures into exit codes.
begin
  Collie::CLI.start(ARGV)
rescue Interrupt
  # Ctrl+C: report it on stderr and exit with 130 (128 + SIGINT), the usual shell status.
  warn "\nInterrupted"
  exit 130
rescue StandardError => error
  warn "Error: #{error.message}"
  # The backtrace is noise for end users; print it only when DEBUG is set.
  if ENV["DEBUG"]
    warn error.backtrace.join("\n")
  end
  exit 1
end
@@ -0,0 +1,114 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../ast"
4
+
5
module Collie
  module Analyzer
    # Conflict detection helpers for grammar analysis.
    #
    # All checks are syntactic heuristics over the grammar's rules and its
    # precedence declarations; no parser tables are built.
    class Conflict
      # Matches names consisting solely of operator characters. Anchored with
      # \A/\z so the whole name must match (^/$ would match any single line).
      OPERATOR_PATTERN = %r{\A[+\-*/%^<>=!&|]+\z}

      # @param grammar [#declarations, #rules] grammar to inspect
      # @param symbol_table [Object] stored, but not consulted by the current checks
      def initialize(grammar, symbol_table)
        @grammar = grammar
        @symbol_table = symbol_table
        @precedence_map = {}
      end

      # Runs every heuristic.
      #
      # @return [Hash{Symbol => Array<Hash>}] findings under the keys
      #   :potential_shift_reduce, :potential_reduce_reduce and :ambiguous_precedence
      def analyze
        build_precedence_map
        {
          potential_shift_reduce: detect_shift_reduce_conflicts,
          potential_reduce_reduce: detect_reduce_reduce_conflicts,
          ambiguous_precedence: detect_ambiguous_precedence
        }
      end

      private

      # Maps each token named in a precedence declaration to its level and
      # associativity. Levels count up from 1 in declaration order.
      def build_precedence_map
        # Rebuild from scratch so a repeated #analyze never keeps stale entries.
        @precedence_map.clear

        precedence_level = 0
        @grammar.declarations.each do |decl|
          next unless decl.is_a?(AST::PrecedenceDeclaration)

          precedence_level += 1
          decl.tokens.each do |token|
            @precedence_map[token] = {
              level: precedence_level,
              associativity: decl.associativity
            }
          end
        end
      end

      # Flags every terminal that has no declared precedence and is directly
      # followed by a nonterminal within an alternative.
      def detect_shift_reduce_conflicts
        conflicts = []

        @grammar.rules.each do |rule|
          rule.alternatives.each_with_index do |alt, alt_idx|
            alt.symbols.each_cons(2) do |symbol, next_symbol|
              next unless symbol.terminal? && next_symbol.nonterminal?
              next if has_precedence?(symbol.name)

              conflicts << {
                rule: rule.name,
                alternative: alt_idx,
                symbol: symbol.name,
                location: symbol.location
              }
            end
          end
        end

        conflicts
      end

      # Flags groups of rules whose complete lists of alternatives are identical
      # (compared by symbol name). The location reported is the first rule's.
      def detect_reduce_reduce_conflicts
        rule_groups = @grammar.rules.group_by do |rule|
          rule.alternatives.map { |alt| alt.symbols.map(&:name) }
        end

        rule_groups.each_value.with_object([]) do |rules, conflicts|
          next if rules.length <= 1

          conflicts << {
            rules: rules.map(&:name),
            location: rules.first.location
          }
        end
      end

      # Flags alternatives that use operator-named terminals lacking a declared
      # precedence. One entry is emitted per offending alternative.
      def detect_ambiguous_precedence
        ambiguous = []

        @grammar.rules.each do |rule|
          rule.alternatives.each do |alt|
            operators = alt.symbols.select { |s| s.terminal? && operator?(s.name) }
            operators_without_prec = operators.reject { |op| has_precedence?(op.name) }
            next if operators_without_prec.empty?

            ambiguous << {
              rule: rule.name,
              operators: operators_without_prec.map(&:name),
              location: rule.location
            }
          end
        end

        ambiguous
      end

      # @return [Boolean] whether the token appeared in a precedence declaration
      def has_precedence?(token)
        @precedence_map.key?(token)
      end

      # @return [Boolean] whether the whole token name consists of operator characters
      def operator?(token)
        token.match?(OPERATOR_PATTERN)
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../ast"
4
+
5
module Collie
  module Analyzer
    # Reachability analysis for grammar rules.
    #
    # Builds a "rule name -> nonterminal names it mentions" graph from the
    # normal rules and the parameterized (%rule) declarations, then walks it
    # from the start symbol.
    class Reachability
      # @param grammar [#rules, #declarations] grammar to analyze
      def initialize(grammar)
        @grammar = grammar
        @reachable = Set.new
        @dependencies = Hash.new { |h, k| h[k] = Set.new }
      end

      # Computes the names reachable from the start symbol.
      #
      # @param start_symbol [String, nil] explicit start; when nil, the %start
      #   declaration is used, falling back to the first rule
      # @return [Set] reachable names (empty when no start symbol can be determined)
      def analyze(start_symbol = nil)
        # Start clean so an earlier run with a different start symbol cannot
        # leak into this result.
        @reachable = Set.new
        @dependencies = Hash.new { |h, k| h[k] = Set.new }

        build_dependency_graph
        start = start_symbol || infer_start_symbol
        mark_reachable(start) if start
        @reachable
      end

      # @return [Set] names of rules not marked reachable by the most recent
      #   #analyze (every rule name if #analyze has not been called)
      def unreachable_rules
        all_rules = @grammar.rules.to_set(&:name)
        all_rules - @reachable
      end

      private

      # Records dependencies for normal rules and for parameterized rules (%rule).
      def build_dependency_graph
        @grammar.rules.each do |rule|
          record_dependencies(rule.name, rule.alternatives)
        end

        @grammar.declarations.each do |decl|
          next unless decl.is_a?(AST::ParameterizedRule)

          record_dependencies(decl.name, decl.alternatives)
        end
      end

      # Adds every nonterminal mentioned in +alternatives+ as a dependency of +owner+.
      def record_dependencies(owner, alternatives)
        alternatives.each do |alt|
          alt.symbols.each { |symbol| collect_nonterminals(symbol, owner) }
        end
      end

      # Records +symbol+ (when it is a nonterminal) and, recursively, the
      # nonterminals among its call arguments, so nested parameterized calls
      # such as option(list(expr)) reach expr as well.
      def collect_nonterminals(symbol, owner)
        return unless symbol.nonterminal?

        @dependencies[owner] << symbol.name

        arguments = symbol.arguments if symbol.respond_to?(:arguments)
        arguments&.each { |arg| collect_nonterminals(arg, owner) }
      end

      # @return [Object, nil] symbol of the %start declaration, else the first
      #   rule's name, else nil for a grammar without rules
      def infer_start_symbol
        start_decl = @grammar.declarations.find { |d| d.is_a?(AST::StartDeclaration) }
        return start_decl.symbol if start_decl

        @grammar.rules.first&.name
      end

      # Marks +symbol+ and everything it transitively depends on as reachable.
      # Uses an explicit stack so long dependency chains cannot exhaust the
      # call stack.
      def mark_reachable(symbol)
        pending = [symbol]

        until pending.empty?
          current = pending.pop
          next unless @reachable.add?(current)

          # fetch avoids creating empty entries; reverse keeps the visiting
          # order of a depth-first walk in dependency insertion order.
          pending.concat(@dependencies.fetch(current, []).to_a.reverse)
        end
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../ast"
4
+
5
module Collie
  module Analyzer
    # Recursion analysis for grammar rules.
    #
    # A rule is left recursive when following the first symbol of its
    # alternatives leads back to the rule itself, either directly (a: a X) or
    # through any chain of other rules (a: b X, b: c Y, c: a Z). Right
    # recursion is detected only in its direct form (a: X a).
    #
    # Only the literal first/last symbol of an alternative is inspected;
    # symbols that can derive the empty string are not skipped over.
    class Recursion
      # @param grammar [#rules] grammar to analyze
      def initialize(grammar)
        @grammar = grammar
        @left_recursive = []
        @right_recursive = []
      end

      # Classifies every rule of the grammar.
      #
      # @return [Hash{Symbol => Array}] rule names under :left_recursive and
      #   :right_recursive, each in rule order and without duplicates
      def analyze
        leading = leading_nonterminals

        @grammar.rules.each do |rule|
          remember(@left_recursive, rule.name) if reaches_itself?(rule.name, leading)
          remember(@right_recursive, rule.name) if directly_right_recursive?(rule)
        end

        {
          left_recursive: @left_recursive,
          right_recursive: @right_recursive
        }
      end

      # @return [Boolean] whether #analyze found the rule to be left recursive
      def left_recursive?(rule_name)
        @left_recursive.include?(rule_name)
      end

      # @return [Boolean] whether #analyze found the rule to be right recursive
      def right_recursive?(rule_name)
        @right_recursive.include?(rule_name)
      end

      private

      # Appends +name+ unless already present, so repeated #analyze calls
      # never produce duplicates.
      def remember(list, name)
        list << name unless list.include?(name)
      end

      # Maps each rule name to the set of nonterminal names that begin one of
      # its alternatives. Empty alternatives contribute nothing.
      def leading_nonterminals
        @grammar.rules.each_with_object({}) do |rule, graph|
          rule.alternatives.each do |alt|
            first_symbol = alt.symbols.first
            next unless first_symbol&.nonterminal?

            (graph[rule.name] ||= Set.new) << first_symbol.name
          end
        end
      end

      # Walks the leading-nonterminal graph from +rule_name+ and reports
      # whether the walk returns to it. +seen+ keeps the walk finite on
      # cycles that do not involve +rule_name+.
      def reaches_itself?(rule_name, graph)
        pending = graph.fetch(rule_name, []).to_a
        seen = Set.new

        until pending.empty?
          current = pending.pop
          return true if current == rule_name
          next unless seen.add?(current)

          pending.concat(graph.fetch(current, []).to_a)
        end

        false
      end

      # @return [Boolean] whether some alternative ends with the rule itself
      def directly_right_recursive?(rule)
        rule.alternatives.any? do |alt|
          last_symbol = alt.symbols.last
          last_symbol&.nonterminal? && last_symbol.name == rule.name
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../ast"
4
+
5
module Collie
  module Analyzer
    # Registry of the tokens and nonterminals a grammar declares, with a
    # reference counter per symbol.
    class SymbolTable
      attr_reader :tokens, :nonterminals, :types

      def initialize
        # name => { type_tag:, location:, usage_count: }
        @tokens = {}
        # name => { location:, usage_count: }
        @nonterminals = {}
        # type_tag => [token names]
        @types = {}
      end

      # Declares a token.
      #
      # @param name token name
      # @param type_tag optional type tag; when given, the name is also indexed under it
      # @param location optional declaration location
      # @raise [Error] if the token was declared before
      def add_token(name, type_tag: nil, location: nil)
        if token?(name)
          raise Error, "Token '#{name}' already declared at #{@tokens[name][:location]}"
        end

        @tokens[name] = { type_tag: type_tag, location: location, usage_count: 0 }
        return unless type_tag

        @types[type_tag] ||= []
        @types[type_tag] << name
      end

      # Declares a nonterminal; a repeated declaration keeps the first entry.
      def add_nonterminal(name, location: nil)
        @nonterminals[name] = { location: location, usage_count: 0 } unless nonterminal?(name)
      end

      # Counts one use of a declared token; unknown names are ignored.
      def use_token(name)
        entry = @tokens.fetch(name) { return }
        entry[:usage_count] += 1
      end

      # Counts one use of a declared nonterminal; unknown names are ignored.
      def use_nonterminal(name)
        entry = @nonterminals.fetch(name) { return }
        entry[:usage_count] += 1
      end

      def token?(name)
        @tokens.key?(name)
      end

      def nonterminal?(name)
        @nonterminals.key?(name)
      end

      # @return [Boolean] whether the name is a known token or nonterminal
      def declared?(name)
        [@tokens, @nonterminals].any? { |table| table.key?(name) }
      end

      # @return [Array] declared tokens that were never used
      def unused_tokens
        never_used(@tokens)
      end

      # @return [Array] declared nonterminals that were never used
      def unused_nonterminals
        never_used(@nonterminals)
      end

      # @return [Array] names declared both as a token and as a nonterminal
      def duplicate_symbols
        @tokens.each_key.select { |name| nonterminal?(name) }
      end

      private

      # Names in +table+ whose usage counter is still zero, in declaration order.
      def never_used(table)
        table.keys.select { |name| table[name][:usage_count].zero? }
      end
    end
  end
end