collie 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +28 -1
  3. data/README.md +55 -258
  4. data/lib/collie/analyzer/reachability.rb +17 -20
  5. data/lib/collie/analyzer/recursion.rb +28 -9
  6. data/lib/collie/analyzer/symbol_resolver.rb +51 -0
  7. data/lib/collie/ast.rb +18 -4
  8. data/lib/collie/cli.rb +388 -50
  9. data/lib/collie/config/schema.rb +117 -0
  10. data/lib/collie/config.rb +106 -22
  11. data/lib/collie/formatter/formatter.rb +95 -50
  12. data/lib/collie/formatter/options.rb +17 -5
  13. data/lib/collie/formatter/signature.rb +72 -0
  14. data/lib/collie/linter/base.rb +49 -0
  15. data/lib/collie/linter/rules/ambiguous_precedence.rb +5 -2
  16. data/lib/collie/linter/rules/circular_reference.rb +96 -38
  17. data/lib/collie/linter/rules/consistent_tag_naming.rb +13 -13
  18. data/lib/collie/linter/rules/empty_action.rb +42 -11
  19. data/lib/collie/linter/rules/factorizable_rules.rb +2 -2
  20. data/lib/collie/linter/rules/left_recursion.rb +5 -4
  21. data/lib/collie/linter/rules/long_rule.rb +3 -3
  22. data/lib/collie/linter/rules/nonterminal_naming.rb +6 -4
  23. data/lib/collie/linter/rules/prec_improvement.rb +1 -1
  24. data/lib/collie/linter/rules/redundant_epsilon.rb +11 -11
  25. data/lib/collie/linter/rules/right_recursion.rb +4 -1
  26. data/lib/collie/linter/rules/symbol_conflict.rb +130 -0
  27. data/lib/collie/linter/rules/token_naming.rb +2 -1
  28. data/lib/collie/linter/rules/trailing_whitespace.rb +7 -1
  29. data/lib/collie/linter/rules/undefined_symbol.rb +50 -8
  30. data/lib/collie/linter/rules/unused_nonterminal.rb +36 -1
  31. data/lib/collie/linter/rules/unused_token.rb +34 -9
  32. data/lib/collie/parser/debug_serializer.rb +205 -0
  33. data/lib/collie/parser/lexer.rb +182 -11
  34. data/lib/collie/parser/parser.rb +73 -13
  35. data/lib/collie/reporter/github.rb +15 -2
  36. data/lib/collie/reporter/json.rb +4 -1
  37. data/lib/collie/reporter/sarif.rb +81 -0
  38. data/lib/collie/version.rb +1 -1
  39. data/lib/collie.rb +6 -1
  40. metadata +8 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8b86b09838d7091834f3243ae717931e2085c16013aa17e0dff5e8569e8975b
4
- data.tar.gz: 4d09ba7a44de27ff966cf4edc0da4b9824749d719f1051d7fce6d8c39e36c97d
3
+ metadata.gz: 94fb085cbc3a181a76d745558276b76e4922ad401f5335a42d95e8ca4fcfd1b1
4
+ data.tar.gz: 4648e0cf533ddfb58a5d0d4b640951a42cb8f0c80e00ebd207757c8c8f450312
5
5
  SHA512:
6
- metadata.gz: fbf34be5a91e5a0fb0d5bf334e0bffe92ad289720b059022e9e6c050facbba9e0956ea49d4820eec03fc06e81138a4beb06a1e872031472eaffb2f55ec770f18
7
- data.tar.gz: 7d323a0aaca55514da222f2ba48f8bd436fb1139a43d83d7bac90845f06844bd29f9ab280d1859f42e54dc260d9e142af410260042546848f6f7693bf4a9bd11
6
+ metadata.gz: 80384473f720db64d99532419949bcf3b26227d03a0288b5e4dd5c27cd73ea4fdde583518ec659d80ff8a439bcd1a0058cc3a936f95487097b0413c00112d4b9
7
+ data.tar.gz: 13883b3575a3efc000fcb867a526a8288de0000512e910608e5b628541ab185355146bb37c6540c4d2bb25fca95c76786cdb5644161d38bef92fa1e23399a6d8
data/CHANGELOG.md CHANGED
@@ -7,6 +7,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 1.0.0 - 2026-05-20
11
+
12
+ ### Added
13
+
14
+ - Added stable `collie lint`, `collie fmt`, `collie rules`, and `collie version` CLI workflows for Lrama-style grammar files.
15
+ - Added `collie explain` for rule metadata, `collie init` for generating profile-based configuration, and `collie config-schema` for editor/tooling integration.
16
+ - Added parser debug commands, `collie tokens` and `collie ast`, with JSON output.
17
+ - Added stdin support for linting and formatting with `--stdin` and `--stdin-filename`.
18
+ - Added SARIF output for code scanning, alongside text, JSON, and GitHub Actions annotation reporters.
19
+ - Added lint coverage for symbol conflicts, duplicate precedence declarations, non-productive grammar cycles, Lrama declarations, inline rule dependencies, and precedence declarations used as tokens.
20
+
21
+ ### Changed
22
+
23
+ - Expanded file target handling so `lint` and `fmt` can accept directories and glob patterns.
24
+ - Improved formatter preservation for comments, unknown directive blocks, declaration order, parameterless rule declarations, and grammar structure.
25
+ - Made formatter layout options configurable and verified formatted output by reparsing it before writing.
26
+ - Improved CLI failure behavior for CI, including parse errors as diagnostics and configurable `--fail-level` thresholds.
27
+ - Made rule filtering stricter by validating unknown `--only` and `--except` rule names before running lint checks.
28
+ - Reflected configured rule enablement and severity in `collie rules --format json`.
29
+
30
+ ### Fixed
31
+
32
+ - Fixed repeated `--only` and `--except` handling so rule filters do not consume positional file arguments.
33
+ - Fixed config loading errors for missing, invalid, inherited, and non-mapping YAML files.
34
+ - Fixed autocorrection so one correction does not clobber changes made by another correction.
35
+ - Reduced false positives in right-recursion and unused-rule diagnostics for LR grammar workflows.
36
+
10
37
  ## 0.1.0 - 2025-12-17
11
38
 
12
- - Initial commit
39
+ - Initial commit
data/README.md CHANGED
@@ -1,18 +1,11 @@
1
1
  # Collie
2
2
 
3
- A linter and formatter for Lrama Style BNF grammar files (.y files). Collie helps you write clean, maintainable, and error-free grammar files for parser generators like Lrama, Yacc, and Bison.
3
+ A linter and formatter for Lrama Style BNF grammar files (`.y` files).
4
4
 
5
5
  [![CI](https://github.com/ydah/collie/workflows/CI/badge.svg)](https://github.com/ydah/collie/actions)
6
6
  [![Gem Version](https://badge.fury.io/rb/collie.svg)](https://badge.fury.io/rb/collie)
7
7
 
8
- ## Features
9
-
10
- - 18 Built-in Lint Rules - Catch common errors and suggest improvements
11
- - Lrama Extension Support - Full support for parameterized rules, named references, and inline rules
12
- - Smart Formatting - Consistent indentation, alignment, and spacing
13
- - Configurable - Customize rules and formatting options via `.collie.yml`
14
- - Multiple Output Formats - Text, JSON, and GitHub Actions annotations
15
- - Auto-correction - Automatically fix certain issues
8
+ Collie checks grammar files for common mistakes, formats them consistently, and supports Lrama-specific syntax such as parameterized rules, named references, and inline rules.
16
9
 
17
10
  ## Installation
18
11
 
@@ -20,199 +13,86 @@ A linter and formatter for Lrama Style BNF grammar files (.y files). Collie help
20
13
  gem install collie
21
14
  ```
22
15
 
23
- Or add to your Gemfile:
16
+ Or add it to your Gemfile:
24
17
 
25
18
  ```ruby
26
- gem 'collie', require: false
19
+ gem "collie", require: false
27
20
  ```
28
21
 
29
- ## Quick Start
30
-
31
- ### Try Online
32
-
33
- Try Collie in your browser without installing anything:
34
-
35
- [Open Playground](https://ydah.github.io/collie/playground/) (Coming soon)
22
+ Collie requires Ruby 3.2 or newer.
36
23
 
37
- ### Lint a grammar file
24
+ ## Quick Start
38
25
 
39
26
  ```bash
40
- # Check for issues
27
+ # Lint files, globs, or directories
41
28
  collie lint parse.y
29
+ collie lint "src/**/*.y"
30
+ collie lint grammars/
42
31
 
43
- # Auto-fix issues where possible
44
- collie lint -a parse.y
45
- ```
46
-
47
- ### Format a grammar file
48
-
49
- ```bash
50
- # Check formatting
32
+ # Format
33
+ collie fmt parse.y
51
34
  collie fmt --check parse.y
35
+ collie fmt --diff parse.y
52
36
 
53
- # Format in-place
54
- collie fmt parse.y
37
+ # Auto-correct supported lint offenses
38
+ collie lint -a parse.y
55
39
 
56
- # Show diff
57
- collie fmt --diff parse.y
40
+ # Inspect rules
41
+ collie rules
42
+ collie explain DuplicateToken
58
43
  ```
59
44
 
60
- ### List all available rules
45
+ ## Configuration
46
+
47
+ Generate a config file:
61
48
 
62
49
  ```bash
63
- collie rules
50
+ collie init
51
+ collie init --profile lrama
52
+ collie init --profile strict --path .collie.yml
64
53
  ```
65
54
 
66
- ## Configuration
55
+ Profiles: `default`, `lrama`, `bison`, `strict`, `minimal`.
67
56
 
68
- Create a `.collie.yml` file in your project root:
57
+ Minimal `.collie.yml`:
69
58
 
70
59
  ```yaml
71
- # Inherit from another config (optional)
72
- inherit_from: .collie_base.yml
60
+ include:
61
+ - "src/**/*.y"
62
+ exclude:
63
+ - "vendor/**/*"
73
64
 
74
- # Rule configuration
75
- rules:
76
- DuplicateToken:
77
- enabled: true
78
- severity: error
65
+ formatter:
66
+ indent_size: 2
67
+ max_line_length: 120
79
68
 
69
+ rules:
80
70
  TokenNaming:
81
- enabled: true
82
71
  severity: convention
83
- pattern: '^[A-Z][A-Z0-9_]*$'
84
-
85
- LongRule:
86
- enabled: true
87
- max_alternatives: 10
88
-
89
- # Disable specific rules
90
72
  LeftRecursion:
91
73
  enabled: false
92
-
93
- # Formatter options
94
- formatter:
95
- indent_size: 4
96
- align_tokens: true
97
- align_alternatives: true
98
- blank_lines_around_sections: 2
99
- max_line_length: 120
100
-
101
- # File patterns
102
- include:
103
- - '**/*.y'
104
- exclude:
105
- - 'vendor/**/*'
106
- - 'tmp/**/*'
107
- ```
108
-
109
- ## Available Rules
110
-
111
- ### Validation Rules (Error)
112
-
113
- | Rule | Description | Auto-fix |
114
- |------|-------------|----------|
115
- | `DuplicateToken` | Token defined multiple times | No |
116
- | `UndefinedSymbol` | Reference to undeclared token/nonterminal | No |
117
- | `UnreachableRule` | Rule not derivable from start symbol | No |
118
- | `CircularReference` | Infinite recursion in grammar | No |
119
- | `MissingStartSymbol` | No `%start` declaration with ambiguous default | No |
120
-
121
- ### Warning Rules
122
-
123
- | Rule | Description | Auto-fix |
124
- |------|-------------|----------|
125
- | `UnusedNonterminal` | Nonterminal defined but never referenced | No |
126
- | `UnusedToken` | Token declared but never used | No |
127
- | `LeftRecursion` | Detects left recursion (informational) | No |
128
- | `RightRecursion` | Suggests left recursion conversion | No |
129
- | `AmbiguousPrecedence` | Operators without explicit precedence | No |
130
-
131
- ### Style Rules (Convention)
132
-
133
- | Rule | Description | Auto-fix |
134
- |------|-------------|----------|
135
- | `TokenNaming` | Tokens should be UPPER_CASE | No |
136
- | `NonterminalNaming` | Nonterminals should be snake_case | No |
137
- | `ConsistentTagNaming` | Type tags should be consistent | No |
138
- | `TrailingWhitespace` | No trailing whitespace at end of lines | Yes |
139
- | `EmptyAction` | Warns on empty `{ }` actions | Yes |
140
- | `LongRule` | Rule with too many alternatives | No |
141
-
142
- ### Optimization Rules (Info)
143
-
144
- | Rule | Description | Auto-fix |
145
- |------|-------------|----------|
146
- | `FactorizableRules` | Suggests factoring common prefixes | No |
147
- | `RedundantEpsilon` | Unnecessary epsilon productions | No |
148
- | `PrecImprovement` | Suggests `%prec` improvements | No |
149
-
150
- ## Usage Examples
151
-
152
- ### Example Grammar File
153
-
154
- ```yacc
155
- %token <node> CLASS MODULE DEF
156
- %token <id> IDENTIFIER CONSTANT
157
- %token <num> INTEGER FLOAT
158
-
159
- %left '+' '-'
160
- %left '*' '/'
161
- %right '^'
162
-
163
- %%
164
-
165
- program
166
- : class_definition
167
- | module_definition
168
- ;
169
-
170
- class_definition
171
- : CLASS CONSTANT '{' class_body '}'
172
- { $$ = make_class($2, $4); }
173
- ;
174
-
175
- expr
176
- : expr '+' expr { $$ = add($1, $3); }
177
- | expr '-' expr { $$ = sub($1, $3); }
178
- | expr '*' expr { $$ = mul($1, $3); }
179
- | '(' expr ')' { $$ = $2; }
180
- | IDENTIFIER { $$ = var($1); }
181
- | INTEGER { $$ = num($1); }
182
- ;
183
-
184
- %%
185
74
  ```
186
75
 
187
- ### Lrama Extensions
188
-
189
- Collie fully supports Lrama-specific syntax:
190
-
191
- ```yacc
192
- # Parameterized Rules
193
- %rule pair(X, Y): X COMMA Y ;
76
+ ## Commands
194
77
 
195
- number_pair
196
- : pair(NUMBER, NUMBER)
197
- { $$ = make_pair($1, $3); }
198
- ;
78
+ | Command | Purpose |
79
+ | --- | --- |
80
+ | `collie lint [OPTIONS] FILES` | Lint grammar files. Supports `--format text\|json\|github\|sarif`, `--fail-level`, `--only`, `--except`, `--stdin`, and `--autocorrect`. |
81
+ | `collie fmt [OPTIONS] FILES` | Format grammar files. Supports `--check`, `--diff`, `--config`, and `--stdin`. |
82
+ | `collie rules [--format text\|json]` | List available lint rules. |
83
+ | `collie explain RULE [--format text\|json]` | Show rule metadata. |
84
+ | `collie config-schema` | Print the JSON Schema for `.collie.yml`. |
85
+ | `collie tokens FILE` | Print lexer tokens as JSON. |
86
+ | `collie ast FILE` | Print the parsed AST as JSON. |
87
+ | `collie version` | Print the installed version. |
199
88
 
200
- # Named References
201
- assignment
202
- : IDENTIFIER[var] EQUALS NUMBER[value]
203
- { assign($var, $value); }
204
- ;
205
-
206
- # Inline Rules
207
- %inline opt(X): /* empty */ | X ;
208
- ```
89
+ Run `collie help COMMAND` for all options.
209
90
 
210
- ### CI Integration (GitHub Actions)
91
+ ## CI
211
92
 
212
- Use the reusable workflow in your project:
93
+ Use the reusable GitHub Actions workflow:
213
94
 
214
95
  ```yaml
215
- # .github/workflows/lint.yml
216
96
  name: Lint Grammar Files
217
97
 
218
98
  on: [push, pull_request]
@@ -221,113 +101,30 @@ jobs:
221
101
  lint:
222
102
  uses: ydah/collie/.github/workflows/lint.yml@main
223
103
  with:
224
- files: 'src/**/*.y'
225
- config: '.collie.yml'
104
+ files: "src/**/*.y"
105
+ config: ".collie.yml"
226
106
  fail-on-warnings: true
227
107
  ```
228
108
 
229
- ### Programmatic Usage
230
-
231
- ```ruby
232
- require 'collie'
233
-
234
- # Parse a grammar file
235
- parser = Collie::Parser::Parser.new(source_code)
236
- ast = parser.parse
237
-
238
- # Analyze the grammar
239
- symbol_table = Collie::Analyzer::SymbolTable.new(ast)
240
- symbol_table.build
241
-
242
- # Run linter
243
- config = Collie::Config.new
244
- linter = Collie::Linter.new(config)
245
- offenses = linter.lint(ast)
246
-
247
- # Format the grammar
248
- formatter = Collie::Formatter::Formatter.new(config.formatter_options)
249
- formatted_code = formatter.format(ast)
250
- ```
251
-
252
- ## Command Line Options
253
-
254
- ### `collie lint`
255
-
256
- ```bash
257
- collie lint [OPTIONS] FILES
258
-
259
- Options:
260
- --config PATH Path to config file (default: .collie.yml)
261
- --format FORMAT Output format: text, json, github (default: text)
262
- -a, --autocorrect Auto-fix offenses where possible
263
- --only RULES Run only specified rules (comma-separated)
264
- --except RULES Exclude specified rules (comma-separated)
265
- ```
266
-
267
- ### `collie fmt`
109
+ For code scanning integrations:
268
110
 
269
111
  ```bash
270
- collie fmt [OPTIONS] FILES
271
-
272
- Options:
273
- --config PATH Path to config file
274
- --check Check only, don't modify files
275
- --diff Show diff of changes
276
- ```
277
-
278
- ### `collie rules`
279
-
280
- ```bash
281
- collie rules [OPTIONS]
282
-
283
- Options:
284
- --format FORMAT Output format: text, json (default: text)
112
+ collie lint --format sarif parse.y
285
113
  ```
286
114
 
287
115
  ## Development
288
116
 
289
117
  ```bash
290
- # Install dependencies
291
118
  bundle install
292
-
293
- # Run tests
294
119
  bundle exec rspec
295
-
296
- # Run linter
297
- bundle exec rubocop
298
-
299
- # Run all checks
300
120
  bundle exec rake
301
121
  ```
302
122
 
303
- ## Contributing
123
+ ## Documentation
304
124
 
305
- 1. Fork it
306
- 2. Create your feature branch (`git checkout -b my-new-feature`)
307
- 3. Commit your changes (`git commit -am 'Add some feature'`)
308
- 4. Push to the branch (`git push origin my-new-feature`)
309
- 5. Create new Pull Request
125
+ - [Tutorial](docs/TUTORIAL.md)
126
+ - [Changelog](CHANGELOG.md)
310
127
 
311
128
  ## License
312
129
 
313
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
314
-
315
- ## Credits
316
-
317
- Developed for improving the development experience with [Lrama](https://github.com/ruby/lrama), the next-generation parser generator for Ruby.
318
-
319
- ## Related Projects
320
-
321
- ### Editor Integration (Planned)
322
-
323
- - collie-lsp - LSP (Language Server Protocol) implementation for Collie
324
- - vscode-collie - VS Code extension for Collie
325
-
326
- ### Parser Generators
327
-
328
- - [Lrama](https://github.com/ruby/lrama) - LALR (1) parser generator
329
- - [Bison](https://www.gnu.org/software/bison/) - GNU parser generator
330
-
331
- ### Inspiration
332
-
333
- - [RuboCop](https://github.com/rubocop/rubocop) - Ruby static code analyzer (inspiration for architecture)
130
+ MIT License. See [LICENSE.txt](LICENSE.txt).
data/lib/collie/analyzer/reachability.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  require_relative "../ast"
4
6
 
5
7
  module Collie
@@ -27,40 +29,35 @@ module Collie
27
29
  private
28
30
 
29
31
  def build_dependency_graph
30
- # Process normal rules
31
- @grammar.rules.each do |rule|
32
+ rule_like_nodes.each do |rule|
33
+ current_rule_name = rule_name(rule)
34
+
32
35
  rule.alternatives.each do |alt|
33
36
  alt.symbols.each do |symbol|
34
37
  if symbol.nonterminal?
35
- @dependencies[rule.name] << symbol.name
38
+ @dependencies[current_rule_name] << symbol.name
36
39
  # Also consider parameterized rule call arguments: list(expr)
37
40
  if symbol.arguments
38
41
  symbol.arguments.each do |arg|
39
- @dependencies[rule.name] << arg.name if arg.nonterminal?
42
+ @dependencies[current_rule_name] << arg.name if arg.nonterminal?
40
43
  end
41
44
  end
42
45
  end
43
46
  end
44
47
  end
45
48
  end
49
+ end
46
50
 
47
- # Process parameterized rules (%rule)
48
- @grammar.declarations.each do |decl|
49
- next unless decl.is_a?(AST::ParameterizedRule)
51
+ def rule_like_nodes
52
+ @grammar.rules + @grammar.declarations.select { |declaration| rule_like_declaration?(declaration) }
53
+ end
50
54
 
51
- decl.alternatives.each do |alt|
52
- alt.symbols.each do |symbol|
53
- if symbol.nonterminal?
54
- @dependencies[decl.name] << symbol.name
55
- if symbol.arguments
56
- symbol.arguments.each do |arg|
57
- @dependencies[decl.name] << arg.name if arg.nonterminal?
58
- end
59
- end
60
- end
61
- end
62
- end
63
- end
55
+ def rule_like_declaration?(declaration)
56
+ declaration.is_a?(AST::ParameterizedRule) || declaration.is_a?(AST::InlineRule)
57
+ end
58
+
59
+ def rule_name(rule)
60
+ rule.is_a?(AST::InlineRule) ? rule.rule : rule.name
64
61
  end
65
62
 
66
63
  def infer_start_symbol
data/lib/collie/analyzer/recursion.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  require_relative "../ast"
4
6
 
5
7
  module Collie
@@ -13,7 +15,7 @@ module Collie
13
15
  end
14
16
 
15
17
  def analyze
16
- @grammar.rules.each do |rule|
18
+ rule_like_nodes.each do |rule|
17
19
  check_left_recursion(rule)
18
20
  check_right_recursion(rule)
19
21
  end
@@ -34,13 +36,27 @@ module Collie
34
36
 
35
37
  private
36
38
 
39
+ def rule_like_nodes
40
+ @grammar.rules + @grammar.declarations.select { |declaration| rule_like_declaration?(declaration) }
41
+ end
42
+
43
+ def rule_like_declaration?(declaration)
44
+ declaration.is_a?(AST::ParameterizedRule) || declaration.is_a?(AST::InlineRule)
45
+ end
46
+
47
+ def rule_name(rule)
48
+ rule.is_a?(AST::InlineRule) ? rule.rule : rule.name
49
+ end
50
+
37
51
  def check_left_recursion(rule)
52
+ name = rule_name(rule)
53
+
38
54
  rule.alternatives.each do |alt|
39
55
  next if alt.symbols.empty?
40
56
 
41
57
  first_symbol = alt.symbols.first
42
- if first_symbol.nonterminal? && first_symbol.name == rule.name && !@left_recursive.include?(rule.name)
43
- @left_recursive << rule.name
58
+ if first_symbol.nonterminal? && first_symbol.name == name && !@left_recursive.include?(name)
59
+ @left_recursive << name
44
60
  end
45
61
  end
46
62
 
@@ -49,23 +65,26 @@ module Collie
49
65
  end
50
66
 
51
67
  def check_right_recursion(rule)
68
+ name = rule_name(rule)
69
+
52
70
  rule.alternatives.each do |alt|
53
71
  next if alt.symbols.empty?
54
72
 
55
73
  last_symbol = alt.symbols.last
56
- if last_symbol.nonterminal? && last_symbol.name == rule.name && !@right_recursive.include?(rule.name)
57
- @right_recursive << rule.name
74
+ if last_symbol.nonterminal? && last_symbol.name == name && !@right_recursive.include?(name)
75
+ @right_recursive << name
58
76
  end
59
77
  end
60
78
  end
61
79
 
62
80
  def check_indirect_left_recursion(rule, visited = Set.new)
63
- return if visited.include?(rule.name)
81
+ name = rule_name(rule)
82
+ return if visited.include?(name)
64
83
 
65
- visited << rule.name
84
+ visited << name
66
85
 
67
86
  rule.alternatives.each do |alt|
68
- check_alternative_for_indirect_recursion(alt, rule.name)
87
+ check_alternative_for_indirect_recursion(alt, name)
69
88
  end
70
89
  end
71
90
 
@@ -75,7 +94,7 @@ module Collie
75
94
  first_symbol = alt.symbols.first
76
95
  return unless first_symbol.nonterminal?
77
96
 
78
- dependent_rule = @grammar.rules.find { |r| r.name == first_symbol.name }
97
+ dependent_rule = rule_like_nodes.find { |candidate| rule_name(candidate) == first_symbol.name }
79
98
  return unless dependent_rule
80
99
 
81
100
  check_dependent_rule_for_recursion(dependent_rule, rule_name)
data/lib/collie/analyzer/symbol_resolver.rb ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../ast"
4
+
5
+ module Collie
6
+ module Analyzer
7
+ # Resolves symbol kinds using declared tokens and nonterminals.
8
+ class SymbolResolver
9
+ def self.resolve(grammar, symbol_table)
10
+ new(grammar, symbol_table).resolve
11
+ end
12
+
13
+ def initialize(grammar, symbol_table)
14
+ @grammar = grammar
15
+ @symbol_table = symbol_table
16
+ end
17
+
18
+ def resolve
19
+ each_alternative do |alternative|
20
+ alternative.symbols.each { |symbol| resolve_symbol(symbol) }
21
+ end
22
+
23
+ @grammar
24
+ end
25
+
26
+ private
27
+
28
+ def each_alternative
29
+ @grammar.rules.each do |rule|
30
+ rule.alternatives.each { |alternative| yield alternative }
31
+ end
32
+
33
+ @grammar.declarations.each do |declaration|
34
+ next unless declaration.is_a?(AST::ParameterizedRule) || declaration.is_a?(AST::InlineRule)
35
+
36
+ declaration.alternatives.each { |alternative| yield alternative }
37
+ end
38
+ end
39
+
40
+ def resolve_symbol(symbol)
41
+ if @symbol_table.token?(symbol.name)
42
+ symbol.kind = :terminal
43
+ elsif @symbol_table.nonterminal?(symbol.name)
44
+ symbol.kind = :nonterminal
45
+ end
46
+
47
+ symbol.arguments&.each { |argument| resolve_symbol(argument) }
48
+ end
49
+ end
50
+ end
51
+ end
data/lib/collie/ast.rb CHANGED
@@ -84,6 +84,16 @@ module Collie
84
84
  end
85
85
  end
86
86
 
87
+ # Declaration node for directives Collie does not understand yet.
88
+ class UnknownDeclaration
89
+ attr_accessor :source, :location
90
+
91
+ def initialize(source:, location: nil)
92
+ @source = source
93
+ @location = location
94
+ end
95
+ end
96
+
87
97
  # Grammar rule node
88
98
  class Rule
89
99
  attr_accessor :name, :alternatives, :location
@@ -97,12 +107,14 @@ module Collie
97
107
 
98
108
  # Alternative production for a rule
99
109
  class Alternative
100
- attr_accessor :symbols, :action, :prec, :location
110
+ attr_accessor :symbols, :action, :prec, :explicit_empty, :empty_marker, :location
101
111
 
102
- def initialize(symbols: [], action: nil, prec: nil, location: nil)
112
+ def initialize(symbols: [], action: nil, prec: nil, explicit_empty: false, empty_marker: nil, location: nil)
103
113
  @symbols = symbols
104
114
  @action = action
105
115
  @prec = prec
116
+ @explicit_empty = explicit_empty
117
+ @empty_marker = empty_marker
106
118
  @location = location
107
119
  end
108
120
  end
@@ -152,10 +164,12 @@ module Collie
152
164
 
153
165
  # Lrama extension: Inline rule
154
166
  class InlineRule
155
- attr_accessor :rule, :location
167
+ attr_accessor :rule, :parameters, :alternatives, :location
156
168
 
157
- def initialize(rule:, location: nil)
169
+ def initialize(rule:, parameters: [], alternatives: [], location: nil)
158
170
  @rule = rule
171
+ @parameters = parameters
172
+ @alternatives = alternatives
159
173
  @location = location
160
174
  end
161
175
  end