collie 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -1
- data/README.md +55 -258
- data/lib/collie/analyzer/reachability.rb +17 -20
- data/lib/collie/analyzer/recursion.rb +28 -9
- data/lib/collie/analyzer/symbol_resolver.rb +51 -0
- data/lib/collie/ast.rb +18 -4
- data/lib/collie/cli.rb +388 -50
- data/lib/collie/config/schema.rb +117 -0
- data/lib/collie/config.rb +106 -22
- data/lib/collie/formatter/formatter.rb +95 -50
- data/lib/collie/formatter/options.rb +17 -5
- data/lib/collie/formatter/signature.rb +72 -0
- data/lib/collie/linter/base.rb +49 -0
- data/lib/collie/linter/rules/ambiguous_precedence.rb +5 -2
- data/lib/collie/linter/rules/circular_reference.rb +96 -38
- data/lib/collie/linter/rules/consistent_tag_naming.rb +13 -13
- data/lib/collie/linter/rules/empty_action.rb +42 -11
- data/lib/collie/linter/rules/factorizable_rules.rb +2 -2
- data/lib/collie/linter/rules/left_recursion.rb +5 -4
- data/lib/collie/linter/rules/long_rule.rb +3 -3
- data/lib/collie/linter/rules/nonterminal_naming.rb +6 -4
- data/lib/collie/linter/rules/prec_improvement.rb +1 -1
- data/lib/collie/linter/rules/redundant_epsilon.rb +11 -11
- data/lib/collie/linter/rules/right_recursion.rb +4 -1
- data/lib/collie/linter/rules/symbol_conflict.rb +130 -0
- data/lib/collie/linter/rules/token_naming.rb +2 -1
- data/lib/collie/linter/rules/trailing_whitespace.rb +7 -1
- data/lib/collie/linter/rules/undefined_symbol.rb +50 -8
- data/lib/collie/linter/rules/unused_nonterminal.rb +36 -1
- data/lib/collie/linter/rules/unused_token.rb +34 -9
- data/lib/collie/parser/debug_serializer.rb +205 -0
- data/lib/collie/parser/lexer.rb +182 -11
- data/lib/collie/parser/parser.rb +73 -13
- data/lib/collie/reporter/github.rb +15 -2
- data/lib/collie/reporter/json.rb +4 -1
- data/lib/collie/reporter/sarif.rb +81 -0
- data/lib/collie/version.rb +1 -1
- data/lib/collie.rb +6 -1
- metadata +8 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 94fb085cbc3a181a76d745558276b76e4922ad401f5335a42d95e8ca4fcfd1b1
|
|
4
|
+
data.tar.gz: 4648e0cf533ddfb58a5d0d4b640951a42cb8f0c80e00ebd207757c8c8f450312
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 80384473f720db64d99532419949bcf3b26227d03a0288b5e4dd5c27cd73ea4fdde583518ec659d80ff8a439bcd1a0058cc3a936f95487097b0413c00112d4b9
|
|
7
|
+
data.tar.gz: 13883b3575a3efc000fcb867a526a8288de0000512e910608e5b628541ab185355146bb37c6540c4d2bb25fca95c76786cdb5644161d38bef92fa1e23399a6d8
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## Unreleased
|
|
9
9
|
|
|
10
|
+
## 1.0.0 - 2026-05-20
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added stable `collie lint`, `collie fmt`, `collie rules`, and `collie version` CLI workflows for Lrama-style grammar files.
|
|
15
|
+
- Added `collie explain` for rule metadata, `collie init` for generating profile-based configuration, and `collie config-schema` for editor/tooling integration.
|
|
16
|
+
- Added parser debug commands, `collie tokens` and `collie ast`, with JSON output.
|
|
17
|
+
- Added stdin support for linting and formatting with `--stdin` and `--stdin-filename`.
|
|
18
|
+
- Added SARIF output for code scanning, alongside text, JSON, and GitHub Actions annotation reporters.
|
|
19
|
+
- Added lint coverage for symbol conflicts, duplicate precedence declarations, non-productive grammar cycles, Lrama declarations, inline rule dependencies, and precedence declarations used as tokens.
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- Expanded file target handling so `lint` and `fmt` can accept directories and glob patterns.
|
|
24
|
+
- Improved formatter preservation for comments, unknown directive blocks, declaration order, parameterless rule declarations, and grammar structure.
|
|
25
|
+
- Made formatter layout options configurable and verified formatted output by reparsing it before writing.
|
|
26
|
+
- Improved CLI failure behavior for CI, including parse errors as diagnostics and configurable `--fail-level` thresholds.
|
|
27
|
+
- Made rule filtering stricter by validating unknown `--only` and `--except` rule names before running lint checks.
|
|
28
|
+
- Reflected configured rule enablement and severity in `collie rules --format json`.
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
|
|
32
|
+
- Fixed repeated `--only` and `--except` handling so rule filters do not consume positional file arguments.
|
|
33
|
+
- Fixed config loading errors for missing, invalid, inherited, and non-mapping YAML files.
|
|
34
|
+
- Fixed autocorrection so one correction does not clobber changes made by another correction.
|
|
35
|
+
- Reduced false positives in right-recursion and unused-rule diagnostics for LR grammar workflows.
|
|
36
|
+
|
|
10
37
|
## 0.1.0 - 2025-12-17
|
|
11
38
|
|
|
12
|
-
- Initial commit
|
|
39
|
+
- Initial commit
|
data/README.md
CHANGED
|
@@ -1,18 +1,11 @@
|
|
|
1
1
|
# Collie
|
|
2
2
|
|
|
3
|
-
A linter and formatter for Lrama Style BNF grammar files (
|
|
3
|
+
A linter and formatter for Lrama Style BNF grammar files (`.y` files).
|
|
4
4
|
|
|
5
5
|
[](https://github.com/ydah/collie/actions)
|
|
6
6
|
[](https://badge.fury.io/rb/collie)
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
- 18 Built-in Lint Rules - Catch common errors and suggest improvements
|
|
11
|
-
- Lrama Extension Support - Full support for parameterized rules, named references, and inline rules
|
|
12
|
-
- Smart Formatting - Consistent indentation, alignment, and spacing
|
|
13
|
-
- Configurable - Customize rules and formatting options via `.collie.yml`
|
|
14
|
-
- Multiple Output Formats - Text, JSON, and GitHub Actions annotations
|
|
15
|
-
- Auto-correction - Automatically fix certain issues
|
|
8
|
+
Collie checks grammar files for common mistakes, formats them consistently, and supports Lrama-specific syntax such as parameterized rules, named references, and inline rules.
|
|
16
9
|
|
|
17
10
|
## Installation
|
|
18
11
|
|
|
@@ -20,199 +13,86 @@ A linter and formatter for Lrama Style BNF grammar files (.y files). Collie help
|
|
|
20
13
|
gem install collie
|
|
21
14
|
```
|
|
22
15
|
|
|
23
|
-
Or add to your Gemfile:
|
|
16
|
+
Or add it to your Gemfile:
|
|
24
17
|
|
|
25
18
|
```ruby
|
|
26
|
-
gem
|
|
19
|
+
gem "collie", require: false
|
|
27
20
|
```
|
|
28
21
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
### Try Online
|
|
32
|
-
|
|
33
|
-
Try Collie in your browser without installing anything:
|
|
34
|
-
|
|
35
|
-
[Open Playground](https://ydah.github.io/collie/playground/) (Coming soon)
|
|
22
|
+
Collie requires Ruby 3.2 or newer.
|
|
36
23
|
|
|
37
|
-
|
|
24
|
+
## Quick Start
|
|
38
25
|
|
|
39
26
|
```bash
|
|
40
|
-
#
|
|
27
|
+
# Lint files, globs, or directories
|
|
41
28
|
collie lint parse.y
|
|
29
|
+
collie lint "src/**/*.y"
|
|
30
|
+
collie lint grammars/
|
|
42
31
|
|
|
43
|
-
#
|
|
44
|
-
collie
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
### Format a grammar file
|
|
48
|
-
|
|
49
|
-
```bash
|
|
50
|
-
# Check formatting
|
|
32
|
+
# Format
|
|
33
|
+
collie fmt parse.y
|
|
51
34
|
collie fmt --check parse.y
|
|
35
|
+
collie fmt --diff parse.y
|
|
52
36
|
|
|
53
|
-
#
|
|
54
|
-
collie
|
|
37
|
+
# Auto-correct supported lint offenses
|
|
38
|
+
collie lint -a parse.y
|
|
55
39
|
|
|
56
|
-
#
|
|
57
|
-
collie
|
|
40
|
+
# Inspect rules
|
|
41
|
+
collie rules
|
|
42
|
+
collie explain DuplicateToken
|
|
58
43
|
```
|
|
59
44
|
|
|
60
|
-
|
|
45
|
+
## Configuration
|
|
46
|
+
|
|
47
|
+
Generate a config file:
|
|
61
48
|
|
|
62
49
|
```bash
|
|
63
|
-
collie
|
|
50
|
+
collie init
|
|
51
|
+
collie init --profile lrama
|
|
52
|
+
collie init --profile strict --path .collie.yml
|
|
64
53
|
```
|
|
65
54
|
|
|
66
|
-
|
|
55
|
+
Profiles: `default`, `lrama`, `bison`, `strict`, `minimal`.
|
|
67
56
|
|
|
68
|
-
|
|
57
|
+
Minimal `.collie.yml`:
|
|
69
58
|
|
|
70
59
|
```yaml
|
|
71
|
-
|
|
72
|
-
|
|
60
|
+
include:
|
|
61
|
+
- "src/**/*.y"
|
|
62
|
+
exclude:
|
|
63
|
+
- "vendor/**/*"
|
|
73
64
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
enabled: true
|
|
78
|
-
severity: error
|
|
65
|
+
formatter:
|
|
66
|
+
indent_size: 2
|
|
67
|
+
max_line_length: 120
|
|
79
68
|
|
|
69
|
+
rules:
|
|
80
70
|
TokenNaming:
|
|
81
|
-
enabled: true
|
|
82
71
|
severity: convention
|
|
83
|
-
pattern: '^[A-Z][A-Z0-9_]*$'
|
|
84
|
-
|
|
85
|
-
LongRule:
|
|
86
|
-
enabled: true
|
|
87
|
-
max_alternatives: 10
|
|
88
|
-
|
|
89
|
-
# Disable specific rules
|
|
90
72
|
LeftRecursion:
|
|
91
73
|
enabled: false
|
|
92
|
-
|
|
93
|
-
# Formatter options
|
|
94
|
-
formatter:
|
|
95
|
-
indent_size: 4
|
|
96
|
-
align_tokens: true
|
|
97
|
-
align_alternatives: true
|
|
98
|
-
blank_lines_around_sections: 2
|
|
99
|
-
max_line_length: 120
|
|
100
|
-
|
|
101
|
-
# File patterns
|
|
102
|
-
include:
|
|
103
|
-
- '**/*.y'
|
|
104
|
-
exclude:
|
|
105
|
-
- 'vendor/**/*'
|
|
106
|
-
- 'tmp/**/*'
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
## Available Rules
|
|
110
|
-
|
|
111
|
-
### Validation Rules (Error)
|
|
112
|
-
|
|
113
|
-
| Rule | Description | Auto-fix |
|
|
114
|
-
|------|-------------|----------|
|
|
115
|
-
| `DuplicateToken` | Token defined multiple times | No |
|
|
116
|
-
| `UndefinedSymbol` | Reference to undeclared token/nonterminal | No |
|
|
117
|
-
| `UnreachableRule` | Rule not derivable from start symbol | No |
|
|
118
|
-
| `CircularReference` | Infinite recursion in grammar | No |
|
|
119
|
-
| `MissingStartSymbol` | No `%start` declaration with ambiguous default | No |
|
|
120
|
-
|
|
121
|
-
### Warning Rules
|
|
122
|
-
|
|
123
|
-
| Rule | Description | Auto-fix |
|
|
124
|
-
|------|-------------|----------|
|
|
125
|
-
| `UnusedNonterminal` | Nonterminal defined but never referenced | No |
|
|
126
|
-
| `UnusedToken` | Token declared but never used | No |
|
|
127
|
-
| `LeftRecursion` | Detects left recursion (informational) | No |
|
|
128
|
-
| `RightRecursion` | Suggests left recursion conversion | No |
|
|
129
|
-
| `AmbiguousPrecedence` | Operators without explicit precedence | No |
|
|
130
|
-
|
|
131
|
-
### Style Rules (Convention)
|
|
132
|
-
|
|
133
|
-
| Rule | Description | Auto-fix |
|
|
134
|
-
|------|-------------|----------|
|
|
135
|
-
| `TokenNaming` | Tokens should be UPPER_CASE | No |
|
|
136
|
-
| `NonterminalNaming` | Nonterminals should be snake_case | No |
|
|
137
|
-
| `ConsistentTagNaming` | Type tags should be consistent | No |
|
|
138
|
-
| `TrailingWhitespace` | No trailing whitespace at end of lines | Yes |
|
|
139
|
-
| `EmptyAction` | Warns on empty `{ }` actions | Yes |
|
|
140
|
-
| `LongRule` | Rule with too many alternatives | No |
|
|
141
|
-
|
|
142
|
-
### Optimization Rules (Info)
|
|
143
|
-
|
|
144
|
-
| Rule | Description | Auto-fix |
|
|
145
|
-
|------|-------------|----------|
|
|
146
|
-
| `FactorizableRules` | Suggests factoring common prefixes | No |
|
|
147
|
-
| `RedundantEpsilon` | Unnecessary epsilon productions | No |
|
|
148
|
-
| `PrecImprovement` | Suggests `%prec` improvements | No |
|
|
149
|
-
|
|
150
|
-
## Usage Examples
|
|
151
|
-
|
|
152
|
-
### Example Grammar File
|
|
153
|
-
|
|
154
|
-
```yacc
|
|
155
|
-
%token <node> CLASS MODULE DEF
|
|
156
|
-
%token <id> IDENTIFIER CONSTANT
|
|
157
|
-
%token <num> INTEGER FLOAT
|
|
158
|
-
|
|
159
|
-
%left '+' '-'
|
|
160
|
-
%left '*' '/'
|
|
161
|
-
%right '^'
|
|
162
|
-
|
|
163
|
-
%%
|
|
164
|
-
|
|
165
|
-
program
|
|
166
|
-
: class_definition
|
|
167
|
-
| module_definition
|
|
168
|
-
;
|
|
169
|
-
|
|
170
|
-
class_definition
|
|
171
|
-
: CLASS CONSTANT '{' class_body '}'
|
|
172
|
-
{ $$ = make_class($2, $4); }
|
|
173
|
-
;
|
|
174
|
-
|
|
175
|
-
expr
|
|
176
|
-
: expr '+' expr { $$ = add($1, $3); }
|
|
177
|
-
| expr '-' expr { $$ = sub($1, $3); }
|
|
178
|
-
| expr '*' expr { $$ = mul($1, $3); }
|
|
179
|
-
| '(' expr ')' { $$ = $2; }
|
|
180
|
-
| IDENTIFIER { $$ = var($1); }
|
|
181
|
-
| INTEGER { $$ = num($1); }
|
|
182
|
-
;
|
|
183
|
-
|
|
184
|
-
%%
|
|
185
74
|
```
|
|
186
75
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
Collie fully supports Lrama-specific syntax:
|
|
190
|
-
|
|
191
|
-
```yacc
|
|
192
|
-
# Parameterized Rules
|
|
193
|
-
%rule pair(X, Y): X COMMA Y ;
|
|
76
|
+
## Commands
|
|
194
77
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
78
|
+
| Command | Purpose |
|
|
79
|
+
| --- | --- |
|
|
80
|
+
| `collie lint [OPTIONS] FILES` | Lint grammar files. Supports `--format text\|json\|github\|sarif`, `--fail-level`, `--only`, `--except`, `--stdin`, and `--autocorrect`. |
|
|
81
|
+
| `collie fmt [OPTIONS] FILES` | Format grammar files. Supports `--check`, `--diff`, `--config`, and `--stdin`. |
|
|
82
|
+
| `collie rules [--format text\|json]` | List available lint rules. |
|
|
83
|
+
| `collie explain RULE [--format text\|json]` | Show rule metadata. |
|
|
84
|
+
| `collie config-schema` | Print the JSON Schema for `.collie.yml`. |
|
|
85
|
+
| `collie tokens FILE` | Print lexer tokens as JSON. |
|
|
86
|
+
| `collie ast FILE` | Print the parsed AST as JSON. |
|
|
87
|
+
| `collie version` | Print the installed version. |
|
|
199
88
|
|
|
200
|
-
|
|
201
|
-
assignment
|
|
202
|
-
: IDENTIFIER[var] EQUALS NUMBER[value]
|
|
203
|
-
{ assign($var, $value); }
|
|
204
|
-
;
|
|
205
|
-
|
|
206
|
-
# Inline Rules
|
|
207
|
-
%inline opt(X): /* empty */ | X ;
|
|
208
|
-
```
|
|
89
|
+
Run `collie help COMMAND` for all options.
|
|
209
90
|
|
|
210
|
-
|
|
91
|
+
## CI
|
|
211
92
|
|
|
212
|
-
Use the reusable
|
|
93
|
+
Use the reusable GitHub Actions workflow:
|
|
213
94
|
|
|
214
95
|
```yaml
|
|
215
|
-
# .github/workflows/lint.yml
|
|
216
96
|
name: Lint Grammar Files
|
|
217
97
|
|
|
218
98
|
on: [push, pull_request]
|
|
@@ -221,113 +101,30 @@ jobs:
|
|
|
221
101
|
lint:
|
|
222
102
|
uses: ydah/collie/.github/workflows/lint.yml@main
|
|
223
103
|
with:
|
|
224
|
-
files:
|
|
225
|
-
config:
|
|
104
|
+
files: "src/**/*.y"
|
|
105
|
+
config: ".collie.yml"
|
|
226
106
|
fail-on-warnings: true
|
|
227
107
|
```
|
|
228
108
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
```ruby
|
|
232
|
-
require 'collie'
|
|
233
|
-
|
|
234
|
-
# Parse a grammar file
|
|
235
|
-
parser = Collie::Parser::Parser.new(source_code)
|
|
236
|
-
ast = parser.parse
|
|
237
|
-
|
|
238
|
-
# Analyze the grammar
|
|
239
|
-
symbol_table = Collie::Analyzer::SymbolTable.new(ast)
|
|
240
|
-
symbol_table.build
|
|
241
|
-
|
|
242
|
-
# Run linter
|
|
243
|
-
config = Collie::Config.new
|
|
244
|
-
linter = Collie::Linter.new(config)
|
|
245
|
-
offenses = linter.lint(ast)
|
|
246
|
-
|
|
247
|
-
# Format the grammar
|
|
248
|
-
formatter = Collie::Formatter::Formatter.new(config.formatter_options)
|
|
249
|
-
formatted_code = formatter.format(ast)
|
|
250
|
-
```
|
|
251
|
-
|
|
252
|
-
## Command Line Options
|
|
253
|
-
|
|
254
|
-
### `collie lint`
|
|
255
|
-
|
|
256
|
-
```bash
|
|
257
|
-
collie lint [OPTIONS] FILES
|
|
258
|
-
|
|
259
|
-
Options:
|
|
260
|
-
--config PATH Path to config file (default: .collie.yml)
|
|
261
|
-
--format FORMAT Output format: text, json, github (default: text)
|
|
262
|
-
-a, --autocorrect Auto-fix offenses where possible
|
|
263
|
-
--only RULES Run only specified rules (comma-separated)
|
|
264
|
-
--except RULES Exclude specified rules (comma-separated)
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
### `collie fmt`
|
|
109
|
+
For code scanning integrations:
|
|
268
110
|
|
|
269
111
|
```bash
|
|
270
|
-
collie
|
|
271
|
-
|
|
272
|
-
Options:
|
|
273
|
-
--config PATH Path to config file
|
|
274
|
-
--check Check only, don't modify files
|
|
275
|
-
--diff Show diff of changes
|
|
276
|
-
```
|
|
277
|
-
|
|
278
|
-
### `collie rules`
|
|
279
|
-
|
|
280
|
-
```bash
|
|
281
|
-
collie rules [OPTIONS]
|
|
282
|
-
|
|
283
|
-
Options:
|
|
284
|
-
--format FORMAT Output format: text, json (default: text)
|
|
112
|
+
collie lint --format sarif parse.y
|
|
285
113
|
```
|
|
286
114
|
|
|
287
115
|
## Development
|
|
288
116
|
|
|
289
117
|
```bash
|
|
290
|
-
# Install dependencies
|
|
291
118
|
bundle install
|
|
292
|
-
|
|
293
|
-
# Run tests
|
|
294
119
|
bundle exec rspec
|
|
295
|
-
|
|
296
|
-
# Run linter
|
|
297
|
-
bundle exec rubocop
|
|
298
|
-
|
|
299
|
-
# Run all checks
|
|
300
120
|
bundle exec rake
|
|
301
121
|
```
|
|
302
122
|
|
|
303
|
-
##
|
|
123
|
+
## Documentation
|
|
304
124
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
308
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
|
309
|
-
5. Create new Pull Request
|
|
125
|
+
- [Tutorial](docs/TUTORIAL.md)
|
|
126
|
+
- [Changelog](CHANGELOG.md)
|
|
310
127
|
|
|
311
128
|
## License
|
|
312
129
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
## Credits
|
|
316
|
-
|
|
317
|
-
Developed for improving the development experience with [Lrama](https://github.com/ruby/lrama), the next-generation parser generator for Ruby.
|
|
318
|
-
|
|
319
|
-
## Related Projects
|
|
320
|
-
|
|
321
|
-
### Editor Integration (Planned)
|
|
322
|
-
|
|
323
|
-
- collie-lsp - LSP (Language Server Protocol) implementation for Collie
|
|
324
|
-
- vscode-collie - VS Code extension for Collie
|
|
325
|
-
|
|
326
|
-
### Parser Generators
|
|
327
|
-
|
|
328
|
-
- [Lrama](https://github.com/ruby/lrama) - LALR (1) parser generator
|
|
329
|
-
- [Bison](https://www.gnu.org/software/bison/) - GNU parser generator
|
|
330
|
-
|
|
331
|
-
### Inspiration
|
|
332
|
-
|
|
333
|
-
- [RuboCop](https://github.com/rubocop/rubocop) - Ruby static code analyzer (inspiration for architecture)
|
|
130
|
+
MIT License. See [LICENSE.txt](LICENSE.txt).
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
3
5
|
require_relative "../ast"
|
|
4
6
|
|
|
5
7
|
module Collie
|
|
@@ -27,40 +29,35 @@ module Collie
|
|
|
27
29
|
private
|
|
28
30
|
|
|
29
31
|
def build_dependency_graph
|
|
30
|
-
|
|
31
|
-
|
|
32
|
+
rule_like_nodes.each do |rule|
|
|
33
|
+
current_rule_name = rule_name(rule)
|
|
34
|
+
|
|
32
35
|
rule.alternatives.each do |alt|
|
|
33
36
|
alt.symbols.each do |symbol|
|
|
34
37
|
if symbol.nonterminal?
|
|
35
|
-
@dependencies[
|
|
38
|
+
@dependencies[current_rule_name] << symbol.name
|
|
36
39
|
# Also consider parameterized rule call arguments: list(expr)
|
|
37
40
|
if symbol.arguments
|
|
38
41
|
symbol.arguments.each do |arg|
|
|
39
|
-
@dependencies[
|
|
42
|
+
@dependencies[current_rule_name] << arg.name if arg.nonterminal?
|
|
40
43
|
end
|
|
41
44
|
end
|
|
42
45
|
end
|
|
43
46
|
end
|
|
44
47
|
end
|
|
45
48
|
end
|
|
49
|
+
end
|
|
46
50
|
|
|
47
|
-
|
|
48
|
-
@grammar.declarations.
|
|
49
|
-
|
|
51
|
+
def rule_like_nodes
|
|
52
|
+
@grammar.rules + @grammar.declarations.select { |declaration| rule_like_declaration?(declaration) }
|
|
53
|
+
end
|
|
50
54
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
@dependencies[decl.name] << arg.name if arg.nonterminal?
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
end
|
|
63
|
-
end
|
|
55
|
+
def rule_like_declaration?(declaration)
|
|
56
|
+
declaration.is_a?(AST::ParameterizedRule) || declaration.is_a?(AST::InlineRule)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def rule_name(rule)
|
|
60
|
+
rule.is_a?(AST::InlineRule) ? rule.rule : rule.name
|
|
64
61
|
end
|
|
65
62
|
|
|
66
63
|
def infer_start_symbol
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
3
5
|
require_relative "../ast"
|
|
4
6
|
|
|
5
7
|
module Collie
|
|
@@ -13,7 +15,7 @@ module Collie
|
|
|
13
15
|
end
|
|
14
16
|
|
|
15
17
|
def analyze
|
|
16
|
-
|
|
18
|
+
rule_like_nodes.each do |rule|
|
|
17
19
|
check_left_recursion(rule)
|
|
18
20
|
check_right_recursion(rule)
|
|
19
21
|
end
|
|
@@ -34,13 +36,27 @@ module Collie
|
|
|
34
36
|
|
|
35
37
|
private
|
|
36
38
|
|
|
39
|
+
def rule_like_nodes
|
|
40
|
+
@grammar.rules + @grammar.declarations.select { |declaration| rule_like_declaration?(declaration) }
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def rule_like_declaration?(declaration)
|
|
44
|
+
declaration.is_a?(AST::ParameterizedRule) || declaration.is_a?(AST::InlineRule)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def rule_name(rule)
|
|
48
|
+
rule.is_a?(AST::InlineRule) ? rule.rule : rule.name
|
|
49
|
+
end
|
|
50
|
+
|
|
37
51
|
def check_left_recursion(rule)
|
|
52
|
+
name = rule_name(rule)
|
|
53
|
+
|
|
38
54
|
rule.alternatives.each do |alt|
|
|
39
55
|
next if alt.symbols.empty?
|
|
40
56
|
|
|
41
57
|
first_symbol = alt.symbols.first
|
|
42
|
-
if first_symbol.nonterminal? && first_symbol.name ==
|
|
43
|
-
@left_recursive <<
|
|
58
|
+
if first_symbol.nonterminal? && first_symbol.name == name && !@left_recursive.include?(name)
|
|
59
|
+
@left_recursive << name
|
|
44
60
|
end
|
|
45
61
|
end
|
|
46
62
|
|
|
@@ -49,23 +65,26 @@ module Collie
|
|
|
49
65
|
end
|
|
50
66
|
|
|
51
67
|
def check_right_recursion(rule)
|
|
68
|
+
name = rule_name(rule)
|
|
69
|
+
|
|
52
70
|
rule.alternatives.each do |alt|
|
|
53
71
|
next if alt.symbols.empty?
|
|
54
72
|
|
|
55
73
|
last_symbol = alt.symbols.last
|
|
56
|
-
if last_symbol.nonterminal? && last_symbol.name ==
|
|
57
|
-
@right_recursive <<
|
|
74
|
+
if last_symbol.nonterminal? && last_symbol.name == name && !@right_recursive.include?(name)
|
|
75
|
+
@right_recursive << name
|
|
58
76
|
end
|
|
59
77
|
end
|
|
60
78
|
end
|
|
61
79
|
|
|
62
80
|
def check_indirect_left_recursion(rule, visited = Set.new)
|
|
63
|
-
|
|
81
|
+
name = rule_name(rule)
|
|
82
|
+
return if visited.include?(name)
|
|
64
83
|
|
|
65
|
-
visited <<
|
|
84
|
+
visited << name
|
|
66
85
|
|
|
67
86
|
rule.alternatives.each do |alt|
|
|
68
|
-
check_alternative_for_indirect_recursion(alt,
|
|
87
|
+
check_alternative_for_indirect_recursion(alt, name)
|
|
69
88
|
end
|
|
70
89
|
end
|
|
71
90
|
|
|
@@ -75,7 +94,7 @@ module Collie
|
|
|
75
94
|
first_symbol = alt.symbols.first
|
|
76
95
|
return unless first_symbol.nonterminal?
|
|
77
96
|
|
|
78
|
-
dependent_rule =
|
|
97
|
+
dependent_rule = rule_like_nodes.find { |candidate| rule_name(candidate) == first_symbol.name }
|
|
79
98
|
return unless dependent_rule
|
|
80
99
|
|
|
81
100
|
check_dependent_rule_for_recursion(dependent_rule, rule_name)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../ast"
|
|
4
|
+
|
|
5
|
+
module Collie
|
|
6
|
+
module Analyzer
|
|
7
|
+
# Resolves symbol kinds using declared tokens and nonterminals.
|
|
8
|
+
class SymbolResolver
|
|
9
|
+
def self.resolve(grammar, symbol_table)
|
|
10
|
+
new(grammar, symbol_table).resolve
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def initialize(grammar, symbol_table)
|
|
14
|
+
@grammar = grammar
|
|
15
|
+
@symbol_table = symbol_table
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def resolve
|
|
19
|
+
each_alternative do |alternative|
|
|
20
|
+
alternative.symbols.each { |symbol| resolve_symbol(symbol) }
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
@grammar
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
private
|
|
27
|
+
|
|
28
|
+
def each_alternative
|
|
29
|
+
@grammar.rules.each do |rule|
|
|
30
|
+
rule.alternatives.each { |alternative| yield alternative }
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
@grammar.declarations.each do |declaration|
|
|
34
|
+
next unless declaration.is_a?(AST::ParameterizedRule) || declaration.is_a?(AST::InlineRule)
|
|
35
|
+
|
|
36
|
+
declaration.alternatives.each { |alternative| yield alternative }
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def resolve_symbol(symbol)
|
|
41
|
+
if @symbol_table.token?(symbol.name)
|
|
42
|
+
symbol.kind = :terminal
|
|
43
|
+
elsif @symbol_table.nonterminal?(symbol.name)
|
|
44
|
+
symbol.kind = :nonterminal
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
symbol.arguments&.each { |argument| resolve_symbol(argument) }
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
data/lib/collie/ast.rb
CHANGED
|
@@ -84,6 +84,16 @@ module Collie
|
|
|
84
84
|
end
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
+
# Declaration node for directives Collie does not understand yet.
|
|
88
|
+
class UnknownDeclaration
|
|
89
|
+
attr_accessor :source, :location
|
|
90
|
+
|
|
91
|
+
def initialize(source:, location: nil)
|
|
92
|
+
@source = source
|
|
93
|
+
@location = location
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
87
97
|
# Grammar rule node
|
|
88
98
|
class Rule
|
|
89
99
|
attr_accessor :name, :alternatives, :location
|
|
@@ -97,12 +107,14 @@ module Collie
|
|
|
97
107
|
|
|
98
108
|
# Alternative production for a rule
|
|
99
109
|
class Alternative
|
|
100
|
-
attr_accessor :symbols, :action, :prec, :location
|
|
110
|
+
attr_accessor :symbols, :action, :prec, :explicit_empty, :empty_marker, :location
|
|
101
111
|
|
|
102
|
-
def initialize(symbols: [], action: nil, prec: nil, location: nil)
|
|
112
|
+
def initialize(symbols: [], action: nil, prec: nil, explicit_empty: false, empty_marker: nil, location: nil)
|
|
103
113
|
@symbols = symbols
|
|
104
114
|
@action = action
|
|
105
115
|
@prec = prec
|
|
116
|
+
@explicit_empty = explicit_empty
|
|
117
|
+
@empty_marker = empty_marker
|
|
106
118
|
@location = location
|
|
107
119
|
end
|
|
108
120
|
end
|
|
@@ -152,10 +164,12 @@ module Collie
|
|
|
152
164
|
|
|
153
165
|
# Lrama extension: Inline rule
|
|
154
166
|
class InlineRule
|
|
155
|
-
attr_accessor :rule, :location
|
|
167
|
+
attr_accessor :rule, :parameters, :alternatives, :location
|
|
156
168
|
|
|
157
|
-
def initialize(rule:, location: nil)
|
|
169
|
+
def initialize(rule:, parameters: [], alternatives: [], location: nil)
|
|
158
170
|
@rule = rule
|
|
171
|
+
@parameters = parameters
|
|
172
|
+
@alternatives = alternatives
|
|
159
173
|
@location = location
|
|
160
174
|
end
|
|
161
175
|
end
|