collie 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +333 -0
- data/Rakefile +9 -0
- data/collie.gemspec +37 -0
- data/docs/TUTORIAL.md +588 -0
- data/docs/index.html +56 -0
- data/docs/playground/README.md +134 -0
- data/docs/playground/build-collie-bundle.rb +85 -0
- data/docs/playground/css/styles.css +402 -0
- data/docs/playground/index.html +146 -0
- data/docs/playground/js/app.js +231 -0
- data/docs/playground/js/collie-bridge.js +186 -0
- data/docs/playground/js/editor.js +129 -0
- data/docs/playground/js/examples.js +80 -0
- data/docs/playground/js/ruby-runner.js +75 -0
- data/docs/playground/test-server.sh +18 -0
- data/exe/collie +15 -0
- data/lib/collie/analyzer/conflict.rb +114 -0
- data/lib/collie/analyzer/reachability.rb +83 -0
- data/lib/collie/analyzer/recursion.rb +96 -0
- data/lib/collie/analyzer/symbol_table.rb +67 -0
- data/lib/collie/ast.rb +183 -0
- data/lib/collie/cli.rb +249 -0
- data/lib/collie/config.rb +91 -0
- data/lib/collie/formatter/formatter.rb +196 -0
- data/lib/collie/formatter/options.rb +23 -0
- data/lib/collie/linter/base.rb +62 -0
- data/lib/collie/linter/registry.rb +34 -0
- data/lib/collie/linter/rules/ambiguous_precedence.rb +87 -0
- data/lib/collie/linter/rules/circular_reference.rb +89 -0
- data/lib/collie/linter/rules/consistent_tag_naming.rb +69 -0
- data/lib/collie/linter/rules/duplicate_token.rb +38 -0
- data/lib/collie/linter/rules/empty_action.rb +52 -0
- data/lib/collie/linter/rules/factorizable_rules.rb +67 -0
- data/lib/collie/linter/rules/left_recursion.rb +34 -0
- data/lib/collie/linter/rules/long_rule.rb +37 -0
- data/lib/collie/linter/rules/missing_start_symbol.rb +38 -0
- data/lib/collie/linter/rules/nonterminal_naming.rb +34 -0
- data/lib/collie/linter/rules/prec_improvement.rb +54 -0
- data/lib/collie/linter/rules/redundant_epsilon.rb +44 -0
- data/lib/collie/linter/rules/right_recursion.rb +35 -0
- data/lib/collie/linter/rules/token_naming.rb +39 -0
- data/lib/collie/linter/rules/trailing_whitespace.rb +46 -0
- data/lib/collie/linter/rules/undefined_symbol.rb +55 -0
- data/lib/collie/linter/rules/unreachable_rule.rb +49 -0
- data/lib/collie/linter/rules/unused_nonterminal.rb +93 -0
- data/lib/collie/linter/rules/unused_token.rb +82 -0
- data/lib/collie/parser/lexer.rb +349 -0
- data/lib/collie/parser/parser.rb +416 -0
- data/lib/collie/reporter/github.rb +35 -0
- data/lib/collie/reporter/json.rb +52 -0
- data/lib/collie/reporter/text.rb +97 -0
- data/lib/collie/version.rb +5 -0
- data/lib/collie.rb +52 -0
- metadata +145 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../ast"
|
|
4
|
+
|
|
5
|
+
module Collie
|
|
6
|
+
module Parser
|
|
7
|
+
# A single lexical token.
#
# Holds three freely readable and writable attributes:
#   type     - the token kind (whatever the caller passes, e.g. a Symbol)
#   value    - the token's text or payload
#   location - where the token was found (opaque to this class)
class Token
  attr_accessor :type, :value, :location

  # All three keywords are required; they are stored as given.
  def initialize(type:, value:, location:)
    @type, @value, @location = type, value, location
  end

  # Renders the token as TYPE(value.inspect), e.g. IDENTIFIER("expr").
  # The location is not part of the string.
  def to_s
    format("%s(%s)", type, value.inspect)
  end
end
|
|
21
|
+
|
|
22
|
+
# Lexer for .y grammar files.
#
# Walks the source one character at a time and produces a flat array of Token
# objects. Every token carries an AST::Location (file, line, column, length)
# that points at the token's first character; line and column are 1-based.
#
# Whitespace, `//` line comments and `/* ... */` block comments produce no
# token. Any other character that does not start a known token is skipped
# silently.
class Lexer
  KEYWORDS = %w[
    %token %type %left %right %nonassoc %prec %union %start
    %rule %inline %code %expect %define %param %parse-param
    %lex-param %initial-action %destructor %printer
  ].freeze

  # Single-character punctuation and the token type each one produces.
  PUNCTUATION = {
    "|" => :PIPE,
    ":" => :COLON,
    ";" => :SEMICOLON,
    "(" => :LPAREN,
    ")" => :RPAREN,
    "[" => :LBRACKET,
    "]" => :RBRACKET,
    "," => :COMMA
  }.freeze

  # Directives that get a dedicated token type; every other directive is
  # reported with the generic :DIRECTIVE type.
  DIRECTIVE_TYPES = {
    "%token" => :TOKEN,
    "%type" => :TYPE,
    "%left" => :LEFT,
    "%right" => :RIGHT,
    "%nonassoc" => :NONASSOC,
    "%prec" => :PREC,
    "%union" => :UNION,
    "%start" => :START,
    "%rule" => :RULE,
    "%inline" => :INLINE
  }.freeze

  private_constant :PUNCTUATION, :DIRECTIVE_TYPES

  # @param source [String] full text of the grammar file
  # @param filename [String] name recorded in every token location
  def initialize(source, filename: "<input>")
    @source = source
    @filename = filename
    @pos = 0
    @line = 1
    @column = 1
    @tokens = []
  end

  # Scans the remaining source and returns the accumulated token list.
  #
  # @return [Array<Token>] all tokens in source order, terminated by an :EOF
  #   token whose value is the empty string
  def tokenize
    until eof?
      skip_whitespace
      break if eof?

      token = next_token
      @tokens << token if token
    end

    @tokens << make_token(:EOF, "")
    @tokens
  end

  private

  # Consumes one lexical element starting at the current character.
  # Returns the Token it produced, or nil when the input was only skipped
  # (comment or unrecognised character).
  def next_token
    char = current_char

    case char
    when "/" then lex_slash
    when "%" then lex_percent
    when "{" then tokenize_action
    when "'" then tokenize_char_literal
    when '"' then tokenize_string_literal
    when "<" then tokenize_type_tag
    else
      if PUNCTUATION.key?(char)
        punctuation_token(PUNCTUATION[char], char)
      elsif alpha?(char) || char == "_"
        tokenize_identifier
      else
        advance # unrecognised character: drop it
        nil
      end
    end
  end

  # Handles input starting with "/": skips a comment, or drops a lone slash.
  # Never produces a token.
  def lex_slash
    case peek_char
    when "/" then skip_line_comment
    when "*" then skip_block_comment
    else advance
    end
    nil
  end

  # Handles input starting with "%". The two-character markers are tested
  # before directives so "%%", "%{" and "%}" are never read as a directive.
  # A "%" followed by anything else is dropped.
  def lex_percent
    follower = peek_char

    case follower
    when "{" then tokenize_prologue
    when "}" then punctuation_token(:PROLOGUE_END, "%}")
    when "%" then punctuation_token(:SECTION_SEPARATOR, "%%")
    else
      return tokenize_directive if alpha?(follower)

      advance
      nil
    end
  end

  # Builds a fixed-text token at the current position and consumes its text.
  # The location is taken BEFORE advancing so that it points at the token's
  # first character, matching the tokens built by the tokenize_* methods.
  def punctuation_token(type, text)
    location = make_location(@line, @column, text.length)
    advance(text.length)
    Token.new(type: type, value: text, location: location)
  end

  def eof?
    @pos >= @source.length
  end

  # Character under the cursor, or nil at end of input.
  def current_char
    return nil if eof?

    @source[@pos]
  end

  # Character `offset` positions ahead of the cursor, or nil past the end.
  def peek_char(offset = 1)
    return nil if @pos + offset >= @source.length

    @source[@pos + offset]
  end

  # Moves the cursor forward by up to `count` characters, stopping at end of
  # input. A newline bumps the line counter and resets the column to 1; any
  # other character bumps the column.
  def advance(count = 1)
    count.times do
      break if eof?

      if @source[@pos] == "\n"
        @line += 1
        @column = 1
      else
        @column += 1
      end
      @pos += 1
    end
  end

  def skip_whitespace
    advance while !eof? && whitespace?(current_char)
  end

  # Skips a `//` comment up to and including the terminating newline.
  def skip_line_comment
    advance(2) # skip //
    advance until eof? || current_char == "\n"
    advance unless eof? # skip \n
  end

  # Skips a `/* ... */` comment. An unterminated comment runs to end of input.
  def skip_block_comment
    advance(2) # skip /*
    until eof?
      if current_char == "*" && peek_char == "/"
        advance(2)
        break
      end
      advance
    end
  end

  # Reads the text after "%{" up to, but not including, the closing "%}".
  # The closing marker is left in the input, so the main loop emits it as a
  # separate :PROLOGUE_END token. Returns a :PROLOGUE_START token whose value
  # is the enclosed text.
  def tokenize_prologue
    start_line = @line
    start_column = @column
    advance(2) # skip %{

    buffer = +""
    until eof? || (current_char == "%" && peek_char == "}")
      buffer << current_char
      advance
    end

    Token.new(
      type: :PROLOGUE_START,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length + 2)
    )
  end

  # Reads a directive such as "%token" or "%parse-param": a run of letters,
  # "%" and "-". The value keeps the leading "%".
  def tokenize_directive
    start_line = @line
    start_column = @column
    buffer = +""

    while !eof? && (alpha?(current_char) || current_char == "%" || current_char == "-")
      buffer << current_char
      advance
    end

    Token.new(
      type: DIRECTIVE_TYPES.fetch(buffer, :DIRECTIVE),
      value: buffer,
      location: make_location(start_line, start_column, buffer.length)
    )
  end

  # Reads a brace-delimited action, including the outer braces, by counting
  # nesting depth. An unterminated action runs to end of input.
  #
  # NOTE(review): every "{" and "}" is counted, including ones that appear
  # inside string/char literals or comments within the action text.
  def tokenize_action
    start_line = @line
    start_column = @column
    buffer = +""
    depth = 0

    until eof?
      char = current_char
      buffer << char
      advance

      depth += 1 if char == "{"
      next unless char == "}"

      depth -= 1
      break if depth.zero?
    end

    Token.new(
      type: :ACTION,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length)
    )
  end

  # Reads a single-quoted literal; the value excludes the quotes.
  def tokenize_char_literal
    start_line = @line
    start_column = @column
    buffer = read_quoted("'")

    Token.new(
      type: :CHAR,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length + 2)
    )
  end

  # Reads a double-quoted literal; the value excludes the quotes.
  def tokenize_string_literal
    start_line = @line
    start_column = @column
    buffer = read_quoted('"')

    Token.new(
      type: :STRING,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length + 2)
    )
  end

  # Consumes a literal delimited by `quote` and returns the text between the
  # delimiters. A backslash and the character after it are copied verbatim,
  # so an escaped quote does not end the literal. An unterminated literal
  # runs to end of input.
  def read_quoted(quote)
    buffer = +""
    advance # skip opening quote

    until eof? || current_char == quote
      buffer << current_char
      if current_char == "\\"
        advance
        buffer << current_char unless eof?
      end
      advance
    end

    advance unless eof? # skip closing quote
    buffer
  end

  # Reads a "<...>" type tag; the value excludes the angle brackets.
  def tokenize_type_tag
    start_line = @line
    start_column = @column
    buffer = +""
    advance # skip <

    until eof? || current_char == ">"
      buffer << current_char
      advance
    end

    advance unless eof? # skip >

    Token.new(
      type: :TYPE_TAG,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length + 2)
    )
  end

  # Reads a run of ASCII letters, digits and underscores.
  def tokenize_identifier
    start_line = @line
    start_column = @column
    buffer = +""

    while !eof? && (alnum?(current_char) || current_char == "_")
      buffer << current_char
      advance
    end

    Token.new(
      type: :IDENTIFIER,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length)
    )
  end

  # Builds a token located at the CURRENT cursor position. Only suitable when
  # nothing has been consumed for the token yet (used for the :EOF marker).
  def make_token(type, value)
    Token.new(
      type: type,
      value: value,
      location: make_location(@line, @column, value.length)
    )
  end

  def make_location(line, column, length)
    AST::Location.new(
      file: @filename,
      line: line,
      column: column,
      length: length
    )
  end

  def whitespace?(char)
    char&.match?(/\s/)
  end

  def alpha?(char)
    char&.match?(/[a-zA-Z]/)
  end

  def alnum?(char)
    char&.match?(/[a-zA-Z0-9]/)
  end
end
|
|
348
|
+
end
|
|
349
|
+
end
|