collie 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +12 -0
  3. data/Gemfile +10 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +333 -0
  6. data/Rakefile +9 -0
  7. data/collie.gemspec +37 -0
  8. data/docs/TUTORIAL.md +588 -0
  9. data/docs/index.html +56 -0
  10. data/docs/playground/README.md +134 -0
  11. data/docs/playground/build-collie-bundle.rb +85 -0
  12. data/docs/playground/css/styles.css +402 -0
  13. data/docs/playground/index.html +146 -0
  14. data/docs/playground/js/app.js +231 -0
  15. data/docs/playground/js/collie-bridge.js +186 -0
  16. data/docs/playground/js/editor.js +129 -0
  17. data/docs/playground/js/examples.js +80 -0
  18. data/docs/playground/js/ruby-runner.js +75 -0
  19. data/docs/playground/test-server.sh +18 -0
  20. data/exe/collie +15 -0
  21. data/lib/collie/analyzer/conflict.rb +114 -0
  22. data/lib/collie/analyzer/reachability.rb +83 -0
  23. data/lib/collie/analyzer/recursion.rb +96 -0
  24. data/lib/collie/analyzer/symbol_table.rb +67 -0
  25. data/lib/collie/ast.rb +183 -0
  26. data/lib/collie/cli.rb +249 -0
  27. data/lib/collie/config.rb +91 -0
  28. data/lib/collie/formatter/formatter.rb +196 -0
  29. data/lib/collie/formatter/options.rb +23 -0
  30. data/lib/collie/linter/base.rb +62 -0
  31. data/lib/collie/linter/registry.rb +34 -0
  32. data/lib/collie/linter/rules/ambiguous_precedence.rb +87 -0
  33. data/lib/collie/linter/rules/circular_reference.rb +89 -0
  34. data/lib/collie/linter/rules/consistent_tag_naming.rb +69 -0
  35. data/lib/collie/linter/rules/duplicate_token.rb +38 -0
  36. data/lib/collie/linter/rules/empty_action.rb +52 -0
  37. data/lib/collie/linter/rules/factorizable_rules.rb +67 -0
  38. data/lib/collie/linter/rules/left_recursion.rb +34 -0
  39. data/lib/collie/linter/rules/long_rule.rb +37 -0
  40. data/lib/collie/linter/rules/missing_start_symbol.rb +38 -0
  41. data/lib/collie/linter/rules/nonterminal_naming.rb +34 -0
  42. data/lib/collie/linter/rules/prec_improvement.rb +54 -0
  43. data/lib/collie/linter/rules/redundant_epsilon.rb +44 -0
  44. data/lib/collie/linter/rules/right_recursion.rb +35 -0
  45. data/lib/collie/linter/rules/token_naming.rb +39 -0
  46. data/lib/collie/linter/rules/trailing_whitespace.rb +46 -0
  47. data/lib/collie/linter/rules/undefined_symbol.rb +55 -0
  48. data/lib/collie/linter/rules/unreachable_rule.rb +49 -0
  49. data/lib/collie/linter/rules/unused_nonterminal.rb +93 -0
  50. data/lib/collie/linter/rules/unused_token.rb +82 -0
  51. data/lib/collie/parser/lexer.rb +349 -0
  52. data/lib/collie/parser/parser.rb +416 -0
  53. data/lib/collie/reporter/github.rb +35 -0
  54. data/lib/collie/reporter/json.rb +52 -0
  55. data/lib/collie/reporter/text.rb +97 -0
  56. data/lib/collie/version.rb +5 -0
  57. data/lib/collie.rb +52 -0
  58. metadata +145 -0
@@ -0,0 +1,349 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../ast"
4
+
5
+ module Collie
6
+ module Parser
7
# A single lexical token: its type, the text captured for it, and the
# source location it was read from.
class Token
  attr_accessor :type
  attr_accessor :value
  attr_accessor :location

  # All three attributes are required keyword arguments and are stored as given.
  def initialize(type:, value:, location:)
    @type, @value, @location = type, value, location
  end

  # Renders the token as TYPE(<inspected value>), e.g. IDENTIFIER("expr").
  def to_s
    rendered = value.inspect
    "#{type}(#{rendered})"
  end
end
21
+
22
# Lexer for .y grammar files.
#
# Walks the source text character by character and produces a flat array of
# Token objects terminated by an :EOF token. Whitespace and `//` / `/* */`
# comments between tokens are discarded. Characters that start no known token
# (for example digits) are skipped without producing a token.
class Lexer
  KEYWORDS = %w[
    %token %type %left %right %nonassoc %prec %union %start
    %rule %inline %code %expect %define %param %parse-param
    %lex-param %initial-action %destructor %printer
  ].freeze

  # Single-character punctuation and the token type each character produces.
  PUNCTUATION = {
    "|" => :PIPE,
    ":" => :COLON,
    ";" => :SEMICOLON,
    "(" => :LPAREN,
    ")" => :RPAREN,
    "[" => :LBRACKET,
    "]" => :RBRACKET,
    "," => :COMMA
  }.freeze

  # Directives that get a dedicated token type; every other directive is
  # reported as :DIRECTIVE.
  DIRECTIVE_TYPES = {
    "%token" => :TOKEN,
    "%type" => :TYPE,
    "%left" => :LEFT,
    "%right" => :RIGHT,
    "%nonassoc" => :NONASSOC,
    "%prec" => :PREC,
    "%union" => :UNION,
    "%start" => :START,
    "%rule" => :RULE,
    "%inline" => :INLINE
  }.freeze

  # @param source [String] grammar text to tokenize
  # @param filename [String] name recorded in every token location
  def initialize(source, filename: "<input>")
    @source = source
    @filename = filename
    @pos = 0
    @line = 1
    @column = 1
    @tokens = []
  end

  # Tokenizes the remaining input.
  #
  # @return [Array<Token>] all tokens read so far, ending with an :EOF token
  def tokenize
    until eof?
      skip_whitespace
      break if eof?

      token = next_token
      @tokens << token if token
    end

    @tokens << make_token(:EOF, "")
    @tokens
  end

  private

  # Reads whatever starts at the cursor. Returns the Token produced, or nil
  # when the input there was a comment or an unrecognised character.
  def next_token
    char = current_char
    following = peek_char

    if char == "/" && following == "/"
      skip_line_comment
      nil
    elsif char == "/" && following == "*"
      skip_block_comment
      nil
    elsif char == "%" && following == "{"
      tokenize_prologue
    elsif char == "%" && following == "}"
      consume_token(:PROLOGUE_END, "%}")
    elsif char == "%" && following == "%"
      consume_token(:SECTION_SEPARATOR, "%%")
    elsif char == "%" && alpha?(following)
      tokenize_directive
    elsif char == "{"
      tokenize_action
    elsif char == "'"
      tokenize_char_literal
    elsif char == '"'
      tokenize_string_literal
    elsif PUNCTUATION.key?(char)
      consume_token(PUNCTUATION[char], char)
    elsif char == "<"
      tokenize_type_tag
    elsif alpha?(char) || char == "_"
      tokenize_identifier
    else
      advance # unrecognised character: dropped without emitting a token
      nil
    end
  end

  # Builds a fixed-text token located at the cursor, then moves past it.
  # The token is created BEFORE advancing so its location is the token's
  # first character rather than the position following it.
  def consume_token(type, text)
    token = make_token(type, text)
    advance(text.length)
    token
  end

  def eof?
    @pos >= @source.length
  end

  # Character under the cursor, or nil at end of input.
  def current_char
    return nil if eof?

    @source[@pos]
  end

  # Character `offset` positions ahead of the cursor, or nil past the end.
  def peek_char(offset = 1)
    return nil if @pos + offset >= @source.length

    @source[@pos + offset]
  end

  # Moves the cursor forward by up to `count` characters, keeping the
  # line/column counters in step. Stops early at end of input.
  def advance(count = 1)
    count.times do
      break if eof?

      if @source[@pos] == "\n"
        @line += 1
        @column = 1
      else
        @column += 1
      end
      @pos += 1
    end
  end

  def skip_whitespace
    advance while !eof? && whitespace?(current_char)
  end

  def skip_line_comment
    advance(2) # skip //
    advance until eof? || current_char == "\n"
    advance unless eof? # skip \n
  end

  # Skips through the closing */, or to end of input if it is missing.
  def skip_block_comment
    advance(2) # skip /*
    until eof?
      if current_char == "*" && peek_char == "/"
        advance(2)
        break
      end
      advance
    end
  end

  # Reads the text after "%{" up to, but not including, the closing "%}".
  # The "%}" is left in the input and becomes its own :PROLOGUE_END token.
  def tokenize_prologue
    start_line = @line
    start_column = @column
    advance(2) # skip %{

    buffer = +""
    until eof? || (current_char == "%" && peek_char == "}")
      buffer << current_char
      advance
    end

    Token.new(
      type: :PROLOGUE_START,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length + 2)
    )
  end

  # Reads a "%name" directive: one leading "%" followed by letters and
  # dashes. A later "%" ends the directive, so "%foo%%" yields the directive
  # "%foo" followed by a section separator.
  def tokenize_directive
    start_line = @line
    start_column = @column
    buffer = +""

    if current_char == "%"
      buffer << "%"
      advance
    end

    while !eof? && (alpha?(current_char) || current_char == "-")
      buffer << current_char
      advance
    end

    Token.new(
      type: DIRECTIVE_TYPES.fetch(buffer, :DIRECTIVE),
      value: buffer,
      location: make_location(start_line, start_column, buffer.length)
    )
  end

  # Reads a brace-delimited action, including both outer braces. Braces
  # inside string literals, character literals and comments are copied
  # verbatim and do not affect nesting depth. If the input ends before the
  # braces balance, the text read so far is returned.
  def tokenize_action
    start_line = @line
    start_column = @column
    buffer = +""
    depth = 0

    until eof?
      char = current_char

      if char == '"' || char == "'"
        copy_quoted(buffer, char)
        next
      elsif char == "/" && peek_char == "/"
        copy_line_comment(buffer)
        next
      elsif char == "/" && peek_char == "*"
        copy_block_comment(buffer)
        next
      end

      if char == "{"
        depth += 1
      elsif char == "}"
        depth -= 1
      end

      buffer << char
      advance
      break if char == "}" && depth.zero?
    end

    Token.new(
      type: :ACTION,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length)
    )
  end

  # Copies a quoted literal inside an action into `buffer`, honouring
  # backslash escapes. An unterminated literal stops at the end of the line
  # (the newline itself is left for the caller) so one stray quote cannot
  # swallow the remainder of the file.
  def copy_quoted(buffer, quote)
    buffer << current_char # opening quote
    advance

    until eof?
      char = current_char
      break if char == "\n"

      buffer << char
      advance

      if char == "\\"
        next if eof?

        buffer << current_char # escaped character, copied as-is
        advance
      elsif char == quote
        break
      end
    end
  end

  # Copies a // comment inside an action up to, not including, the newline.
  def copy_line_comment(buffer)
    until eof? || current_char == "\n"
      buffer << current_char
      advance
    end
  end

  # Copies a /* ... */ comment inside an action, including its delimiters.
  def copy_block_comment(buffer)
    buffer << "/*"
    advance(2)

    until eof?
      if current_char == "*" && peek_char == "/"
        buffer << "*/"
        advance(2)
        break
      end
      buffer << current_char
      advance
    end
  end

  def tokenize_char_literal
    tokenize_quoted(:CHAR, "'")
  end

  def tokenize_string_literal
    tokenize_quoted(:STRING, '"')
  end

  # Reads a literal delimited by `quote`. The token value excludes the
  # delimiters and keeps backslash escapes exactly as written. The location
  # length adds 2 for the two delimiters.
  def tokenize_quoted(type, quote)
    start_line = @line
    start_column = @column
    buffer = +""
    advance # skip opening quote

    until eof? || current_char == quote
      buffer << current_char
      if current_char == "\\"
        advance
        buffer << current_char unless eof?
      end
      advance
    end

    advance unless eof? # skip closing quote

    Token.new(
      type: type,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length + 2)
    )
  end

  # Reads "<...>"; the token value is the text between the angle brackets.
  def tokenize_type_tag
    start_line = @line
    start_column = @column
    buffer = +""
    advance # skip <

    until eof? || current_char == ">"
      buffer << current_char
      advance
    end

    advance unless eof? # skip >

    Token.new(
      type: :TYPE_TAG,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length + 2)
    )
  end

  # Reads a run of ASCII letters, digits and underscores.
  def tokenize_identifier
    start_line = @line
    start_column = @column
    buffer = +""

    while !eof? && (alnum?(current_char) || current_char == "_")
      buffer << current_char
      advance
    end

    Token.new(
      type: :IDENTIFIER,
      value: buffer,
      location: make_location(start_line, start_column, buffer.length)
    )
  end

  # Builds a token located at the CURRENT cursor position; call it before
  # advancing past the token text.
  def make_token(type, value)
    Token.new(
      type: type,
      value: value,
      location: make_location(@line, @column, value.length)
    )
  end

  def make_location(line, column, length)
    AST::Location.new(
      file: @filename,
      line: line,
      column: column,
      length: length
    )
  end

  def whitespace?(char)
    char&.match?(/\s/)
  end

  def alpha?(char)
    char&.match?(/[a-zA-Z]/)
  end

  def alnum?(char)
    char&.match?(/[a-zA-Z0-9]/)
  end
end
348
+ end
349
+ end