collie 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +28 -1
  3. data/README.md +55 -258
  4. data/lib/collie/analyzer/reachability.rb +17 -20
  5. data/lib/collie/analyzer/recursion.rb +28 -9
  6. data/lib/collie/analyzer/symbol_resolver.rb +51 -0
  7. data/lib/collie/ast.rb +18 -4
  8. data/lib/collie/cli.rb +388 -50
  9. data/lib/collie/config/schema.rb +117 -0
  10. data/lib/collie/config.rb +106 -22
  11. data/lib/collie/formatter/formatter.rb +95 -50
  12. data/lib/collie/formatter/options.rb +17 -5
  13. data/lib/collie/formatter/signature.rb +72 -0
  14. data/lib/collie/linter/base.rb +49 -0
  15. data/lib/collie/linter/rules/ambiguous_precedence.rb +5 -2
  16. data/lib/collie/linter/rules/circular_reference.rb +96 -38
  17. data/lib/collie/linter/rules/consistent_tag_naming.rb +13 -13
  18. data/lib/collie/linter/rules/empty_action.rb +42 -11
  19. data/lib/collie/linter/rules/factorizable_rules.rb +2 -2
  20. data/lib/collie/linter/rules/left_recursion.rb +5 -4
  21. data/lib/collie/linter/rules/long_rule.rb +3 -3
  22. data/lib/collie/linter/rules/nonterminal_naming.rb +6 -4
  23. data/lib/collie/linter/rules/prec_improvement.rb +1 -1
  24. data/lib/collie/linter/rules/redundant_epsilon.rb +11 -11
  25. data/lib/collie/linter/rules/right_recursion.rb +4 -1
  26. data/lib/collie/linter/rules/symbol_conflict.rb +130 -0
  27. data/lib/collie/linter/rules/token_naming.rb +2 -1
  28. data/lib/collie/linter/rules/trailing_whitespace.rb +7 -1
  29. data/lib/collie/linter/rules/undefined_symbol.rb +50 -8
  30. data/lib/collie/linter/rules/unused_nonterminal.rb +36 -1
  31. data/lib/collie/linter/rules/unused_token.rb +34 -9
  32. data/lib/collie/parser/debug_serializer.rb +205 -0
  33. data/lib/collie/parser/lexer.rb +182 -11
  34. data/lib/collie/parser/parser.rb +73 -13
  35. data/lib/collie/reporter/github.rb +15 -2
  36. data/lib/collie/reporter/json.rb +4 -1
  37. data/lib/collie/reporter/sarif.rb +81 -0
  38. data/lib/collie/version.rb +1 -1
  39. data/lib/collie.rb +6 -1
  40. metadata +8 -2
@@ -6,12 +6,13 @@ module Collie
6
6
  module Parser
7
7
  # Token representation
8
8
  class Token
9
- attr_accessor :type, :value, :location
9
+ attr_accessor :type, :value, :location, :raw_value
10
10
 
11
- def initialize(type:, value:, location:)
11
+ def initialize(type:, value:, location:, raw_value: nil)
12
12
  @type = type
13
13
  @value = value
14
14
  @location = location
15
+ @raw_value = raw_value
15
16
  end
16
17
 
17
18
  def to_s
@@ -24,7 +25,7 @@ module Collie
24
25
  KEYWORDS = %w[
25
26
  %token %type %left %right %nonassoc %prec %union %start
26
27
  %rule %inline %code %expect %define %param %parse-param
27
- %lex-param %initial-action %destructor %printer
28
+ %lex-param %initial-action %destructor %printer %empty
28
29
  ].freeze
29
30
 
30
31
  def initialize(source, filename: "<input>")
@@ -34,6 +35,7 @@ module Collie
34
35
  @line = 1
35
36
  @column = 1
36
37
  @tokens = []
38
+ @section_separator_count = 0
37
39
  end
38
40
 
39
41
  def tokenize
@@ -44,15 +46,20 @@ module Collie
44
46
  if current_char == "/" && peek_char == "/"
45
47
  skip_line_comment
46
48
  elsif current_char == "/" && peek_char == "*"
47
- skip_block_comment
49
+ token = tokenize_block_comment
50
+ @tokens << token if token
48
51
  elsif current_char == "%" && peek_char == "{"
49
52
  @tokens << tokenize_prologue
50
53
  elsif current_char == "%" && peek_char == "}"
51
54
  advance(2)
52
55
  @tokens << make_token(:PROLOGUE_END, "%}")
53
56
  elsif current_char == "%" && peek_char == "%"
54
- advance(2)
55
- @tokens << make_token(:SECTION_SEPARATOR, "%%")
57
+ @tokens << tokenize_section_separator
58
+ if @section_separator_count == 2
59
+ epilogue = tokenize_epilogue
60
+ @tokens << epilogue if epilogue
61
+ break
62
+ end
56
63
  elsif current_char == "%" && alpha?(peek_char)
57
64
  @tokens << tokenize_directive
58
65
  elsif current_char == "{"
@@ -140,15 +147,34 @@ module Collie
140
147
  advance unless eof? # skip \n
141
148
  end
142
149
 
143
- def skip_block_comment
150
+ def tokenize_block_comment
151
+ start_line = @line
152
+ start_column = @column
153
+ buffer = +"/*"
144
154
  advance(2) # skip /*
155
+
145
156
  until eof?
146
157
  if current_char == "*" && peek_char == "/"
158
+ buffer << "*/"
147
159
  advance(2)
148
160
  break
149
161
  end
162
+
163
+ buffer << current_char
150
164
  advance
151
165
  end
166
+
167
+ return unless empty_comment?(buffer)
168
+
169
+ Token.new(
170
+ type: :EMPTY,
171
+ value: buffer,
172
+ location: make_location(start_line, start_column, buffer.length)
173
+ )
174
+ end
175
+
176
+ def empty_comment?(comment)
177
+ comment.match?(%r{\A/\*\s*empty\s*\*/\z}i)
152
178
  end
153
179
 
154
180
  def tokenize_prologue
@@ -190,7 +216,9 @@ module Collie
190
216
  when "%start" then :START
191
217
  when "%rule" then :RULE
192
218
  when "%inline" then :INLINE
193
- else :DIRECTIVE
219
+ when "%empty" then :EMPTY
220
+ else
221
+ return tokenize_unknown_declaration(start_line, start_column, buffer)
194
222
  end
195
223
 
196
224
  Token.new(
@@ -200,6 +228,81 @@ module Collie
200
228
  )
201
229
  end
202
230
 
231
+ def tokenize_section_separator
232
+ start_line = @line
233
+ start_column = @column
234
+ advance(2)
235
+ @section_separator_count += 1
236
+
237
+ Token.new(
238
+ type: :SECTION_SEPARATOR,
239
+ value: "%%",
240
+ location: make_location(start_line, start_column, 2)
241
+ )
242
+ end
243
+
244
+ def tokenize_epilogue
245
+ consume_single_line_break
246
+ return nil if eof?
247
+
248
+ start_line = @line
249
+ start_column = @column
250
+ buffer = @source[@pos..]
251
+ advance(buffer.length)
252
+
253
+ Token.new(
254
+ type: :EPILOGUE,
255
+ value: buffer,
256
+ location: make_location(start_line, start_column, buffer.length)
257
+ )
258
+ end
259
+
260
+ def consume_single_line_break
261
+ if current_char == "\r" && peek_char == "\n"
262
+ advance(2)
263
+ elsif current_char == "\n"
264
+ advance
265
+ end
266
+ end
267
+
268
+ def tokenize_unknown_declaration(start_line, start_column, directive)
269
+ buffer = +directive
270
+
271
+ append_unknown_declaration_content(buffer)
272
+
273
+ Token.new(
274
+ type: :UNKNOWN_DECLARATION,
275
+ value: buffer.rstrip,
276
+ location: make_location(start_line, start_column, buffer.length)
277
+ )
278
+ end
279
+
280
+ def append_unknown_declaration_content(buffer)
281
+ action_depth = 0
282
+
283
+ until eof?
284
+ break if action_depth.zero? && current_char == "\n"
285
+
286
+ if action_depth.positive? && (current_char == '"' || current_char == "'")
287
+ append_quoted_action_content(buffer, current_char)
288
+ next
289
+ elsif action_depth.positive? && current_char == "/" && peek_char == "/"
290
+ append_line_comment_action_content(buffer)
291
+ next
292
+ elsif action_depth.positive? && current_char == "/" && peek_char == "*"
293
+ append_block_comment_action_content(buffer)
294
+ next
295
+ elsif current_char == "{"
296
+ action_depth += 1
297
+ elsif current_char == "}" && action_depth.positive?
298
+ action_depth -= 1
299
+ end
300
+
301
+ buffer << current_char
302
+ advance
303
+ end
304
+ end
305
+
203
306
  def tokenize_action
204
307
  start_line = @line
205
308
  start_column = @column
@@ -209,7 +312,16 @@ module Collie
209
312
  loop do
210
313
  break if eof?
211
314
 
212
- if current_char == "{"
315
+ if current_char == '"' || current_char == "'"
316
+ append_quoted_action_content(buffer, current_char)
317
+ next
318
+ elsif current_char == "/" && peek_char == "/"
319
+ append_line_comment_action_content(buffer)
320
+ next
321
+ elsif current_char == "/" && peek_char == "*"
322
+ append_block_comment_action_content(buffer)
323
+ next
324
+ elsif current_char == "{"
213
325
  depth += 1
214
326
  elsif current_char == "}"
215
327
  depth -= 1
@@ -232,6 +344,7 @@ module Collie
232
344
  end
233
345
 
234
346
  def tokenize_char_literal
347
+ start_pos = @pos
235
348
  start_line = @line
236
349
  start_column = @column
237
350
  buffer = +""
@@ -247,15 +360,18 @@ module Collie
247
360
  end
248
361
 
249
362
  advance unless eof? # skip closing '
363
+ raw_value = @source[start_pos...@pos]
250
364
 
251
365
  Token.new(
252
366
  type: :CHAR,
253
367
  value: buffer,
254
- location: make_location(start_line, start_column, buffer.length + 2)
368
+ raw_value: raw_value,
369
+ location: make_location(start_line, start_column, raw_value.length)
255
370
  )
256
371
  end
257
372
 
258
373
  def tokenize_string_literal
374
+ start_pos = @pos
259
375
  start_line = @line
260
376
  start_column = @column
261
377
  buffer = +""
@@ -271,14 +387,69 @@ module Collie
271
387
  end
272
388
 
273
389
  advance unless eof? # skip closing "
390
+ raw_value = @source[start_pos...@pos]
274
391
 
275
392
  Token.new(
276
393
  type: :STRING,
277
394
  value: buffer,
278
- location: make_location(start_line, start_column, buffer.length + 2)
395
+ raw_value: raw_value,
396
+ location: make_location(start_line, start_column, raw_value.length)
279
397
  )
280
398
  end
281
399
 
400
+ def append_quoted_action_content(buffer, quote)
401
+ buffer << current_char
402
+ advance
403
+
404
+ until eof?
405
+ buffer << current_char
406
+
407
+ if current_char == "\\"
408
+ advance
409
+ next if eof?
410
+
411
+ buffer << current_char
412
+ elsif current_char == quote
413
+ advance
414
+ break
415
+ end
416
+
417
+ advance
418
+ end
419
+ end
420
+
421
+ def append_line_comment_action_content(buffer)
422
+ buffer << current_char
423
+ advance
424
+ buffer << current_char
425
+ advance
426
+
427
+ until eof? || current_char == "\n"
428
+ buffer << current_char
429
+ advance
430
+ end
431
+ end
432
+
433
+ def append_block_comment_action_content(buffer)
434
+ buffer << current_char
435
+ advance
436
+ buffer << current_char
437
+ advance
438
+
439
+ until eof?
440
+ if current_char == "*" && peek_char == "/"
441
+ buffer << current_char
442
+ advance
443
+ buffer << current_char
444
+ advance
445
+ break
446
+ end
447
+
448
+ buffer << current_char
449
+ advance
450
+ end
451
+ end
452
+
282
453
  def tokenize_type_tag
283
454
  start_line = @line
284
455
  start_column = @column
@@ -86,6 +86,8 @@ module Collie
86
86
  # %inline for Lrama extensions
87
87
  advance
88
88
  declarations << parse_inline_declaration
89
+ when :UNKNOWN_DECLARATION
90
+ declarations << parse_unknown_declaration
89
91
  else
90
92
  advance # Skip unknown declarations for now
91
93
  end
@@ -127,12 +129,35 @@ module Collie
127
129
  end
128
130
 
129
131
  def parse_inline_declaration
130
- # %inline followed by rule name
131
- rule_name = expect(:IDENTIFIER).value
132
+ # %inline followed by a rule name, optionally with a full inline rule body.
133
+ rule_token = expect(:IDENTIFIER)
134
+ rule_name = rule_token.value
135
+ parameters = []
136
+ alternatives = []
137
+
138
+ if match?(:LPAREN)
139
+ advance
140
+ parameters = parse_parameter_list
141
+ expect(:RPAREN)
142
+ end
143
+
144
+ if match?(:COLON)
145
+ advance
146
+ alternatives << parse_alternative
147
+
148
+ while match?(:PIPE)
149
+ advance
150
+ alternatives << parse_alternative
151
+ end
152
+
153
+ expect(:SEMICOLON) if match?(:SEMICOLON)
154
+ end
132
155
 
133
156
  AST::InlineRule.new(
134
157
  rule: rule_name,
135
- location: current_token.location
158
+ parameters: parameters,
159
+ alternatives: alternatives,
160
+ location: rule_token.location
136
161
  )
137
162
  end
138
163
 
@@ -147,7 +172,7 @@ module Collie
147
172
  end
148
173
 
149
174
  while match?(:IDENTIFIER) || match?(:STRING) || match?(:CHAR)
150
- names << current_token.value
175
+ names << token_value(current_token)
151
176
  advance
152
177
  end
153
178
 
@@ -191,7 +216,7 @@ module Collie
191
216
 
192
217
  tokens = []
193
218
  while match?(:IDENTIFIER) || match?(:STRING) || match?(:CHAR)
194
- tokens << current_token.value
219
+ tokens << token_value(current_token)
195
220
  advance
196
221
  end
197
222
 
@@ -227,6 +252,15 @@ module Collie
227
252
  )
228
253
  end
229
254
 
255
+ def parse_unknown_declaration
256
+ token = expect(:UNKNOWN_DECLARATION)
257
+
258
+ AST::UnknownDeclaration.new(
259
+ source: token.value,
260
+ location: token.location
261
+ )
262
+ end
263
+
230
264
  def parse_rules
231
265
  rules = []
232
266
 
@@ -300,7 +334,8 @@ module Collie
300
334
 
301
335
  if match?(:IDENTIFIER) || match?(:STRING) || match?(:CHAR)
302
336
  symbol_token = current_token
303
- kind = if symbol_token.value.match?(/^[A-Z]/) || match?(:STRING) || match?(:CHAR)
337
+ name = token_value(symbol_token)
338
+ kind = if name.match?(/^[A-Z]/) || literal_token?(symbol_token)
304
339
  :terminal
305
340
  else
306
341
  :nonterminal
@@ -308,7 +343,7 @@ module Collie
308
343
  advance
309
344
 
310
345
  args << AST::Symbol.new(
311
- name: symbol_token.value,
346
+ name: name,
312
347
  kind: kind,
313
348
  location: symbol_token.location
314
349
  )
@@ -316,7 +351,8 @@ module Collie
316
351
  while match?(:COMMA)
317
352
  advance
318
353
  symbol_token = current_token
319
- kind = if symbol_token.value.match?(/^[A-Z]/) || match?(:STRING) || match?(:CHAR)
354
+ name = token_value(symbol_token)
355
+ kind = if name.match?(/^[A-Z]/) || literal_token?(symbol_token)
320
356
  :terminal
321
357
  else
322
358
  :nonterminal
@@ -324,7 +360,7 @@ module Collie
324
360
  advance
325
361
 
326
362
  args << AST::Symbol.new(
327
- name: symbol_token.value,
363
+ name: name,
328
364
  kind: kind,
329
365
  location: symbol_token.location
330
366
  )
@@ -338,17 +374,24 @@ module Collie
338
374
  symbols = []
339
375
  action = nil
340
376
  prec = nil
377
+ explicit_empty = false
378
+ empty_marker = nil
341
379
  start_location = current_token.location
342
380
 
343
381
  until match?(:PIPE) || match?(:SEMICOLON) || match?(:ACTION) ||
344
382
  match?(:SECTION_SEPARATOR) || match?(:EOF)
345
383
  if match?(:PREC)
346
384
  advance
347
- prec = current_token.value
385
+ prec = token_value(current_token)
386
+ advance
387
+ elsif match?(:EMPTY)
388
+ explicit_empty = true
389
+ empty_marker = token_value(current_token)
348
390
  advance
349
391
  elsif match?(:IDENTIFIER) || match?(:STRING) || match?(:CHAR)
350
392
  symbol_token = current_token
351
- kind = if symbol_token.value.match?(/^[A-Z]/) || match?(:STRING) || match?(:CHAR)
393
+ name = token_value(symbol_token)
394
+ kind = if name.match?(/^[A-Z]/) || literal_token?(symbol_token)
352
395
  :terminal
353
396
  else
354
397
  :nonterminal
@@ -371,7 +414,7 @@ module Collie
371
414
  end
372
415
 
373
416
  symbols << AST::Symbol.new(
374
- name: symbol_token.value,
417
+ name: name,
375
418
  kind: kind,
376
419
  alias_name: alias_name,
377
420
  arguments: arguments,
@@ -394,6 +437,8 @@ module Collie
394
437
  symbols: symbols,
395
438
  action: action,
396
439
  prec: prec,
440
+ explicit_empty: explicit_empty,
441
+ empty_marker: empty_marker,
397
442
  location: symbols.first&.location || start_location
398
443
  )
399
444
  end
@@ -402,15 +447,30 @@ module Collie
402
447
  return nil unless match?(:SECTION_SEPARATOR)
403
448
 
404
449
  advance
450
+
451
+ if match?(:EPILOGUE)
452
+ token = current_token
453
+ advance
454
+ return AST::Epilogue.new(code: token.value, location: token.location) unless token.value.empty?
455
+ end
456
+
405
457
  code = +""
406
458
 
407
459
  until match?(:EOF)
408
- code << current_token.value
460
+ code << token_value(current_token)
409
461
  advance
410
462
  end
411
463
 
412
464
  AST::Epilogue.new(code: code, location: current_token.location) unless code.empty?
413
465
  end
466
+
467
+ def token_value(token)
468
+ token.raw_value || token.value
469
+ end
470
+
471
+ def literal_token?(token)
472
+ token.type == :STRING || token.type == :CHAR
473
+ end
414
474
  end
415
475
  end
416
476
  end
@@ -12,14 +12,27 @@ module Collie
12
12
 
13
13
  def format_offense(offense)
14
14
  level = github_level(offense.severity)
15
- file = offense.location.file
15
+ file = escape_property(offense.location.file)
16
16
  line = offense.location.line
17
17
  col = offense.location.column
18
- message = offense.message.gsub(",", "%2C") # Escape commas
18
+ message = escape_data(offense.message)
19
19
 
20
20
  "::#{level} file=#{file},line=#{line},col=#{col}::#{message}"
21
21
  end
22
22
 
23
+ def escape_data(value)
24
+ value.to_s
25
+ .gsub("%", "%25")
26
+ .gsub("\r", "%0D")
27
+ .gsub("\n", "%0A")
28
+ end
29
+
30
+ def escape_property(value)
31
+ escape_data(value)
32
+ .gsub(":", "%3A")
33
+ .gsub(",", "%2C")
34
+ end
35
+
23
36
  def github_level(severity)
24
37
  case severity
25
38
  when :error
@@ -40,10 +40,13 @@ module Collie
40
40
  rule: offense.rule.rule_name,
41
41
  severity: offense.severity,
42
42
  message: offense.message,
43
+ autocorrectable: offense.autocorrectable?,
43
44
  location: {
44
45
  line: offense.location.line,
45
46
  column: offense.location.column,
46
- length: offense.location.length
47
+ length: offense.location.length,
48
+ end_line: offense.location.line,
49
+ end_column: offense.location.column + offense.location.length
47
50
  }
48
51
  }
49
52
  end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Collie
6
+ module Reporter
7
+ # SARIF 2.1.0 reporter for code scanning integrations.
8
+ class Sarif
9
+ def report(offenses)
10
+ JSON.pretty_generate(
11
+ version: "2.1.0",
12
+ "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
13
+ runs: [
14
+ {
15
+ tool: {
16
+ driver: {
17
+ name: "Collie",
18
+ informationUri: "https://github.com/ydah/collie",
19
+ rules: rules(offenses)
20
+ }
21
+ },
22
+ results: offenses.map { |offense| result(offense) }
23
+ }
24
+ ]
25
+ )
26
+ end
27
+
28
+ private
29
+
30
+ def rules(offenses)
31
+ offenses.map(&:rule).uniq(&:rule_name).map do |rule|
32
+ {
33
+ id: rule.rule_name,
34
+ name: rule.rule_name,
35
+ shortDescription: {
36
+ text: rule.description
37
+ },
38
+ defaultConfiguration: {
39
+ level: level(rule.severity)
40
+ }
41
+ }
42
+ end
43
+ end
44
+
45
+ def result(offense)
46
+ {
47
+ ruleId: offense.rule.rule_name,
48
+ level: level(offense.severity),
49
+ message: {
50
+ text: offense.message
51
+ },
52
+ locations: [
53
+ {
54
+ physicalLocation: {
55
+ artifactLocation: {
56
+ uri: offense.location.file
57
+ },
58
+ region: {
59
+ startLine: offense.location.line,
60
+ startColumn: offense.location.column,
61
+ charLength: offense.location.length
62
+ }
63
+ }
64
+ }
65
+ ]
66
+ }
67
+ end
68
+
69
+ def level(severity)
70
+ case severity
71
+ when :error
72
+ "error"
73
+ when :warning, :convention
74
+ "warning"
75
+ else
76
+ "note"
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Collie
4
- VERSION = "0.1.0"
4
+ VERSION = "1.0.0"
5
5
  end
data/lib/collie.rb CHANGED
@@ -1,22 +1,27 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "collie/version"
4
- require_relative "collie/cli"
5
4
  require_relative "collie/config"
5
+ require_relative "collie/config/schema"
6
+ require_relative "collie/cli"
6
7
  require_relative "collie/ast"
7
8
  require_relative "collie/parser/lexer"
8
9
  require_relative "collie/parser/parser"
10
+ require_relative "collie/parser/debug_serializer"
9
11
  require_relative "collie/analyzer/symbol_table"
12
+ require_relative "collie/analyzer/symbol_resolver"
10
13
  require_relative "collie/analyzer/reachability"
11
14
  require_relative "collie/analyzer/recursion"
12
15
  require_relative "collie/analyzer/conflict"
13
16
  require_relative "collie/linter/base"
14
17
  require_relative "collie/linter/registry"
18
+ require_relative "collie/formatter/signature"
15
19
  require_relative "collie/formatter/formatter"
16
20
  require_relative "collie/formatter/options"
17
21
  require_relative "collie/reporter/text"
18
22
  require_relative "collie/reporter/json"
19
23
  require_relative "collie/reporter/github"
24
+ require_relative "collie/reporter/sarif"
20
25
 
21
26
  # Collie is a linter and formatter for Lrama Style BNF grammar files (.y files).
22
27
  #