syntax_suggest 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +91 -0
  3. data/.github/workflows/check_changelog.yml +20 -0
  4. data/.gitignore +14 -0
  5. data/.rspec +3 -0
  6. data/.standard.yml +1 -0
  7. data/CHANGELOG.md +158 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +14 -0
  10. data/Gemfile.lock +67 -0
  11. data/LICENSE.txt +21 -0
  12. data/README.md +229 -0
  13. data/Rakefile +8 -0
  14. data/bin/console +14 -0
  15. data/bin/setup +8 -0
  16. data/dead_end.gemspec +32 -0
  17. data/exe/syntax_suggest +7 -0
  18. data/lib/syntax_suggest/api.rb +199 -0
  19. data/lib/syntax_suggest/around_block_scan.rb +224 -0
  20. data/lib/syntax_suggest/block_expand.rb +74 -0
  21. data/lib/syntax_suggest/capture_code_context.rb +233 -0
  22. data/lib/syntax_suggest/clean_document.rb +304 -0
  23. data/lib/syntax_suggest/cli.rb +129 -0
  24. data/lib/syntax_suggest/code_block.rb +100 -0
  25. data/lib/syntax_suggest/code_frontier.rb +178 -0
  26. data/lib/syntax_suggest/code_line.rb +239 -0
  27. data/lib/syntax_suggest/code_search.rb +139 -0
  28. data/lib/syntax_suggest/core_ext.rb +101 -0
  29. data/lib/syntax_suggest/display_code_with_line_numbers.rb +70 -0
  30. data/lib/syntax_suggest/display_invalid_blocks.rb +84 -0
  31. data/lib/syntax_suggest/explain_syntax.rb +103 -0
  32. data/lib/syntax_suggest/left_right_lex_count.rb +168 -0
  33. data/lib/syntax_suggest/lex_all.rb +55 -0
  34. data/lib/syntax_suggest/lex_value.rb +70 -0
  35. data/lib/syntax_suggest/parse_blocks_from_indent_line.rb +60 -0
  36. data/lib/syntax_suggest/pathname_from_message.rb +59 -0
  37. data/lib/syntax_suggest/priority_engulf_queue.rb +63 -0
  38. data/lib/syntax_suggest/priority_queue.rb +105 -0
  39. data/lib/syntax_suggest/ripper_errors.rb +36 -0
  40. data/lib/syntax_suggest/unvisited_lines.rb +36 -0
  41. data/lib/syntax_suggest/version.rb +5 -0
  42. data/lib/syntax_suggest.rb +3 -0
  43. metadata +88 -0
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "left_right_lex_count"
4
+
5
module SyntaxSuggest
  # Produces human readable explanations for a syntactically
  # invalid document
  #
  # Example:
  #
  #   source = "def foo; puts 'lol'" # Note missing end
  #   explain = ExplainSyntax.new(
  #     code_lines: CodeLine.from_source(source)
  #   ).call
  #   explain.errors.first
  #   # => "Unmatched keyword, missing `end' ?"
  #
  # When lexical counting reports nothing as missing, ripper is
  # run against the input and its raw error messages are returned
  # instead.
  #
  # Example:
  #
  #   source = "1 * " # Note missing a second number
  #   explain = ExplainSyntax.new(
  #     code_lines: CodeLine.from_source(source)
  #   ).call
  #   explain.errors.first
  #   # => "syntax error, unexpected end-of-input"
  class ExplainSyntax
    # Maps every paired character to its partner
    INVERSE = {
      "{" => "}", "}" => "{",
      "[" => "]", "]" => "[",
      "(" => ")", ")" => "(",
      "|" => "|"
    }.freeze

    def initialize(code_lines:)
      @code_lines = code_lines
      @left_right = LeftRightLexCount.new
      @missing = nil
    end

    # Feeds every lex value of every code line to the counter
    #
    # Returns self so calls can be chained
    def call
      @code_lines.each do |code_line|
        code_line.lex.each { |lex_value| @left_right.count_lex(lex_value) }
      end

      self
    end

    # Returns an array of missing elements (memoized)
    #
    # For example this:
    #
    #   ExplainSyntax.new(code_lines: lines).missing
    #   # => ["}"]
    #
    # Would indicate that the source is missing
    # a `}` character in the source code
    def missing
      return @missing if @missing

      @missing = @left_right.missing
    end

    # Converts a missing string into a human
    # understandable explanation.
    #
    # Example:
    #
    #   explain.why("}")
    #   # => "Unmatched `{', missing `}' ?"
    #
    # Raises a RuntimeError when `miss` is neither "keyword",
    # "end", nor a key of INVERSE
    def why(miss)
      if miss == "keyword"
        "Unmatched `end', missing keyword (`do', `def`, `if`, etc.) ?"
      elsif miss == "end"
        "Unmatched keyword, missing `end' ?"
      else
        partner = INVERSE.fetch(miss) do
          raise "Unknown explain syntax char or key: #{miss.inspect}"
        end
        "Unmatched `#{partner}', missing `#{miss}' ?"
      end
    end

    # Returns an array of syntax error messages
    #
    # When there are missing elements each one is explained,
    # otherwise the original ripper error messages are returned
    def errors
      return missing.map { |miss| why(miss) } unless missing.empty?

      full_source = @code_lines.map(&:original).join
      RipperErrors.new(full_source).call.errors
    end
  end
end
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
module SyntaxSuggest
  # Find mis-matched syntax based on lexical count
  #
  # Used for detecting missing pairs of elements
  # each keyword needs an end, each '{' needs a '}'
  # etc.
  #
  # Example:
  #
  #   left_right = LeftRightLexCount.new
  #   left_right.count_kw
  #   left_right.missing.first
  #   # => "end"
  #
  #   left_right = LeftRightLexCount.new
  #   source = "{ a: b, c: d" # Note missing '}'
  #   LexAll.new(source: source).each do |lex|
  #     left_right.count_lex(lex)
  #   end
  #   left_right.missing.first
  #   # => "}"
  class LeftRightLexCount
    def initialize
      @kw_count = 0
      @end_count = 0

      @count_for_char = {
        "{" => 0,
        "}" => 0,
        "[" => 0,
        "]" => 0,
        "(" => 0,
        ")" => 0,
        "|" => 0
      }
    end

    # Records one keyword that needs a matching `end`
    def count_kw
      @kw_count += 1
    end

    # Records one `end`
    def count_end
      @end_count += 1
    end

    # Count source code characters
    #
    # Expects an object responding to `type`, `token`,
    # `is_end?` and `is_kw?`.
    #
    # Example:
    #
    #   left_right = LeftRightLexCount.new
    #   left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG))
    #   left_right.count_for_char("{")
    #   # => 1
    #   left_right.count_for_char("}")
    #   # => 0
    def count_lex(lex)
      case lex.type
      when :on_tstring_content
        # ^^^
        # Means it's a string or a symbol `"{"` rather than being
        # part of a data structure (like a hash) `{ a: b }`
        # ignore it.
      when :on_words_beg, :on_symbols_beg, :on_qwords_beg,
        :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg
        # ^^^
        # Handle shorthand syntaxes like `%Q{ i am a string }`
        #
        # The start token will be the full thing `%Q{` but we
        # need to count it as if it's a `{`. Any delimiter
        # can be used, so only count the ones we track.
        char = lex.token[-1]
        @count_for_char[char] += 1 if @count_for_char.key?(char)
      when :on_embexpr_beg
        # ^^^
        # Embedded string expressions like `"#{foo} <-embed"`
        # are parsed with chars:
        #
        #   `#{` as :on_embexpr_beg
        #   `}`  as :on_embexpr_end
        #
        # We cannot ignore both :on_embexpr_beg and :on_embexpr_end
        # because sometimes the lexer thinks something is an embed
        # string end, when it is not like `lol = }` (no clue why).
        #
        # When we see `#{` count it as a `{` or we will
        # have a mis-match count.
        @count_for_char["{"] += 1 if lex.token == '#{'
      else
        @end_count += 1 if lex.is_end?
        @kw_count += 1 if lex.is_kw?
        @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token)
      end
    end

    # Returns how many times `char` has been counted,
    # nil for characters that are not tracked
    def count_for_char(char)
      @count_for_char[char]
    end

    # Returns an array of missing syntax characters
    # or `"end"` or `"keyword"`
    #
    #   left_right.missing
    #   # => ["}"]
    def missing
      [*missing_pairs, missing_pipe, missing_keyword_end].compact
    end

    PAIRS = {
      "{" => "}",
      "[" => "]",
      "(" => ")"
    }.freeze

    # Opening characters like `{` need closing characters
    # like `}`.
    #
    # When a mis-match count is detected, suggest the
    # missing member.
    #
    # For example if there are 3 `}` and only two `{`
    # return `"{"`
    #
    # Returns one entry per pair, nil when that pair is balanced
    private def missing_pairs
      PAIRS.map do |(left, right)|
        case @count_for_char[left] <=> @count_for_char[right]
        when 1
          right
        when 0
          nil
        when -1
          left
        end
      end
    end

    # Keywords need ends and ends need keywords
    #
    # If we have more keywords, there's a missing `end`
    # if we have more `end`-s, there's a missing keyword
    private def missing_keyword_end
      case @kw_count <=> @end_count
      when 1
        "end"
      when 0
        nil
      when -1
        "keyword"
      end
    end

    # Pipes come in pairs.
    # If there's an odd number of pipes then we
    # are missing one
    private def missing_pipe
      "|" if @count_for_char["|"].odd?
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module SyntaxSuggest
  # Ripper.lex is not guaranteed to lex the entire source document
  #
  # This class keeps re-running the lexer on the remaining lines
  # whenever the last lexed token is not near the end of the
  # document, and wraps every token in a LexValue.
  #
  #   lex = LexAll.new(source: source)
  #   lex.each do |value|
  #     puts value.line
  #   end
  class LexAll
    include Enumerable

    # source       - String holding the full document
    # source_lines - optional Array of the document's lines,
    #                defaults to `source.lines`
    def initialize(source:, source_lines: nil)
      @lex = lex_from(source, 1)
      source_lines ||= source.lines
      last_lineno = source_lines.length

      # An empty document produces no tokens at all, in that
      # case there is nothing left to resume from.
      lineno = @lex.empty? ? last_lineno : @lex.last.pos.first + 1

      until lineno >= last_lineno
        chunk = lex_from(source_lines[lineno..-1].join, lineno + 1)

        # Without new tokens `lineno` could never advance,
        # stop instead of looping forever.
        break if chunk.empty?

        @lex.concat(chunk)
        lineno = @lex.last.pos.first + 1
      end

      # Every LexValue is handed the value built just before it
      last_lex = nil
      @lex.map! { |elem|
        last_lex = LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state, last_lex)
      }
    end

    def to_a
      @lex
    end

    # Yields each LexValue in order, returns an enumerator
    # when no block is given
    def each
      return @lex.each unless block_given?

      @lex.each do |x|
        yield x
      end
    end

    def [](index)
      @lex[index]
    end

    def last
      @lex.last
    end

    # Runs the ripper lexer over `source`, numbering lines from
    # `lineno`, and returns the raw tokens sorted by position
    private def lex_from(source, lineno)
      Ripper::Lexer.new(source, "-", lineno).parse.sort_by(&:pos)
    end
  end
end
54
+
55
+ require_relative "lex_value"
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module SyntaxSuggest
  # Value object for accessing lex values
  #
  # This lex:
  #
  #   [1, 0], :on_ident, "describe", CMDARG
  #
  # Would translate into:
  #
  #   lex.line # => 1
  #   lex.type # => :on_ident
  #   lex.token # => "describe"
  class LexValue
    attr_reader :line, :type, :token, :state

    # last_lex - the LexValue that came right before this one,
    #            nil when there is none
    def initialize(line, type, token, state, last_lex = nil)
      @line, @type, @token, @state = line, type, token, state

      set_kw_end(last_lex)
    end

    # Decides whether this value counts as a block opening
    # keyword or as an `end`
    private def set_kw_end(last_lex)
      @is_kw = false
      @is_end = false

      return unless type == :on_kw
      # A keyword right after a value in "fname" state is not counted
      # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953
      return if last_lex&.fname?

      case token
      when "end"
        @is_end = true
      when "def", "case", "for", "begin", "class", "module", "do"
        @is_kw = true
      when "if", "unless", "while", "until"
        # Only count if/unless when it's not a "trailing" if/unless
        # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
        @is_kw = !expr_label?
      end
    end

    # True when every EXPR_FNAME bit is set on the state
    def fname?
      state.allbits?(Ripper::EXPR_FNAME)
    end

    def ignore_newline?
      :on_ignored_nl == type
    end

    def is_end?
      @is_end
    end

    def is_kw?
      @is_kw
    end

    # True when any EXPR_BEG bit is set on the state
    def expr_beg?
      state.anybits?(Ripper::EXPR_BEG)
    end

    # True when every EXPR_LABEL bit is set on the state
    def expr_label?
      state.allbits?(Ripper::EXPR_LABEL)
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
module SyntaxSuggest
  # This class is responsible for generating initial code blocks
  # that will then later be expanded.
  #
  # The biggest concern when guessing code blocks, is accidentally
  # grabbing one that contains only an "end". In this example:
  #
  #   def dog
  #     begonn # misspelled `begin`
  #     puts "bark"
  #     end
  #   end
  #
  # The following lines would be matched (from bottom to top):
  #
  #   1) end
  #
  #   2) puts "bark"
  #      end
  #
  #   3) begonn
  #      puts "bark"
  #      end
  #
  # At this point it has nowhere else to expand, and it will yield this inner
  # code as a block
  class ParseBlocksFromIndentLine
    attr_reader :code_lines

    def initialize(code_lines:)
      @code_lines = code_lines
    end

    # Builds blocks from bottom up
    #
    # Scans around `target_line` for lines indented at least as
    # far as it, then yields one or more CodeBlock instances
    # built out of those neighbors.
    def each_neighbor_block(target_line)
      seed = CodeBlock.new(lines: target_line)
      scan = AroundBlockScan.new(code_lines: code_lines, block: seed)
        .skip(:empty?)
        .skip(:hidden?)
        .scan_while { |line| line.indent >= target_line.indent }

      neighbors = scan.code_block.lines
      whole = CodeBlock.new(lines: neighbors)

      # Small or already valid groups are yielded in one piece
      return yield(whole) if neighbors.length <= 2 || whole.valid?

      # Otherwise grow candidates upwards from the last neighbor,
      # yielding each candidate once it is no longer invalid or
      # once the neighbors are used up
      until neighbors.empty?
        lines = [neighbors.pop]
        candidate = CodeBlock.new(lines: lines)

        while candidate.invalid? && neighbors.any?
          lines.prepend(neighbors.pop)
          candidate = CodeBlock.new(lines: lines)
        end

        yield candidate
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module SyntaxSuggest
  # Converts a SyntaxError message to a path
  #
  # Handles the case where the filename has a colon in it
  # such as on a windows file system: https://github.com/zombocom/syntax_suggest/issues/111
  #
  # Only the first line of the message is inspected. It is split
  # on ":" and the pieces are joined back together one at a time
  # until the result names a path that exists.
  #
  # Example (assuming /tmp/scratch.rb exists):
  #
  #   message = "/tmp/scratch.rb:2: syntax error, unexpected `end' (SyntaxError)"
  #   puts PathnameFromMessage.new(message).call.name
  #   # => /tmp/scratch.rb
  #
  class PathnameFromMessage
    EVAL_RE = /^\(eval\):\d+/
    STREAMING_RE = /^-:\d+/
    attr_reader :name

    # message - the error message to extract a file name from,
    #           an empty message is treated as "no file name found"
    # io      - where "could not find filename" notices are written
    def initialize(message, io: $stderr)
      # `lines.first` is nil for an empty message
      @line = message.to_s.lines.first.to_s
      @parts = @line.split(":")
      @guess = []
      @name = nil
      @io = io
    end

    # Searches for the file name, afterwards `name` is either a
    # Pathname or nil when nothing was found
    #
    # Messages coming from `eval` or from streamed code ("-") have
    # no file, that is only reported when SYNTAX_SUGGEST_DEBUG is set.
    #
    # Returns self
    def call
      if skip_missing_file_name?
        report_missing if ENV["SYNTAX_SUGGEST_DEBUG"]
      else
        until stop?
          @guess << @parts.shift
          @name = Pathname(@guess.join(":"))
        end

        # Every part was consumed, so nothing was left over
        # after the guessed name: treat it as not found
        if @parts.empty?
          report_missing
          @name = nil
        end
      end

      self
    end

    # True when guessing should end: either no parts are left
    # or the current guess exists on disk
    def stop?
      return true if @parts.empty?
      return false if @guess.empty?

      @name&.exist?
    end

    def skip_missing_file_name?
      @line.match?(EVAL_RE) || @line.match?(STREAMING_RE)
    end

    private def report_missing
      @io.puts "SyntaxSuggest: Could not find filename from #{@line.inspect}"
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module SyntaxSuggest
  # A priority ordered queue of blocks in which a block that
  # engulfs/covers/eats smaller blocks already in the queue
  # marks those smaller blocks as deleted
  class PriorityEngulfQueue
    def initialize
      @queue = PriorityQueue.new
    end

    def to_a
      @queue.to_a
    end

    def empty?
      @queue.empty?
    end

    def length
      @queue.length
    end

    def peek
      @queue.peek
    end

    def pop
      @queue.pop
    end

    # Adds a block, marking blocks it covers as deleted and
    # dropping deleted blocks that surface at the front
    #
    # Returns self
    def push(block)
      prune_engulf(block)
      @queue << block
      flush_deleted

      self
    end

    # Pops elements off the front for as long as the
    # front element reports itself as deleted
    private def flush_deleted
      @queue.pop while @queue.peek&.deleted?
    end

    # Marks every queued block that lies fully inside the
    # line range of `block` as deleted
    private def prune_engulf(block)
      front = @queue.peek

      # If we're about to pop off the same block, we can skip deleting
      # things from the frontier this iteration since we'll get it
      # on the next iteration
      return if front && (block <=> front) == 1

      # A block of size 1 cannot engulf another
      return if block.starts_at == block.ends_at

      @queue.to_a.each do |queued|
        covered = queued.starts_at >= block.starts_at && queued.ends_at <= block.ends_at
        queued.delete if covered
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
module SyntaxSuggest
  # Holds elements in a priority heap on insert
  #
  # Rather than re-sorting on every change, each element is
  # moved to where it belongs at the time it is added, so the
  # largest element (according to `<=>`) is always at the front
  #
  # Example:
  #
  #   queue = PriorityQueue.new
  #   queue << 33
  #   queue << 44
  #   queue << 1
  #
  #   puts queue.peek # => 44
  #
  class PriorityQueue
    attr_reader :elements

    def initialize
      @elements = []
    end

    # Appends the element and moves it up to its position
    def <<(element)
      @elements.push(element)
      bubble_up(last_index, element)
    end

    # Removes and returns the front (largest) element,
    # nil when the queue is empty
    def pop
      exchange(0, last_index)
      front = @elements.pop
      bubble_down(0)
      front
    end

    def length
      @elements.length
    end

    def empty?
      @elements.empty?
    end

    # Returns the front (largest) element without removing it
    def peek
      @elements.first
    end

    def to_a
      @elements
    end

    # Used for testing, extremely not performant
    #
    # Returns the elements from smallest to largest, the
    # queue holds the same elements afterwards
    def sorted
      snapshot = @elements.dup
      drained = []
      while (front = pop)
        drained << front
      end
      @elements = snapshot
      drained.reverse
    end

    private def last_index
      @elements.size - 1
    end

    # Swaps the element at `index` with its parent for as
    # long as the parent compares lower than `element`
    private def bubble_up(index, element)
      while index > 0
        parent_index = (index - 1) / 2
        break if (@elements[parent_index] <=> element) >= 0

        exchange(index, parent_index)
        index = parent_index
      end

      nil
    end

    # Swaps the element at `index` with its larger child for
    # as long as that child compares higher
    private def bubble_down(index)
      loop do
        child_index = (index * 2) + 1
        break if child_index > last_index

        # Use the right child when there is one and it is the larger
        has_right_child = child_index < last_index
        if has_right_child && (@elements[child_index + 1] <=> @elements[child_index]) == 1
          child_index += 1
        end

        break if (@elements[index] <=> @elements[child_index]) >= 0

        exchange(index, child_index)
        index = child_index
      end
    end

    # Swaps the elements stored at the two indexes
    def exchange(source, target)
      moved = @elements[source]
      @elements[source] = @elements[target]
      @elements[target] = moved
    end
  end
end
+ end