syntax_search 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+ module SyntaxErrorSearch
3
+ # This class is responsible for taking a code block that exists
4
+ # at a far indentation and then iteratively increasing the block
5
+ # so that it captures everything within the same indentation block.
6
+ #
7
+ # def dog
8
+ # puts "bow"
9
+ # puts "wow"
10
+ # end
11
+ #
12
+ # block = BlockExpand.new(code_lines: code_lines)
13
+ # .call(CodeBlock.new(lines: code_lines[1]))
14
+ #
15
+ # puts block.to_s
16
+ # # => puts "bow"
17
+ # puts "wow"
18
+ #
19
+ #
20
+ # Once a code block has captured everything at a given indentation level
21
+ # then it will expand to capture surrounding indentation.
22
+ #
23
+ # block = BlockExpand.new(code_lines: code_lines)
24
+ # .call(block)
25
+ #
26
+ # block.to_s
27
+ # # => def dog
28
+ # puts "bow"
29
+ # puts "wow"
30
+ # end
31
+ #
32
+ class BlockExpand
33
+ def initialize(code_lines: )
34
+ @code_lines = code_lines
35
+ end
36
+
37
+ def call(block)
38
+ if (next_block = expand_neighbors(block, grab_empty: true))
39
+ return next_block
40
+ end
41
+
42
+ expand_indent(block)
43
+ end
44
+
45
+ def expand_indent(block)
46
+ block = AroundBlockScan.new(code_lines: @code_lines, block: block)
47
+ .skip(:hidden?)
48
+ .stop_after_kw
49
+ .scan_adjacent_indent
50
+ .code_block
51
+ end
52
+
53
+ def expand_neighbors(block, grab_empty: true)
54
+ scan = AroundBlockScan.new(code_lines: @code_lines, block: block)
55
+ .skip(:hidden?)
56
+ .stop_after_kw
57
+ .scan_neighbors
58
+
59
+ # Slurp up empties
60
+ if grab_empty
61
+ scan = AroundBlockScan.new(code_lines: @code_lines, block: scan.code_block)
62
+ .scan_while {|line| line.empty? || line.hidden? }
63
+ end
64
+
65
+ new_block = scan.code_block
66
+
67
+ if block.lines == new_block.lines
68
+ return nil
69
+ else
70
+ return new_block
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxErrorSearch
4
+
5
+ # Given a block, this class will capture surrounding
6
+ # code to give the user more context for the location of
7
+ # the problem.
8
+ #
9
+ # Returns an array of CodeLines to be rendered.
10
+ #
11
+ # Surrounding code is captured regardless of visible state
12
+ #
13
+ # puts block.to_s # => "def bark"
14
+ #
15
+ # context = CaptureCodeContext.new(
16
+ # blocks: block,
17
+ # code_lines: code_lines
18
+ # )
19
+ #
20
+ # puts context.call.join
21
+ # # =>
22
+ # class Dog
23
+ # def bark
24
+ # end
25
+ #
26
+ class CaptureCodeContext
27
+ attr_reader :code_lines
28
+
29
+ def initialize(blocks: , code_lines:)
30
+ @blocks = Array(blocks)
31
+ @code_lines = code_lines
32
+ @visible_lines = @blocks.map(&:visible_lines).flatten
33
+ @lines_to_output = @visible_lines.dup
34
+ end
35
+
36
+ def call
37
+ @blocks.each do |block|
38
+ around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
39
+ .start_at_next_line
40
+ .capture_neighbor_context
41
+
42
+ around_lines -= block.lines
43
+
44
+ @lines_to_output.concat(around_lines)
45
+
46
+ AroundBlockScan.new(
47
+ block: block,
48
+ code_lines: @code_lines,
49
+ ).on_falling_indent do |line|
50
+ @lines_to_output << line
51
+ end
52
+ end
53
+
54
+ @lines_to_output.select!(&:not_empty?)
55
+ @lines_to_output.select!(&:not_comment?)
56
+ @lines_to_output.uniq!
57
+ @lines_to_output.sort!
58
+
59
+ return @lines_to_output
60
+ end
61
+ end
62
+ end
@@ -3,11 +3,7 @@
3
3
  module SyntaxErrorSearch
4
4
  # Multiple lines form a singular CodeBlock
5
5
  #
6
- # Source code is made of multiple CodeBlocks. A code block
7
- # has a reference to the source code that created itself, this allows
8
- # a code block to "expand" when needed
9
- #
10
- # The most important ability of a CodeBlock is this ability to expand:
6
+ # Source code is made of multiple CodeBlocks.
11
7
  #
12
8
  # Example:
13
9
  #
@@ -16,33 +12,39 @@ module SyntaxErrorSearch
16
12
  # # puts "foo"
17
13
  # # end
18
14
  #
19
- # code_block.expand_until_next_boundry
15
+ # code_block.valid? # => true
16
+ # code_block.invalid? # => false
20
17
  #
21
- # code_block.to_s # =>
22
- # # class Foo
23
- # # def foo
24
- # # puts "foo"
25
- # # end
26
- # # end
27
18
  #
28
19
  class CodeBlock
29
20
  attr_reader :lines
30
21
 
31
- def initialize(code_lines: nil, lines: [])
22
+ def initialize(lines: [])
32
23
  @lines = Array(lines)
33
- @code_lines = code_lines
24
+ end
25
+
26
+ def visible_lines
27
+ @lines.select(&:visible?).select(&:not_empty?)
28
+ end
29
+
30
+ def mark_invisible
31
+ @lines.map(&:mark_invisible)
34
32
  end
35
33
 
36
34
  def is_end?
37
35
  to_s.strip == "end"
38
36
  end
39
37
 
38
+ def hidden?
39
+ @lines.all?(&:hidden?)
40
+ end
41
+
40
42
  def starts_at
41
- @lines.first&.line_number
43
+ @starts_at ||= @lines.first&.line_number
42
44
  end
43
45
 
44
- def code_lines
45
- @code_lines
46
+ def ends_at
47
+ @ends_at ||= @lines.last&.line_number
46
48
  end
47
49
 
48
50
  # This is used for frontier ordering, we are searching from
@@ -50,158 +52,15 @@ module SyntaxErrorSearch
50
52
  # populate an array with multiple code blocks then call `sort!`
51
53
  # on it without having to specify the sorting criteria
52
54
  def <=>(other)
53
- self.current_indent <=> other.current_indent
54
- end
55
-
56
- # Only the lines that are not empty and visible
57
- def visible_lines
58
- @lines
59
- .select(&:not_empty?)
60
- .select(&:visible?)
61
- end
62
-
63
- # This method is used to expand a code block to capture it's calling context
64
- def expand_until_next_boundry
65
- expand_to_indent(next_indent)
66
- self
67
- end
68
-
69
- # This method expands the given code block until it captures
70
- # its nearest neighbors. This is used to expand a single line of code
71
- # to its smallest likely block.
72
- #
73
- # code_block.to_s # =>
74
- # # puts "foo"
75
- # code_block.expand_until_neighbors
76
- #
77
- # code_block.to_s # =>
78
- # # puts "foo"
79
- # # puts "bar"
80
- # # puts "baz"
81
- #
82
- def expand_until_neighbors
83
- expand_to_indent(current_indent)
84
-
85
- expand_hidden_parner_line if self.to_s.strip == "end"
86
- self
87
- end
88
-
89
- def expand_hidden_parner_line
90
- index = @lines.first.index
91
- indent = current_indent
92
- partner_line = code_lines.select {|line| line.index < index && line.indent == indent }.last
93
-
94
- if partner_line&.hidden?
95
- partner_line.mark_visible
96
- @lines.prepend(partner_line)
97
- end
98
- end
99
-
100
- # This method expands the existing code block up (before)
101
- # and down (after). It will break on change in indentation
102
- # and empty lines.
103
- #
104
- # code_block.to_s # =>
105
- # # def foo
106
- # # puts "foo"
107
- # # end
108
- #
109
- # code_block.expand_to_indent(0)
110
- # code_block.to_s # =>
111
- # # class Foo
112
- # # def foo
113
- # # puts "foo"
114
- # # end
115
- # # end
116
- #
117
- private def expand_to_indent(indent)
118
- array = []
119
- before_lines(skip_empty: false).each do |line|
120
- if line.empty?
121
- array.prepend(line)
122
- break
123
- end
124
-
125
- if line.indent == indent
126
- array.prepend(line)
127
- else
128
- break
129
- end
130
- end
131
-
132
- array << @lines
133
-
134
- after_lines(skip_empty: false).each do |line|
135
- if line.empty?
136
- array << line
137
- break
138
- end
139
-
140
- if line.indent == indent
141
- array << line
142
- else
143
- break
144
- end
145
- end
55
+ out = self.current_indent <=> other.current_indent
56
+ return out if out != 0
146
57
 
147
- @lines = array.flatten
148
- end
149
-
150
- def next_indent
151
- [
152
- before_line&.indent || 0,
153
- after_line&.indent || 0
154
- ].max
58
+ # Stable sort
59
+ self.starts_at <=> other.starts_at
155
60
  end
156
61
 
157
62
  def current_indent
158
- lines.detect(&:not_empty?)&.indent || 0
159
- end
160
-
161
- def before_line
162
- before_lines.first
163
- end
164
-
165
- def after_line
166
- after_lines.first
167
- end
168
-
169
- def before_lines(skip_empty: true)
170
- index = @lines.first.index
171
- lines = code_lines.select {|line| line.index < index }
172
- lines.select!(&:not_empty?) if skip_empty
173
- lines.select!(&:visible?)
174
- lines.reverse!
175
-
176
- lines
177
- end
178
-
179
- def after_lines(skip_empty: true)
180
- index = @lines.last.index
181
- lines = code_lines.select {|line| line.index > index }
182
- lines.select!(&:not_empty?) if skip_empty
183
- lines.select!(&:visible?)
184
- lines
185
- end
186
-
187
- # Returns a code block of the source that does not include
188
- # the current lines. This is useful for checking if a source
189
- # with the given lines removed parses successfully. If so
190
- #
191
- # Then it's proof that the current block is invalid
192
- def block_without
193
- @block_without ||= CodeBlock.new(
194
- source: @source,
195
- lines: @source.code_lines - @lines
196
- )
197
- end
198
-
199
- def document_valid_without?
200
- block_without.valid?
201
- end
202
-
203
- def valid_without?
204
- block_without.valid?
63
+ @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0
205
64
  end
206
65
 
207
66
  def invalid?
@@ -1,178 +1,43 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SyntaxErrorSearch
4
- # This class is responsible for generating, storing, and sorting code blocks
4
+ # The main function of the frontier is to hold the edges of our search and to
5
+ # evaluate when we can stop searching.
5
6
  #
6
- # The search algorithm for finding our syntax errors isn't in this class, but
7
- # this is class holds the bulk of the logic for generating, storing, detecting
8
- # and filtering invalid code.
7
+ # ## Knowing where we've been
9
8
  #
10
- # This is loosely based on the idea of a "frontier" for searching for a path
11
- # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
9
+ # Once a code block is generated it is added onto the frontier where it will be
10
+ # sorted and then the frontier can be filtered. Large blocks that totally contain a
11
+ # smaller block will cause the smaller block to be evicted.
12
12
  #
13
- # In this case our path is going from code with a syntax error to code without a
14
- # syntax error. We're currently doing that by evaluating individual lines
15
- # with respect to indentation and other whitespace (empty lines). As represented
16
- # by individual "code blocks".
13
+ # CodeFrontier#<<
14
+ # CodeFrontier#pop
17
15
  #
18
- # This class does not just store the frontier that we're searching, but is responsible
19
- # for generating new code blocks as well. This is not ideal, but the state of generating
20
- # and evaluating paths i.e. codeblocks is very tightly coupled.
16
+ # ## Knowing where we can go
21
17
  #
22
- # ## Creation
18
+ # Internally it keeps track of an "indent hash" which is exposed via `next_indent_line`
19
+ # when called this will return a line of code with the most indentation.
23
20
  #
24
- # This example code is re-used in the other sections
21
+ # This line of code can be used to build a CodeBlock, and then when that code block
22
+ # is added back to the frontier, then the lines in the code block are removed from the
23
+ # indent hash so we don't double-create the same block.
25
24
  #
26
- # Example:
25
+ # CodeFrontier#next_indent_line
26
+ # CodeFrontier#register_indent_block
27
27
  #
28
- # code_lines = [
29
- # CodeLine.new(line: "def cinco\n", index: 0)
30
- # CodeLine.new(line: " def dog\n", index: 1) # Syntax error 1
31
- # CodeLine.new(line: " def cat\n", index: 2) # Syntax error 2
32
- # CodeLine.new(line: "end\n", index: 3)
33
- # ]
28
+ # ## Knowing when to stop
34
29
  #
35
- # frontier = CodeFrontier.new(code_lines: code_lines)
30
+ # The frontier holds the syntax error when removing all code blocks from the original
31
+ # source document allows it to be parsed as syntactically valid:
36
32
  #
37
- # frontier << frontier.next_block if frontier.next_block?
38
- # frontier << frontier.next_block if frontier.next_block?
33
+ # CodeFrontier#holds_all_syntax_errors?
39
34
  #
40
- # frontier.holds_all_syntax_errors? # => true
41
- # block = frontier.pop
42
- # frontier.holds_all_syntax_errors? # => false
43
- # frontier << block
44
- # frontier.holds_all_syntax_errors? # => true
35
+ # ## Filtering false positives
45
36
  #
46
- # frontier.detect_invalid_blocks.map(&:to_s) # =>
47
- # [
48
- # "def dog\n",
49
- # "def cat\n"
50
- # ]
37
+ # Once the search is completed, the frontier will have many blocks that do not contain
38
+ # the syntax error. To filter to the smallest subset that does, call:
51
39
  #
52
- # ## Block Generation
53
- #
54
- # Currently code blocks are generated based off of indentation. With the idea that blocks are,
55
- # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
56
- # then we also need to remove those lines from our generation code so we don't generate the same block
57
- # twice by accident.
58
- #
59
- # This is block generation is currently done via the "indent_hash" internally by starting at the outer
60
- # most indentation.
61
- #
62
- # Example:
63
- #
64
- # ```
65
- # def river
66
- # puts "lol" # <=== Start looking here and expand outwards
67
- # end
68
- # ```
69
- #
70
- # Generating new code blocks is a little verbose but looks like this:
71
- #
72
- # frontier << frontier.next_block if frontier.next_block?
73
- #
74
- # Once a block is in the frontier, it can be popped off:
75
- #
76
- # frontier.pop
77
- # # => <# CodeBlock >
78
- #
79
- # ## Block (frontier) storage, ordering and retrieval
80
- #
81
- # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
82
- # The array is sorted by indentation order, so that when a block is popped off the array, the one with
83
- # the largest current indentation is evaluated first.
84
- #
85
- # For example, if we have these two blocks in the frontier:
86
- #
87
- # ```
88
- # # Block A - 0 spaces for indentation
89
- #
90
- # def cinco
91
- # puts "lol"
92
- # end
93
- # ```
94
- #
95
- # ```
96
- # # Block B - 2 spaces for indentation
97
- #
98
- # def river
99
- # puts "hehe"
100
- # end
101
- # ```
102
- #
103
- # The "Block B" has more current indentation, so it would be evaluated first.
104
- #
105
- # ## Frontier evaluation (Find the syntax error)
106
- #
107
- # Another key difference between this and a normal search "frontier" is that we're not checking if
108
- # an individual code block meets the goal (turning invalid code to valid code) since there can
109
- # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
110
- # evaluating all the contents of the frontier at the same time to see if the solution exists in any
111
- # of our search blocks.
112
- #
113
- # # Using the previously generated frontier
114
- #
115
- # frontier << Block.new(lines: code_lines[1], code_lines: code_lines)
116
- # frontier.holds_all_syntax_errors? # => false
117
- #
118
- # frontier << Block.new(lines: code_lines[2], code_lines: code_lines)
119
- # frontier.holds_all_syntax_errors? # => true
120
- #
121
- # ## Detect invalid blocks (Filter for smallest solution)
122
- #
123
- # After we prove that a solution exists and we've found it to be in our frontier, we can start stop searching.
124
- # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
125
- # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
126
- #
127
- # # Using the previously generated frontier
128
- #
129
- # frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
130
- # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
131
- # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
132
- # frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
133
- #
134
- # frontier.count # => 4
135
- # frontier.detect_invalid_blocks.length => 2
136
- # frontier.detect_invalid_blocks.map(&:to_s) # =>
137
- # [
138
- # "def dog\n",
139
- # "def cat\n"
140
- # ]
141
- #
142
- # Once invalid blocks are found and filtered, then they can be passed to a formatter.
143
- #
144
- #
145
- #
146
-
147
- class IndentScan
148
- attr_reader :code_lines
149
-
150
- def initialize(code_lines: )
151
- @code_lines = code_lines
152
- end
153
-
154
- def neighbors_from_top(top_line)
155
- code_lines
156
- .select {|l| l.index >= top_line.index }
157
- .select {|l| l.not_empty? }
158
- .select {|l| l.visible? }
159
- .take_while {|l| l.indent >= top_line.indent }
160
- end
161
-
162
- def each_neighbor_block(top_line)
163
- neighbors = neighbors_from_top(top_line)
164
-
165
- until neighbors.empty?
166
- lines = [neighbors.pop]
167
- while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any?
168
- lines.prepend neighbors.pop
169
- end
170
-
171
- yield block if block
172
- end
173
- end
174
- end
175
-
40
+ # CodeFrontier#detect_invalid_blocks
176
41
  class CodeFrontier
177
42
  def initialize(code_lines: )
178
43
  @code_lines = code_lines
@@ -207,16 +72,9 @@ module SyntaxErrorSearch
207
72
 
208
73
  # Returns a code block with the largest indentation possible
209
74
  def pop
210
- return nil if empty?
211
-
212
75
  return @frontier.pop
213
76
  end
214
77
 
215
- def next_block?
216
- !@indent_hash.empty?
217
- end
218
-
219
-
220
78
  def indent_hash_indent
221
79
  @indent_hash.keys.sort.last
222
80
  end
@@ -226,40 +84,25 @@ module SyntaxErrorSearch
226
84
  @indent_hash[indent]&.first
227
85
  end
228
86
 
229
- def generate_blocks
230
- end
231
-
232
- def next_block
233
- indent = @indent_hash.keys.sort.last
234
- lines = @indent_hash[indent].first
235
-
236
- block = CodeBlock.new(
237
- lines: lines,
238
- code_lines: @code_lines
239
- ).expand_until_neighbors
240
-
241
- register(block)
242
- block
243
- end
244
-
245
87
  def expand?
246
88
  return false if @frontier.empty?
247
89
  return true if @indent_hash.empty?
248
90
 
249
- @frontier.last.current_indent >= @indent_hash.keys.sort.last
250
- end
91
+ frontier_indent = @frontier.last.current_indent
92
+ hash_indent = @indent_hash.keys.sort.last
251
93
 
252
- # This method is responsible for determining if a new code
253
- # block should be generated instead of evaluating an already
254
- # existing block in the frontier
255
- def generate_new_block?
256
- return false if @indent_hash.empty?
257
- return true if @frontier.empty?
94
+ if ENV["DEBUG"]
95
+ puts "```"
96
+ puts @frontier.last.to_s
97
+ puts "```"
98
+ puts " @frontier indent: #{frontier_indent}"
99
+ puts " @hash indent: #{hash_indent}"
100
+ end
258
101
 
259
- @frontier.last.current_indent <= @indent_hash.keys.sort.last
102
+ frontier_indent >= hash_indent
260
103
  end
261
104
 
262
- def register(block)
105
+ def register_indent_block(block)
263
106
  block.lines.each do |line|
264
107
  @indent_hash[line.indent]&.delete(line)
265
108
  end
@@ -273,22 +116,18 @@ module SyntaxErrorSearch
273
116
  # and that each code block's lines are removed from the indentation hash so we
274
117
  # don't re-evaluate the same line multiple times.
275
118
  def <<(block)
276
- register(block)
119
+ register_indent_block(block)
277
120
 
121
+ # Make sure we don't double expand: if a code block fully engulfs another code block, keep the bigger one
122
+ @frontier.reject! {|b|
123
+ b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
124
+ }
278
125
  @frontier << block
279
126
  @frontier.sort!
280
127
 
281
128
  self
282
129
  end
283
130
 
284
- def any?
285
- !empty?
286
- end
287
-
288
- def empty?
289
- @frontier.empty? && @indent_hash.empty?
290
- end
291
-
292
131
  # Example:
293
132
  #
294
133
  # combination([:a, :b, :c, :d])