dead_end 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module DeadEnd
  # A CodeBlock groups one or more code lines into a single unit.
  #
  # A source document is represented as a collection of CodeBlocks.
  #
  # Example:
  #
  #   code_block.to_s # =>
  #   #   def foo
  #   #     puts "foo"
  #   #   end
  #
  #   code_block.valid?   # => true
  #   code_block.invalid? # => false
  #
  class CodeBlock
    attr_reader :lines

    # lines - a single line object or a collection of them (coerced via `Array()`)
    def initialize(lines: [])
      @lines = Array(lines)
    end

    # Lines that answer true to both `visible?` and `not_empty?`
    def visible_lines
      @lines.select { |line| line.visible? && line.not_empty? }
    end

    # Calls `mark_invisible` on every line and returns the collected results
    def mark_invisible
      @lines.map { |line| line.mark_invisible }
    end

    # True when the block's text, ignoring surrounding whitespace, is exactly "end"
    def is_end?
      "end" == to_s.strip
    end

    # True when every line in the block reports `hidden?`
    def hidden?
      @lines.all? { |line| line.hidden? }
    end

    # Line number of the first line (nil for a block without lines), memoized
    def starts_at
      @starts_at ||= begin
        first_line = @lines.first
        first_line&.line_number
      end
    end

    # Line number of the last line (nil for a block without lines), memoized
    def ends_at
      @ends_at ||= begin
        last_line = @lines.last
        last_line&.line_number
      end
    end

    # Ordering used by the frontier: blocks compare by indentation first,
    # so an array of blocks can simply be `sort!`ed without passing any
    # sorting criteria. Blocks with equal indentation fall back to their
    # starting line number so the ordering is deterministic.
    def <=>(other)
      by_indent = current_indent <=> other.current_indent
      return starts_at <=> other.starts_at if by_indent == 0

      by_indent
    end

    # Smallest indentation among the non-empty lines; 0 when there are none
    def current_indent
      @current_indent ||= begin
        indents = lines.select(&:not_empty?).map(&:indent)
        indents.min || 0
      end
    end

    # Inverse of `valid?`
    def invalid?
      !valid?
    end

    # Delegates to `DeadEnd.valid?` with the block's current text
    def valid?
      DeadEnd.valid?(to_s)
    end

    # Concatenation of the string form of every line
    def to_s
      @lines.join
    end
  end
end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
module DeadEnd
  # The main function of the frontier is to hold the edges of our search and to
  # evaluate when we can stop searching.
  #
  # ## Knowing where we've been
  #
  # Once a code block is generated it is added onto the frontier where it will be
  # sorted and then the frontier can be filtered. Large blocks that totally contain a
  # smaller block will cause the smaller block to be evicted.
  #
  #   CodeFrontier#<<
  #   CodeFrontier#pop
  #
  # ## Knowing where we can go
  #
  # Internally it keeps track of an "indent hash" which is exposed via `next_indent_line`;
  # when called this will return a line of code with the most indentation.
  #
  # This line of code can be used to build a CodeBlock, and when that code block
  # is added back to the frontier the lines in the code block are removed from the
  # indent hash so we don't double-create the same block.
  #
  #   CodeFrontier#next_indent_line
  #   CodeFrontier#register_indent_block
  #
  # ## Knowing when to stop
  #
  # The frontier holds the syntax error when removing all code blocks from the original
  # source document allows it to be parsed as syntactically valid:
  #
  #   CodeFrontier#holds_all_syntax_errors?
  #
  # ## Filtering false positives
  #
  # Once the search is completed, the frontier will have many blocks that do not contain
  # the syntax error. To filter to the smallest subset that does, call:
  #
  #   CodeFrontier#detect_invalid_blocks
  class CodeFrontier
    # code_lines - the line objects of the document being searched. Every
    #              line that is not `empty?` is indexed by its `indent`.
    def initialize(code_lines:)
      @code_lines = code_lines
      @frontier = []
      @indent_hash = {}
      code_lines.each do |line|
        next if line.empty?

        (@indent_hash[line.indent] ||= []) << line
      end
    end

    # Number of blocks currently held in the frontier
    def count
      @frontier.count
    end

    # Returns true if the document is valid with all lines
    # removed. By default it checks all blocks present in
    # the frontier array, but can be used for arbitrary arrays
    # of code blocks as well
    def holds_all_syntax_errors?(block_array = @frontier)
      without_lines = block_array.map(&:lines)

      DeadEnd.valid_without?(
        without_lines: without_lines,
        code_lines: @code_lines
      )
    end

    # Removes and returns the last block of the (sorted) frontier, i.e.
    # the block with the largest indentation; nil when the frontier is empty
    def pop
      @frontier.pop
    end

    # Largest indentation still present in the indent hash, nil when it is empty
    def indent_hash_indent
      @indent_hash.keys.max
    end

    # First not-yet-registered line at the largest remaining indentation,
    # nil when every line has been registered
    def next_indent_line
      @indent_hash[indent_hash_indent]&.first
    end

    # Decides whether the search should expand an existing frontier block
    # (true) or build new blocks from the indent hash (false).
    def expand?
      return false if @frontier.empty?
      return true if @indent_hash.empty?

      frontier_indent = @frontier.last.current_indent
      hash_indent = indent_hash_indent

      if ENV["DEBUG"]
        puts "```"
        puts @frontier.last.to_s
        puts "```"
        puts " @frontier indent: #{frontier_indent}"
        puts " @hash indent: #{hash_indent}"
      end

      frontier_indent >= hash_indent
    end

    # Removes the block's lines from the indent hash so they are not used
    # to build another block. Returns self.
    def register_indent_block(block)
      block.lines.each do |line|
        @indent_hash[line.indent]&.delete(line)
      end
      # Drop indentation levels that have no lines left
      @indent_hash.delete_if { |_indent, lines| lines.empty? }
      self
    end

    # Add a block to the frontier
    #
    # This method ensures the frontier always remains sorted (in indentation order)
    # and that each code block's lines are removed from the indentation hash so we
    # don't re-evaluate the same line multiple times.
    def <<(block)
      register_indent_block(block)

      # Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one
      @frontier.reject! do |existing|
        existing.starts_at >= block.starts_at && existing.ends_at <= block.ends_at
      end
      @frontier << block
      @frontier.sort!

      self
    end

    # Every non-empty combination of the array's elements, smallest first.
    #
    # Example:
    #
    #   combination([:a, :b, :c, :d])
    #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
    def self.combination(array)
      1.upto(array.length).flat_map do |size|
        array.combination(size).to_a
      end
    end

    # Given that we know our syntax error exists somewhere in our frontier, we want to find
    # the smallest possible set of blocks that contain all the syntax errors
    #
    # Candidates are visited in the same order `CodeFrontier.combination` would
    # produce them, but lazily: the 2**n - 1 combinations are never all held in
    # memory and the search stops at the first match. Returns [] when no
    # combination holds all syntax errors.
    def detect_invalid_blocks
      1.upto(@frontier.length) do |size|
        @frontier.combination(size) do |block_array|
          return block_array if holds_all_syntax_errors?(block_array)
        end
      end

      []
    end
  end
end
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
module DeadEnd
  # Represents a single line of code of a given source file
  #
  # This object contains metadata about the line such as
  # amount of indentation, and whether it is empty or not.
  #
  # While a given search for syntax errors is being performed
  # state about the search can be stored in individual lines,
  # for example by marking a line as invalid.
  #
  # Visibility of lines can be toggled on and off.
  #
  # Example:
  #
  #   line = CodeLine.new(line: "def foo\n", index: 0)
  #   line.line_number # => 1
  #   line.empty?      # => false
  #   line.visible?    # => true
  #   line.mark_invisible
  #   line.visible?    # => false
  #
  # A CodeBlock is made of multiple CodeLines
  #
  # Marking a line as invisible indicates that it should not be used
  # for syntax checks. It's essentially the same as commenting it out
  #
  # Marking a line as invisible also lets the overall program know
  # that it should not check that area for syntax errors.
  class CodeLine
    # A backslash immediately followed by the input record separator
    TRAILING_SLASH = ("\\" + $/).freeze

    attr_reader :line, :index, :indent, :original_line

    # line  - the text of the line (the string is frozen and kept as `original_line`)
    # index - zero based position of the line in the source document
    def initialize(line:, index:)
      @original_line = line.freeze
      @line = @original_line
      @empty = line.strip.empty?
      @index = index
      @indent = SpaceCount.indent(line)
      @status = nil # valid, invalid, unknown
      @invalid = false

      lex_detect!
    end

    # Lexes the line once and caches what the predicates below report:
    # whether it opens more keyword blocks than it closes (or the reverse),
    # whether it is a comment, and whether it ends in a trailing slash.
    private def lex_detect!
      lex_values = LexAll.new(source: line)
      kw_count = 0
      end_count = 0
      lex_values.each do |lex|
        next unless lex.type == :on_kw

        case lex.token
        when 'def', 'case', 'for', 'begin', 'class', 'module', 'if', 'unless', 'while', 'until', 'do'
          kw_count += 1
        when 'end'
          end_count += 1
        end
      end

      @is_kw = (kw_count - end_count) > 0
      @is_end = (end_count - kw_count) > 0

      # A comment line is one whose first non-whitespace token is a comment
      first_token = lex_values.detect { |lex| lex.type != :on_sp }
      @is_comment = first_token&.type == :on_comment

      # `last` is nil when the lexer produced no tokens at all
      @is_trailing_slash = lex_values.last&.token == TRAILING_SLASH
    end

    alias :original :original_line

    # True when the last lexed token of the line is TRAILING_SLASH
    def trailing_slash?
      @is_trailing_slash
    end

    # Lines are ordered by their position in the document
    def <=>(b)
      index <=> b.index
    end

    # True when the first non-whitespace token of the line is a comment
    def is_comment?
      @is_comment
    end

    def not_comment?
      !is_comment?
    end

    # True when the line has more block-opening keywords than `end` keywords
    def is_kw?
      @is_kw
    end

    # True when the line has more `end` keywords than block-opening keywords
    def is_end?
      @is_end
    end

    # Flags the line as invalid. Returns self.
    def mark_invalid
      @invalid = true
      self
    end

    def marked_invalid?
      @invalid
    end

    # Blanks out the line's current text; the original is kept. Returns self.
    def mark_invisible
      @line = ""
      self
    end

    # Restores the line's text to the original. Returns self.
    def mark_visible
      @line = @original_line
      self
    end

    # A line is visible while its current text is not the empty string
    def visible?
      !line.empty?
    end

    def hidden?
      !visible?
    end

    # One based position of the line in the source document
    def line_number
      index + 1
    end
    alias :number :line_number

    def not_empty?
      !empty?
    end

    # True when the original text was blank (only whitespace); unlike
    # `visible?` this does not change when the line is marked invisible
    def empty?
      @empty
    end

    # The current text of the line ("" while invisible)
    def to_s
      line
    end
  end
end
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
module DeadEnd
  # Searches code for a syntax error
  #
  # The bulk of the heavy lifting is done in:
  #
  #  - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
  #  - ParseBlocksFromIndentLine (Creates blocks into the frontier)
  #  - BlockExpand (Expands existing blocks to search more code)
  #
  # ## Syntax error detection
  #
  # When the frontier holds the syntax error, we can stop searching
  #
  #   search = CodeSearch.new(<<~EOM)
  #     def dog
  #       def lol
  #     end
  #   EOM
  #
  #   search.call
  #
  #   search.invalid_blocks.map(&:to_s) # =>
  #   # => ["def lol\n"]
  #
  class CodeSearch
    attr_reader :invalid_blocks, :record_dir, :code_lines

    attr_reader :frontier
    private :frontier

    # source     - String holding the whole document to search
    # record_dir - directory under which debugging records are written, nil
    #              disables recording. Defaults to ENV["DEAD_END_RECORD_DIR"]
    #              when set, otherwise to "tmp" when ENV["DEBUG"] is set,
    #              otherwise to nil.
    #
    # The default is parenthesized on purpose: `a || b ? "tmp" : nil` parses
    # as `(a || b) ? "tmp" : nil`, which ignored the configured directory.
    def initialize(source, record_dir: ENV["DEAD_END_RECORD_DIR"] || (ENV["DEBUG"] ? "tmp" : nil))
      @source = source
      if record_dir
        # NOTE(review): `%s` is seconds since the epoch, `%S` (seconds of the
        # minute) may have been intended; left unchanged
        @time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N')
        @record_dir = Pathname(record_dir).join(@time).tap { |path| path.mkpath }
        @write_count = 0
      end
      code_lines = source.lines.each_with_index.map do |line, index|
        CodeLine.new(line: line, index: index)
      end

      @code_lines = TrailingSlashJoin.new(code_lines: code_lines).call

      @frontier = CodeFrontier.new(code_lines: @code_lines)
      @invalid_blocks = []
      @name_tick = Hash.new { |hash, key| hash[key] = 0 }
      @tick = 0
      # NOTE(review): this receives the lines from before TrailingSlashJoin while
      # every other collaborator receives @code_lines; confirm that is intended
      @block_expand = BlockExpand.new(code_lines: code_lines)
      @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
    end

    # Used for debugging
    #
    # Writes a rendering of the block to a numbered file inside `record_dir`
    # (and prints it when ENV["DEBUG"] is set). Does nothing without a
    # record directory.
    def record(block:, name: "record")
      return unless @record_dir

      @name_tick[name] += 1
      filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
      if ENV["DEBUG"]
        puts "\n\n==== #{filename} ===="
        puts "\n```#{block.starts_at}:#{block.ends_at}"
        puts block.to_s
        puts "```"
        puts " block indent: #{block.current_indent}"
      end
      @record_dir.join(filename).open(mode: "a") do |f|
        display = DisplayInvalidBlocks.new(
          blocks: block,
          terminal: false,
          code_lines: @code_lines,
        )
        f.write(display.indent(display.code_with_lines))
      end
    end

    # Records the block and adds it to the frontier. A block that is valid
    # on its own has its lines marked invisible first.
    def push(block, name:)
      record(block: block, name: name)

      block.lines.each(&:mark_invisible) if block.valid?
      frontier << block
    end

    # Removes the block without putting it back in the frontier
    def sweep(block:, name:)
      record(block: block, name: name)

      block.lines.each(&:mark_invisible)
      frontier.register_indent_block(block)
    end

    # Parses the most indented lines into blocks that are marked
    # and added to the frontier
    #
    # Recording, validation and hiding of valid blocks all happen inside
    # `push`, so they are not repeated here.
    def add_invalid_blocks
      max_indent = frontier.next_indent_line&.indent

      while (line = frontier.next_indent_line) && (line.indent == max_indent)
        @parse_blocks_from_indent_line.each_neighbor_block(line) do |block|
          push(block, name: "add")
        end
      end
    end

    # Given an already existing block in the frontier, expand it to see
    # if it contains our invalid syntax
    def expand_invalid_block
      block = frontier.pop
      return unless block

      record(block: block, name: "pop")

      expanded = @block_expand.call(block)
      push(expanded, name: "expand")
    end

    # Pushes every block reported by HeredocBlockParse onto the frontier
    def sweep_heredocs
      heredoc_blocks = HeredocBlockParse.new(
        source: @source,
        code_lines: @code_lines
      ).call

      heredoc_blocks.each do |block|
        push(block, name: "heredoc")
      end
    end

    # Hides all comment lines so they take no part in the search
    def sweep_comments
      comment_lines = @code_lines.select(&:is_comment?)
      return if comment_lines.empty?

      sweep(block: CodeBlock.new(lines: comment_lines), name: "comments")
    end

    # Main search loop
    #
    # Alternates between expanding existing blocks and adding new ones until
    # the frontier holds every syntax error, then stores the smallest set of
    # offending blocks in `invalid_blocks` (ordered by starting line).
    # Returns self.
    def call
      sweep_heredocs
      sweep_comments
      until frontier.holds_all_syntax_errors?
        @tick += 1

        if frontier.expand?
          expand_invalid_block
        else
          add_invalid_blocks
        end
      end

      @invalid_blocks.concat(frontier.detect_invalid_blocks)
      @invalid_blocks.sort_by!(&:starts_at)
      self
    end
  end
end