dead_end 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
# Multiple lines form a singular CodeBlock
#
# Source code is made of multiple CodeBlocks.
#
# Example:
#
#   code_block.to_s # =>
#   #   def foo
#   #     puts "foo"
#   #   end
#
#   code_block.valid? # => true
#   code_block.invalid? # => false
#
class CodeBlock
  attr_reader :lines

  # lines - a line object or an array of line objects; coerced via Array()
  def initialize(lines: [])
    @lines = Array(lines)
  end

  # Returns the lines that are both visible and not empty
  def visible_lines
    @lines.select { |line| line.visible? && line.not_empty? }
  end

  # Calls `mark_invisible` on every line in the block and returns the lines
  def mark_invisible
    @lines.each(&:mark_invisible)
  end

  # True when the block's text, ignoring surrounding whitespace, is exactly "end"
  def is_end?
    to_s.strip == "end"
  end

  # True when every line in the block reports itself as hidden
  def hidden?
    @lines.all?(&:hidden?)
  end

  # Line number of the first line, or nil when the block has no lines
  def starts_at
    @starts_at ||= @lines.first&.line_number
  end

  # Line number of the last line, or nil when the block has no lines
  def ends_at
    @ends_at ||= @lines.last&.line_number
  end

  # This is used for frontier ordering, we are searching from
  # the largest indentation to the smallest. This allows us to
  # populate an array with multiple code blocks then call `sort!`
  # on it without having to specify the sorting criteria
  def <=>(other)
    by_indent = current_indent <=> other.current_indent
    return by_indent unless by_indent == 0

    # Tie-break on position so blocks with equal indentation keep a
    # deterministic order
    starts_at <=> other.starts_at
  end

  # Smallest indentation among the non-empty lines, or 0 when there are none
  def current_indent
    @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0
  end

  def invalid?
    !valid?
  end

  # Delegates the syntax check of the block's text to DeadEnd.valid?
  def valid?
    DeadEnd.valid?(to_s)
  end

  # Concatenation of the string form of every line
  def to_s
    @lines.join
  end
end
78
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
# The main function of the frontier is to hold the edges of our search and to
# evaluate when we can stop searching.
#
# ## Knowing where we've been
#
# Once a code block is generated it is added onto the frontier where it will be
# sorted and then the frontier can be filtered. Large blocks that totally contain a
# smaller block will cause the smaller block to be evicted.
#
#   CodeFrontier#<<
#   CodeFrontier#pop
#
# ## Knowing where we can go
#
# Internally it keeps track of an "indent hash" which is exposed via `next_indent_line`
# when called this will return a line of code with the most indentation.
#
# This line of code can be used to build a CodeBlock, and when that code block
# is added back to the frontier, the lines in the code block are removed from the
# indent hash so we don't double-create the same block.
#
#   CodeFrontier#next_indent_line
#   CodeFrontier#register_indent_block
#
# ## Knowing when to stop
#
# The frontier holds the syntax error when removing all code blocks from the original
# source document allows it to be parsed as syntactically valid:
#
#   CodeFrontier#holds_all_syntax_errors?
#
# ## Filtering false positives
#
# Once the search is completed, the frontier will have many blocks that do not contain
# the syntax error. To filter to the smallest subset that does call:
#
#   CodeFrontier#detect_invalid_blocks
class CodeFrontier
  # code_lines - the line objects of the document; every non-empty line is
  #              indexed by its indentation in the indent hash
  def initialize(code_lines:)
    @code_lines = code_lines
    @frontier = []
    @indent_hash = {}
    code_lines.each do |line|
      next if line.empty?

      (@indent_hash[line.indent] ||= []) << line
    end
  end

  # Number of blocks currently on the frontier
  def count
    @frontier.count
  end

  # Returns true if the document is valid with all lines
  # removed. By default it checks all blocks present in
  # the frontier array, but can be used for arbitrary arrays
  # of codeblocks as well
  def holds_all_syntax_errors?(block_array = @frontier)
    without_lines = block_array.map(&:lines)

    DeadEnd.valid_without?(
      without_lines: without_lines,
      code_lines: @code_lines
    )
  end

  # Returns a code block with the largest indentation possible
  # (the frontier is kept sorted, so this is its last element),
  # or nil when the frontier is empty
  def pop
    @frontier.pop
  end

  # Largest indentation still present in the indent hash, nil when it is empty
  def indent_hash_indent
    @indent_hash.keys.max
  end

  # First remaining line with the largest indentation, nil when none remain
  def next_indent_line
    @indent_hash[indent_hash_indent]&.first
  end

  # Decides whether the search should expand an existing frontier block
  # (true) rather than build new blocks from the indent hash (false).
  def expand?
    return false if @frontier.empty?
    return true if @indent_hash.empty?

    frontier_indent = @frontier.last.current_indent
    hash_indent = indent_hash_indent

    if ENV["DEBUG"]
      puts "```"
      puts @frontier.last.to_s
      puts "```"
      puts " @frontier indent: #{frontier_indent}"
      puts " @hash indent: #{hash_indent}"
    end

    frontier_indent >= hash_indent
  end

  # Removes the block's lines from the indent hash, dropping any
  # indentation level that ends up with no lines. Returns self.
  def register_indent_block(block)
    block.lines.each do |line|
      @indent_hash[line.indent]&.delete(line)
    end
    @indent_hash.reject! { |_indent, lines| lines.empty? }
    self
  end

  # Add a block to the frontier
  #
  # This method ensures the frontier always remains sorted (in indentation order)
  # and that each code block's lines are removed from the indentation hash so we
  # don't re-evaluate the same line multiple times.
  def <<(block)
    register_indent_block(block)

    # Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one
    @frontier.reject! do |existing|
      existing.starts_at >= block.starts_at && existing.ends_at <= block.ends_at
    end
    @frontier << block
    @frontier.sort!

    self
  end

  # Returns every non-empty combination of the array's elements,
  # smallest combinations first.
  #
  # Example:
  #
  #   combination([:a, :b, :c, :d])
  #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
  def self.combination(array)
    (1..array.length).flat_map { |size| array.combination(size).to_a }
  end

  # Given that we know our syntax error exists somewhere in our frontier, we want to find
  # the smallest possible set of blocks that contain all the syntax errors
  #
  # Candidates are tried in the same order `CodeFrontier.combination` produces
  # them, but one at a time, so the full set of 2**n - 1 combinations is never
  # held in memory. Returns an empty array when no combination qualifies.
  def detect_invalid_blocks
    1.upto(@frontier.length) do |size|
      @frontier.combination(size) do |block_array|
        return block_array if holds_all_syntax_errors?(block_array)
      end
    end

    []
  end
end
151
+ end
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
# Represents a single line of code of a given source file
#
# This object contains metadata about the line such as
# amount of indentation, and whether it is empty or not.
#
# While a given search for syntax errors is being performed
# state about the search can be stored in individual lines such
# as :valid or :invalid.
#
# Visibility of lines can be toggled on and off.
#
# Example:
#
#   line = CodeLine.new(line: "def foo\n", index: 0)
#   line.line_number # => 1
#   line.empty? # => false
#   line.visible? # => true
#   line.mark_invisible
#   line.visible? # => false
#
# A CodeBlock is made of multiple CodeLines
#
# Marking a line as invisible indicates that it should not be used
# for syntax checks. It's essentially the same as commenting it out
#
# Marking a line as invisible also lets the overall program know
# that it should not check that area for syntax errors.
class CodeLine
  # A backslash followed by the input record separator
  TRAILING_SLASH = ("\\" + $/).freeze

  attr_reader :line, :index, :indent, :original_line

  # line  - the text of the line (frozen in place and kept as the original)
  # index - zero based position of the line in its document
  def initialize(line:, index:)
    @original_line = line.freeze
    @line = @original_line
    @empty = line.strip.empty?
    @index = index
    @indent = SpaceCount.indent(line)
    @status = nil # valid, invalid, unknown
    @invalid = false

    lex_detect!
  end

  # Lexes the line once and caches what the predicates below report:
  # whether block-opening keywords outnumber `end`s (or the reverse),
  # whether the first non-space token is a comment, and whether the
  # final token is a trailing backslash.
  private def lex_detect!
    lex = LexAll.new(source: line)
    kw_count = 0
    end_count = 0
    lex.each do |token|
      next unless token.type == :on_kw

      case token.token
      when 'def', 'case', 'for', 'begin', 'class', 'module', 'if', 'unless', 'while', 'until', 'do'
        kw_count += 1
      when 'end'
        end_count += 1
      end
    end

    @is_kw = (kw_count - end_count) > 0
    @is_end = (end_count - kw_count) > 0
    @is_comment = lex.detect { |token| token.type != :on_sp }&.type == :on_comment
    # `last` is nil when lexing produced no tokens; treat that as "no trailing slash"
    @is_trailing_slash = lex.last&.token == TRAILING_SLASH
  end

  alias :original :original_line

  def trailing_slash?
    @is_trailing_slash
  end

  # Lines order by their position in the document
  def <=>(b)
    index <=> b.index
  end

  def is_comment?
    @is_comment
  end

  def not_comment?
    !is_comment?
  end

  def is_kw?
    @is_kw
  end

  def is_end?
    @is_end
  end

  # Flags the line as invalid; returns self so calls can be chained
  def mark_invalid
    @invalid = true
    self
  end

  def marked_invalid?
    @invalid
  end

  # Blanks out the current text (the original is kept); returns self
  def mark_invisible
    @line = ""
    self
  end

  # Restores the current text from the original; returns self
  def mark_visible
    @line = @original_line
    self
  end

  # A line is visible while its current text is not the empty string
  def visible?
    !line.empty?
  end

  def hidden?
    !visible?
  end

  # One based line number derived from the zero based index
  def line_number
    index + 1
  end
  alias :number :line_number

  def not_empty?
    !empty?
  end

  # True when the original text was blank (only whitespace); this does
  # not change when the line is marked invisible
  def empty?
    @empty
  end

  # The current text: the original, or "" while invisible
  def to_s
    line
  end
end
139
+ end
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
# Searches code for a syntax error
#
# The bulk of the heavy lifting is done in:
#
#  - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
#  - ParseBlocksFromIndentLine (Creates blocks into the frontier)
#  - BlockExpand (Expands existing blocks to search more code)
#
# ## Syntax error detection
#
# When the frontier holds the syntax error, we can stop searching
#
#   search = CodeSearch.new(<<~EOM)
#     def dog
#       def lol
#     end
#   EOM
#
#   search.call
#
#   search.invalid_blocks.map(&:to_s) # =>
#   # => ["def lol\n"]
#
class CodeSearch
  attr_reader :invalid_blocks, :record_dir, :code_lines

  attr_reader :frontier
  private :frontier

  # source     - the source code to search, as a String
  # record_dir - directory for debug recordings. Defaults to the value of
  #              DEAD_END_RECORD_DIR when set, otherwise "tmp" when DEBUG
  #              is set, otherwise nil (recording disabled). The ternary is
  #              parenthesized because `a || b ? x : y` parses as
  #              `(a || b) ? x : y`, which discarded the configured path.
  def initialize(source, record_dir: ENV["DEAD_END_RECORD_DIR"] || (ENV["DEBUG"] ? "tmp" : nil))
    @source = source
    if record_dir
      # NOTE(review): `%s` is seconds since the epoch, not seconds of the
      # minute (`%S`); left unchanged, confirm which one is intended
      @time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N')
      @record_dir = Pathname(record_dir).join(@time).tap(&:mkpath)
      @write_count = 0
    end
    code_lines = source.lines.map.with_index do |line, i|
      CodeLine.new(line: line, index: i)
    end

    @code_lines = TrailingSlashJoin.new(code_lines: code_lines).call

    @frontier = CodeFrontier.new(code_lines: @code_lines)
    @invalid_blocks = []
    @name_tick = Hash.new { |hash, k| hash[k] = 0 }
    @tick = 0
    # NOTE(review): this receives the lines from before TrailingSlashJoin,
    # unlike the collaborators around it; confirm that is intentional
    @block_expand = BlockExpand.new(code_lines: code_lines)
    @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
  end

  # Used for debugging
  #
  # Does nothing unless a record directory was configured. Otherwise writes
  # a rendering of the block to a numbered file in that directory and, when
  # DEBUG is set, also prints the block to stdout.
  def record(block:, name: "record")
    return unless @record_dir

    @name_tick[name] += 1
    filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
    if ENV["DEBUG"]
      puts "\n\n==== #{filename} ===="
      puts "\n```#{block.starts_at}:#{block.ends_at}"
      puts "#{block.to_s}"
      puts "```"
      puts " block indent: #{block.current_indent}"
    end
    @record_dir.join(filename).open(mode: "a") do |f|
      display = DisplayInvalidBlocks.new(
        blocks: block,
        terminal: false,
        code_lines: @code_lines,
      )
      f.write(display.indent(display.code_with_lines))
    end
  end

  # Adds the block to the frontier; a valid block has its lines marked
  # invisible first
  def push(block, name:)
    record(block: block, name: name)

    block.lines.each(&:mark_invisible) if block.valid?
    frontier << block
  end

  # Removes the block without putting it back in the frontier
  def sweep(block:, name:)
    record(block: block, name: name)

    block.lines.each(&:mark_invisible)
    frontier.register_indent_block(block)
  end

  # Parses the most indented lines into blocks that are marked
  # and added to the frontier
  def add_invalid_blocks
    max_indent = frontier.next_indent_line&.indent

    while (line = frontier.next_indent_line) && (line.indent == max_indent)
      @parse_blocks_from_indent_line.each_neighbor_block(line) do |block|
        record(block: block, name: "add")

        block.mark_invisible if block.valid?
        push(block, name: "add")
      end
    end
  end

  # Given an already existing block in the frontier, expand it to see
  # if it contains our invalid syntax
  def expand_invalid_block
    block = frontier.pop
    return unless block

    record(block: block, name: "pop")

    block = @block_expand.call(block)
    push(block, name: "expand")
  end

  # Pushes every block produced by HeredocBlockParse onto the frontier
  def sweep_heredocs
    HeredocBlockParse.new(
      source: @source,
      code_lines: @code_lines
    ).call.each do |block|
      push(block, name: "heredoc")
    end
  end

  # Sweeps all comment lines, gathered into one block, out of the search
  def sweep_comments
    lines = @code_lines.select(&:is_comment?)
    return if lines.empty?

    block = CodeBlock.new(lines: lines)
    sweep(block: block, name: "comments")
  end

  # Main search loop
  #
  # Alternates between expanding frontier blocks and adding new ones until
  # the frontier holds every syntax error, then stores the smallest set of
  # offending blocks, ordered by starting line. Returns self.
  def call
    sweep_heredocs
    sweep_comments
    until frontier.holds_all_syntax_errors?
      @tick += 1

      if frontier.expand?
        expand_invalid_block
      else
        add_invalid_blocks
      end
    end

    @invalid_blocks.concat(frontier.detect_invalid_blocks)
    @invalid_blocks.sort_by!(&:starts_at)
    self
  end
end
157
+ end