gitolemy 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,124 @@
1
+ require "active_support/core_ext/object"
2
+
3
+ require "amatch"
4
+ include Amatch
5
+
6
# Helpers for building, mutating, comparing and scoring tracked source lines.
# A line is a plain Hash; see Line.new_line for the keys it carries.
module Line
  # Jaro-Winkler similarity two texts must exceed to count as the same line.
  JARO_SIMILARITY_THRESHOLD = 0.88
  # Exclusive bounds on the length ratio (a / b) for two texts to be compared.
  MIN_LENGTH_SIMILARITY_THRESHOLD = 0.2
  MAX_LENGTH_SIMILARITY_THRESHOLD = 5
  # Coverage value assigned to freshly created lines.
  COVERAGE_INVALID = nil

  TOKEN_SPLIT_REGEX = /[\s,.&|^+=*\/-]/
  MAX_MEANING_LENGTH = 400
  NOT_WHITESPACE_REGEX = /[^\s]/
  BEAUTYSPACE_REGEX = /\s/
  # Stripped line texts that carry no information of their own.
  MEANINGLESS_KEYWORDS = {
    "" => true,
    "//" => true,
    "/*" => true,
    "*/" => true,
    "#" => true,
    "else" => true,
    "end" => true,
    "rescue" => true,
    "{" => true,
    "}" => true,
    "[" => true,
    "]" => true,
    "(" => true,
    ")" => true,
    "<" => true,
    ">" => true,
    "/>" => true
  }

  # TODO: return line_id only.
  #
  # Builds a new line Hash for +text+ introduced by +commit+.
  # +file_name+ and +line_num+ are accepted but currently unused.
  # Relies on ActiveSupport's Object#blank? for the issue/bug ids.
  def self.new_line(text, file_name, line_num, commit)
    {
      text: text,
      coverage: COVERAGE_INVALID,
      commit: commit.id,
      issues: commit.issue_id.blank? ? [] : [commit.issue_id],
      bugs: commit.bug_id.blank? ? [] : [commit.bug_id],
      errors: [],
      revisions: []
    }
  end

  # TODO: Don't move/change lines twice.
  #
  # Returns a new line Hash carrying +line+'s fields whose :revisions are
  # +line+'s revisions, then +old_line+, then +old_line+'s own past revisions.
  # Mutates +old_line+: its :revisions key is removed.
  def self.change!(old_line, line)
    past_revisions = old_line.delete(:revisions) || []
    {
      text: line[:text],
      coverage: line[:coverage],
      commit: line[:commit],
      issues: line[:issues],
      errors: line[:errors],
      bugs: line[:bugs],
      revisions: line[:revisions] + [old_line] + past_revisions
    }
  end

  # A move is recorded exactly like a change.
  def self.move!(old_line, line)
    change!(old_line, line)
  end

  # Prepends {error_id:, depth:} to line[:errors] unless an entry with the
  # same :error_id is already present (in which case nil is returned).
  def self.merge_error!(line, error, depth)
    already_known = line[:errors].any? do |line_error|
      line_error[:error_id] == error[:error_id]
    end
    return if already_known

    line[:errors].unshift({
      error_id: error[:error_id],
      depth: depth
    })
  end

  # True unless the stripped text is one of MEANINGLESS_KEYWORDS.
  def self.relevant?(line)
    !MEANINGLESS_KEYWORDS.key?(line[:text].strip)
  end

  # True when +line+ is relevant and, if it records a :change, that change is
  # more than cosmetic (i.e. not merely trailing whitespace or reformatting).
  #
  # The previous implementation returned the cosmetic check un-negated, so
  # whitespace-only edits were reported as relevant and real edits were not.
  def self.relevant_change?(line)
    return false unless relevant?(line)
    return true unless line.has_key?(:change)

    previous_text = line.dig(:change, :change_text)
    # Without the previous text nothing proves the edit was cosmetic.
    return true if previous_text.nil?

    previous = { text: previous_text }
    !(trailing?(line, previous) || beauty?(line, previous))
  end

  # True when +text+ is a meaningless keyword, longer than
  # MAX_MEANING_LENGTH, or splits into at most one token.
  def self.meaningless?(text)
    return true if MEANINGLESS_KEYWORDS[text] == true
    return true if text.length > MAX_MEANING_LENGTH

    text.split(TOKEN_SPLIT_REGEX).length <= 1
  end

  # True when the length ratio of the two texts lies strictly between the
  # MIN/MAX thresholds and their Jaro-Winkler similarity exceeds
  # JARO_SIMILARITY_THRESHOLD. String#jarowinkler_similar comes from amatch.
  def self.similar?(text_a, text_b)
    length_similarity = text_a.length.to_f / text_b.length.to_f
    length_similarity > MIN_LENGTH_SIMILARITY_THRESHOLD &&
      length_similarity < MAX_LENGTH_SIMILARITY_THRESHOLD &&
      text_a.jarowinkler_similar(text_b) > JARO_SIMILARITY_THRESHOLD
  end

  # Risk score of a line: 2 / (depth + 1) per linked error, 3 per bug,
  # 0.75 per revision, plus 3 when coverage is 0 or 1.5 when it is nil.
  def self.score(line)
    score = 0
    line[:errors].each { |error| score += 2.0 / (error[:depth] + 1) }
    score += 3 * line[:bugs].count
    score += 0.75 * line[:revisions].count
    score += 3 if line[:coverage] == 0
    score += 1.5 if line[:coverage].nil?
    score
  end

  # True when both texts start their first non-whitespace character at the
  # same index and are equal once all whitespace is removed.
  def self.beauty?(a_line, b_line)
    a_start = a_line[:text].index(NOT_WHITESPACE_REGEX)
    b_start = b_line[:text].index(NOT_WHITESPACE_REGEX)
    return false unless a_start == b_start

    a_line[:text].gsub(BEAUTYSPACE_REGEX, "") == b_line[:text].gsub(BEAUTYSPACE_REGEX, "")
  end

  # True when the texts are equal after removing trailing whitespace.
  def self.trailing?(a_line, b_line)
    a_line[:text].rstrip == b_line[:text].rstrip
  end
end
@@ -0,0 +1,90 @@
1
+ require_relative "line"
2
+
3
# Matches the inserted lines of a set of file diffs against the deleted lines
# of the same set, classifying each match as a movement (identical stripped
# text) or a change (similar text, as judged by Line.similar?).
class LineTracker
  # Returns {movements: {...}, changes: {...}}. Both are keyed by the diff's
  # b_file_name, then by line number (insert_start + index - 1), and map to
  # the matched deletion {from: a_file_name, line: line_number}.
  #
  # Each matched deletion is consumed (shifted) from the list it was found in.
  def track_mutations!(file_diffs)
    deletions = index_deletions(file_diffs)
    file_deletions = deletions.delete(:files)

    grouped_file_diffs(file_diffs).each_with_object(change_context) do |(file_name, file_diff), acc|
      file_deletes = file_deletions[file_diff[:a_file_name]] || {}

      file_diff[:diffs].each do |diff|
        diff.insertions.each_with_index do |insertion, i|
          insertion = insertion.strip
          next if Line.meaningless?(insertion)
          line_number = diff.insert_start + i - 1

          if movement?(deletions, insertion)
            index!(acc[:movements], file_name, line_number, deletions[insertion].shift)
          else
            changed_line = find_change(file_deletes, insertion)
            index!(acc[:changes], file_name, line_number, changed_line) if changed_line
          end
        end
      end
    end
  end

  private

  # True when an unconsumed deletion with exactly this text remains.
  def movement?(deletions, new_line)
    (deletions[new_line] || []).count > 0
  end

  # Consumes and returns the first remaining deletion whose text is similar
  # to +new_line+, or nil when there is none.
  #
  # Deletion texts whose list is already exhausted are skipped; previously
  # the first similar text was chosen even when empty, which yielded nil and
  # hid any later similar deletion.
  def find_change(deletions, new_line)
    _text, remaining = (deletions || {}).find do |deletion, lines|
      !lines.empty? && Line.similar?(deletion, new_line)
    end
    remaining ? remaining.shift : nil
  end

  # Records +old_line+ under acc[file_name][line_number].
  def index!(acc, file_name, line_number, old_line)
    acc[file_name] ||= {}
    acc[file_name][line_number] = old_line
  end

  # Empty result skeleton for track_mutations!.
  def change_context
    {
      movements: {},
      changes: {}
    }
  end

  # TODO:
  # 1: Machine learning based match to avoid false positives from other files.
  #   a: Within commit, within file, be most lenient.
  #   b: Within commit, be somewhat lenient.
  #   c: Within entire codebase, be pretty strict. Exclude even exact matches some times.
  # 2: Once a line is matched from the graveyard, remove it -- it's alive again.
  #
  # Indexes every meaningful deleted line by its stripped text. The result
  # maps text => [deletion, ...] across all files, and under the :files key
  # maps file_name => text => [deletion, ...]. The two indexes use separate
  # lists, so consuming from one does not consume from the other.
  def index_deletions(file_diffs)
    file_diffs.each_with_object({files: {}}) do |(file_name, file_diff), acc|
      deleted_file = file_diff.a_file_name

      file_diff.diffs.each do |diff|
        diff.deletions.each_with_index do |deletion, index|
          deletion = deletion.strip
          next if Line.meaningless?(deletion)
          line = {from: deleted_file, line: diff.delete_start + index - 1}

          per_file = (acc[:files][file_name] ||= {})
          (per_file[deletion] ||= []) << line
          (acc[deletion] ||= []) << line
        end
      end
    end
  end

  # Groups diffs by b_file_name; a_file_name is taken from the first diff
  # seen for each b_file_name.
  def grouped_file_diffs(file_diffs)
    file_diffs.each_with_object({}) do |(_file_name, file_diff), acc|
      group = (acc[file_diff.b_file_name] ||= {diffs: [], a_file_name: file_diff.a_file_name})
      group[:diffs].concat(file_diff.diffs)
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require "logger"
2
+ require "singleton"
3
+ require "active_support/core_ext/module/delegation"
4
+
5
+ require_relative "cache"
6
+
7
# Process-wide logger (Singleton). #info, #warn and #error are forwarded to
# the wrapped Logger, which is also exposed through #logger.
class Loggr
  include Singleton

  attr_reader :logger
  delegate :info, :warn, :error, to: :logger

  def initialize
    @logger = Logger.new(output)
  end

  private

  # Log destination: STDOUT when GITOLEMY_VERBOSE is exactly "true",
  # otherwise .gitolemy/out.log opened for writing (truncating any old log).
  def output
    if ENV["GITOLEMY_VERBOSE"] == "true"
      STDOUT
    else
      Cache.ensure_directory(".gitolemy")
      log_path = File.join(".gitolemy", "out.log")
      File.open(log_path, "w")
    end
  end
end
@@ -0,0 +1,20 @@
1
# Reports a commit's risk verdict to an HTTP endpoint.
class Notifier
  # POSTs {status:, commit_id:} as JSON to +url+; status is "success" when
  # +risk+ is at most 30 and "failure" otherwise.
  # Does nothing (returns nil) when GITOLEMY_SYNC is "true" or +commit+ is nil.
  def notify(url, commit, risk)
    return if ENV["GITOLEMY_SYNC"] == "true"
    return if commit.nil?

    status = risk <= 30 ? "success" : "failure"
    post(url, {status: status, commit_id: commit.commit_id})
  end

  private

  # Sends +payload+ as a JSON body; SSL is enabled when +url+ begins with
  # "https://". Returns the HTTP response.
  def post(url, payload)
    endpoint = URI(url)
    request = Net::HTTP::Post.new(endpoint, {"Content-Type" => "application/json"})
    request.body = payload.to_json

    connection = Net::HTTP.new(endpoint.hostname, endpoint.port)
    connection.use_ssl = url.start_with?("https://")
    connection.request(request)
  end
end
@@ -0,0 +1,13 @@
1
+ require_relative "cache"
2
+
3
# Cache mixin host whose cache paths are rooted at a given base path.
class ProjectCache
  include Cache

  # base_path: directory that every cache path is joined onto.
  def initialize(base_path)
    @base_path = base_path
  end

  # Path for +key+: Cache.cache_path(key) joined onto the base path.
  def cache_path(key)
    relative_path = Cache.cache_path(key)
    File.join(@base_path, relative_path)
  end
end
@@ -0,0 +1,53 @@
1
+ require "active_support/core_ext/object"
2
+
3
+ require_relative "commit_stats"
4
+
5
# Computes a numeric risk score for the lines touched by a commit.
class RiskAnalyzer
  # TODO: Add config to match test files
  #
  # Links the commit's changes, movements and diffs onto the affected files
  # (via CommitStats) and returns the score of the resulting line diffs.
  # Returns nil when +commit+ is nil.
  def analyze(file_manager, commit)
    return if commit.nil?

    files = files_for_diff(commit.file_diffs, file_manager)

    CommitStats.link_mutations!(files, commit.changes, :change)
    CommitStats.link_mutations!(files, commit.movements, :movement)
    line_diffs = CommitStats.link_diffs!(files, commit.file_diffs)

    score(line_diffs)
  end

  # TODO: Maybe include function score per line.
  #
  # Sums a penalty over all inserted and deleted lines: Line.score for lines
  # with a relevant change, 0 otherwise, divided by 3 for moved lines.
  def score(line_diffs)
    total = 0
    touched_lines = line_diffs[:insertions] + line_diffs[:deletions]
    touched_lines.each do |line|
      penalty = 0
      penalty = Line.score(line) if Line.relevant_change?(line)
      penalty /= 3 if line.has_key?(:movement)
      total += penalty
    end
    total
  end

  private

  # Builds {a: {a_path => file}, b: {b_path => file}}, where both sides
  # reference the same per-diff Hash. The "a" file comes from the file
  # manager's vfs, falling back to the cached object for a_file_id.
  def files_for_diff(file_diffs, file_manager)
    file_diffs.each_with_object({a: {}, b: {}}) do |(_b_path, file_diff), acc|
      file = {
        a_file: file_manager.vfs[file_diff.a_file_name] || load_file(file_diff),
        b_file: file_manager.vfs[file_diff.b_file_name],
        a_path: file_diff.a_file_name,
        b_path: file_diff.b_file_name,
        changes: [],
        movements: []
      }
      acc[:a][file[:a_path]] = file
      acc[:b][file[:b_path]] = file
    end
  end

  # Reads the diff's "a" file from the cache with symbolized keys.
  def load_file(file_diff)
    cached = Cache.read_object(file_diff.a_file_id)
    cached.deep_symbolize_keys
  end
end
@@ -0,0 +1,61 @@
1
+ require "openssl"
2
+ require "json"
3
+
4
+
5
# AES-256-CBC encrypted file storage keyed by a hex-encoded secret.
class SecureFileStore
  # key: the cipher key as a hex string (decoded with #unhex).
  def initialize(key)
    @key = unhex(key)
  end

  # Encrypts +data+ under a fresh random IV and writes it to +file_path+.
  #
  # Returns the IV as a hex string, which is what #read_file expects.
  # Previously the IV was dropped, so the caller had no way to decrypt the
  # file again.
  def write_file(data, file_path)
    iv, encrypted = encrypt(@key, data)
    File.open(file_path, "wb") { |file| file.write(encrypted) }
    hex(iv)
  end

  # Decrypts +file_path+ with the hex-encoded +iv+ and returns the plaintext.
  # The file is read in binary mode to mirror the "wb" used when writing.
  def read_file(iv, file_path)
    encrypted = File.binread(file_path)
    decrypt(@key, unhex(iv), encrypted)
  end

  # Serializes +settings+ to JSON, encrypts it and stores it in +dir+ as
  # "config-<hex iv>". Existing config files are left in place.
  def write_settings(settings, dir=".gitolemy")
    iv, encrypted = encrypt(@key, settings.to_json)
    file_path = File.join(dir, "config-#{hex(iv)}")
    File.open(file_path, "wb") { |file| file.write(encrypted) }
  end

  # Decrypts and parses the most recently modified "config-*" file in +dir+.
  #
  # Every #write_settings call adds a new file (the IV is random), so the
  # newest one is selected instead of whichever the directory lists first.
  # Fails if +dir+ contains no config file.
  def read_settings(dir=".gitolemy")
    candidates = Dir[File.join(dir, "config-*")]
    file_path = candidates.max_by { |path| File.mtime(path) }
    iv = File.basename(file_path).sub(/\Aconfig-/, "")
    JSON.parse(read_file(iv, file_path))
  end

  private

  # Binary string -> lowercase hex string.
  def hex(iv)
    iv.unpack("H*").first
  end

  # Hex string -> binary string, consuming two characters per byte.
  def unhex(iv)
    bytes = iv.scan(/../).map { |pair| pair.hex }
    bytes.pack("C*")
  end

  # Returns [iv, ciphertext] for +data+ under a random IV.
  def encrypt(private_key, data)
    cipher = OpenSSL::Cipher::AES.new(256, :CBC)
    cipher.encrypt
    cipher.key = private_key
    iv = cipher.random_iv
    [iv, cipher.update(data) + cipher.final]
  end

  # Returns the plaintext for +data+ encrypted under +private_key+ and +iv+.
  def decrypt(private_key, iv, data)
    cipher = OpenSSL::Cipher::AES.new(256, :CBC)
    cipher.decrypt
    cipher.key = private_key
    cipher.iv = iv
    cipher.update(data) + cipher.final
  end
end
@@ -0,0 +1,23 @@
1
# Linearizes a list of commits by following first children.
class SourceTree
  # Starting at commits.first, repeatedly steps to the commit whose id is the
  # current commit's first child, collecting every commit visited. Stops when
  # that id is missing from +commits+ (or there is no child).
  def merge_collapse(commits)
    commits_by_id = commits.each_with_object({}) do |commit, lookup|
      lookup[commit.id] = commit
    end

    chain = []
    current = commits.first
    while current
      chain.push(current)
      next_id = current.children.first
      current = commits_by_id[next_id]
    end
    chain
  end
end
@@ -0,0 +1,197 @@
1
+ require "date"
2
+ require "byebug"
3
+
4
+ require "active_support/core_ext/object"
5
+
6
+ require_relative "project_cache"
7
+ require_relative "integrations/git_client"
8
+ require_relative "line"
9
+
10
# Resolves a cached error's stack trace against the cached line history of a
# project and ranks likely experts, suspects and suspect commits.
class StackTracer
  # base_path: project root; its ".git" directory and cache are used.
  def initialize(base_path)
    @git_client = GitClient.new({"git_dir" => File.join(base_path, ".git")})
    @cache = ProjectCache.new(base_path)
  end

  # Builds the detail report for the cached error +error_id+.
  #
  # deployed_commit_id selects the file tree used to resolve stack frames;
  # last_deployed_commit_id supplies the cutoff date after which a line
  # counts as suspect. When omitted, both are derived from the cached deploys
  # and the error's :first_time.
  #
  # Returns a Hash with :experts, :suspects, :suspect_commits, :message,
  # :first_time, :last_time, :total_occurrences and :functions.
  def trace(error_id, deployed_commit_id=nil, last_deployed_commit_id=nil)
    error = @cache.read("errors")[error_id.to_s].deep_symbolize_keys
    commits = @cache.read("commits")
    deploys = @cache.read("deploys")

    # Deploys whose revision is unknown to the commit cache are ignored.
    deployed_commits = deploys
      .map { |deploy| commits[deploy["revision"]] }
      .compact
      .sort_by { |commit| commit["date"] }

    deployed_commit_id ||= after_deployed_commit(error[:first_time], deployed_commits)
    last_deployed_commit_id ||= prior_deployed_commit(error[:first_time], deployed_commits)

    files = git_files(deployed_commit_id.to_s)
    frames = stacktrace(error, files)
    frame_files = trace_files(frames, files)
    frame_lines = trace_lines(frames, frame_files, commits, last_deployed_commit_id.to_s)

    detail(error, frame_lines)
  end

  private

  # Maps file path => object id from the git client's tree listing; each
  # entry is split on whitespace, taking the last field as the path and the
  # third field as the id.
  def git_files(commit_id)
    @git_client.file_tree(commit_id).each_with_object({}) do |blob, files|
      fields = blob.split(" ")
      files[fields.last] = fields[2]
    end
  end

  # Loads the cached (symbolized) file object for every distinct file that
  # appears in the stack trace.
  def trace_files(stacktrace, git_files)
    paths = stacktrace.map { |frame| frame[:file] }.uniq
    paths.each_with_object({}) do |path, loaded|
      loaded[path] = @cache.read_object(git_files[path]).deep_symbolize_keys
    end
  end

  # Describes each stack frame: its own line (tagged with the frame depth),
  # the enclosing function, and every line of that function.
  def trace_lines(stacktrace, trace_files, commits, last_deployed_commit_id)
    cutoff_time = DateTime.parse(commits[last_deployed_commit_id]["date"])

    stacktrace.each_with_index.map do |trace, depth|
      file = trace_files[trace[:file]]
      line_num = trace[:line] - 1
      line = describe_line(file[:lines][line_num], commits, cutoff_time)
      line[:depth] = depth

      function = file[:functions].detect do |candidate|
        line_num >= candidate[:start] && line_num <= candidate[:end]
      end

      described_lines = function_lines(file, function).map do |function_line|
        described = describe_line(function_line, commits, cutoff_time)
        described[:revisions] = described[:revisions].map do |revision|
          revision[:author] = commits[revision[:commit]]["author"]
          revision
        end
        described
      end

      {
        file: trace[:file],
        line_num: trace[:line],
        line: line,
        title: "#{trace[:file]}:#{trace[:line]} - #{trace[:function]}",
        function: function,
        function_lines: described_lines
      }
    end
  end

  # Groups frames by function (or by file when no function was found),
  # collects each group's frame lines under :lines, marks the traced line
  # inside :function_lines with the frame depth, and tallies the scores.
  def detail(error, trace_lines)
    functions = trace_lines.each_with_object({}) do |trace, acc|
      function_id = trace[:function].nil? ?
        trace[:file] :
        "#{trace[:file]}:#{trace[:function][:name]}"

      # The first frame seen for a function becomes that function's entry.
      entry = (acc[function_id] ||= trace)
      depth = trace[:line][:depth]
      trace[:line][:trace_title] = trace[:title]

      if entry.has_key?(:lines)
        entry[:lines] << trace[:line]
      else
        entry[:lines] = [trace[:line]]
        entry.delete(:line)
      end

      # :function_lines is empty whenever no function was found, so
      # trace[:function] is only dereferenced for frames that have one.
      entry[:function_lines].each_with_index do |line, index|
        function_line_num = trace[:line_num] - trace[:function][:start]
        line[:depth] = depth if index == function_line_num
      end
    end

    details = functions.each_with_object(detail_context) do |(_function_id, function), acc|
      function[:function_lines].each do |line|
        score_line(acc[:experts], line[:author]["email"], line)
        if line[:after_cutoff]
          score_line(acc[:suspects], line[:author]["email"], line)
          score_line(acc[:suspect_commits], line[:commit], line)
        end
        line[:revisions].each do |revision|
          score_line(acc[:experts], revision[:author]["email"], line)
        end
      end
    end

    details.merge({
      message: error[:message],
      first_time: error[:first_time],
      last_time: error[:last_time],
      total_occurrences: error[:total_occurrences],
      functions: functions,
    })
  end

  # Adds 1 to hash[key], plus 2 / (depth + 1) when the line has a :depth.
  def score_line(hash, key, line)
    hash[key] ||= 0
    hash[key] += 1
    hash[key] += 2.0 / (line[:depth] + 1) if line.has_key?(:depth)
  end

  # Empty tallies for #detail.
  def detail_context
    {
      experts: {},
      suspects: {},
      suspect_commits: {}
    }
  end

  # Returns a shallow copy of +line+ annotated with :updated_at,
  # :after_cutoff, :author and :score, all derived from the line's commit.
  def describe_line(line, commits, cutoff_time)
    described = line.dup
    commit = commits[described[:commit]]
    described[:updated_at] = DateTime.parse(commit["date"])
    described[:after_cutoff] = described[:updated_at] > cutoff_time
    described[:author] = commit["author"]
    described[:score] = Line.score(described)
    described
  end

  # Keeps the frames whose file path contains a path known to git, rewriting
  # each kept frame's :file to that path.
  def stacktrace(error, git_files)
    known_paths = git_files.keys
    error[:stack_trace].select do |frame|
      frame[:file] = known_paths.detect { |app_file| frame[:file].include?(app_file) }
      !frame[:file].nil?
    end
  end

  # Lines of +function+ (index start - 1 through end), or [] without one.
  def function_lines(file, function)
    return [] if function.nil?

    file[:lines][(function[:start] - 1)..function[:end]]
  end

  # Id of the first commit dated before +timestamp+, falling back to the
  # first commit when none is (this used to raise NoMethodError on nil);
  # the fallback mirrors #after_deployed_commit.
  #
  # NOTE(review): #trace passes commits sorted by ascending date, so this
  # always resolves to the oldest deploy. Presumably the latest deploy before
  # the timestamp was intended -- confirm before changing.
  def prior_deployed_commit(timestamp, commits)
    commit = commits.detect { |candidate| candidate["date"] < timestamp }
    commit ||= commits.first
    commit["commit_id"]
  end

  # Id of the first commit, scanning from the end, dated after +timestamp+,
  # falling back to the last commit.
  #
  # NOTE(review): with the ascending list passed by #trace this always
  # resolves to the newest deploy -- confirm the intended selection.
  def after_deployed_commit(timestamp, commits)
    commit = commits.reverse.detect { |candidate| candidate["date"] > timestamp }
    commit ||= commits.last
    commit["commit_id"]
  end
end