checkmate5 4.0.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. checkmate/__init__.py +21 -0
  2. checkmate/__main__.py +25 -0
  3. checkmate/contrib/__init__.py +21 -0
  4. checkmate/contrib/plugins/__init__.py +0 -0
  5. checkmate/contrib/plugins/all/gptanalyzer/__init__.py +0 -0
  6. checkmate/contrib/plugins/all/gptanalyzer/analyzer.py +99 -0
  7. checkmate/contrib/plugins/all/gptanalyzer/issues_data.py +6 -0
  8. checkmate/contrib/plugins/all/gptanalyzer/setup.py +13 -0
  9. checkmate/contrib/plugins/cve/__init__.py +0 -0
  10. checkmate/contrib/plugins/cve/text4shell/__init__.py +0 -0
  11. checkmate/contrib/plugins/cve/text4shell/analyzer.py +64 -0
  12. checkmate/contrib/plugins/cve/text4shell/issues_data.py +8 -0
  13. checkmate/contrib/plugins/cve/text4shell/setup.py +13 -0
  14. checkmate/contrib/plugins/git/__init__.py +0 -0
  15. checkmate/contrib/plugins/git/commands/__init__.py +6 -0
  16. checkmate/contrib/plugins/git/commands/analyze.py +364 -0
  17. checkmate/contrib/plugins/git/commands/base.py +16 -0
  18. checkmate/contrib/plugins/git/commands/diff.py +199 -0
  19. checkmate/contrib/plugins/git/commands/init.py +59 -0
  20. checkmate/contrib/plugins/git/commands/update_stats.py +41 -0
  21. checkmate/contrib/plugins/git/hooks/__init__.py +0 -0
  22. checkmate/contrib/plugins/git/hooks/project.py +19 -0
  23. checkmate/contrib/plugins/git/lib/__init__.py +1 -0
  24. checkmate/contrib/plugins/git/lib/repository.py +557 -0
  25. checkmate/contrib/plugins/git/lib/repository_pygit2.py +531 -0
  26. checkmate/contrib/plugins/git/models.py +178 -0
  27. checkmate/contrib/plugins/git/setup.py +27 -0
  28. checkmate/contrib/plugins/golang/__init__.py +0 -0
  29. checkmate/contrib/plugins/golang/gostaticcheck/__init__.py +0 -0
  30. checkmate/contrib/plugins/golang/gostaticcheck/analyzer.py +94 -0
  31. checkmate/contrib/plugins/golang/gostaticcheck/issues_data.py +1246 -0
  32. checkmate/contrib/plugins/golang/gostaticcheck/setup.py +13 -0
  33. checkmate/contrib/plugins/iac/__init__.py +0 -0
  34. checkmate/contrib/plugins/iac/kubescape/__init__.py +0 -0
  35. checkmate/contrib/plugins/iac/kubescape/analyzer.py +115 -0
  36. checkmate/contrib/plugins/iac/kubescape/issues_data.py +636 -0
  37. checkmate/contrib/plugins/iac/kubescape/setup.py +14 -0
  38. checkmate/contrib/plugins/iac/tfsec/__init__.py +0 -0
  39. checkmate/contrib/plugins/iac/tfsec/analyzer.py +92 -0
  40. checkmate/contrib/plugins/iac/tfsec/issues_data.py +1917 -0
  41. checkmate/contrib/plugins/iac/tfsec/setup.py +13 -0
  42. checkmate/contrib/plugins/java/__init__.py +0 -0
  43. checkmate/contrib/plugins/java/semgrepjava/__init__.py +0 -0
  44. checkmate/contrib/plugins/java/semgrepjava/analyzer.py +96 -0
  45. checkmate/contrib/plugins/java/semgrepjava/issues_data.py +5 -0
  46. checkmate/contrib/plugins/java/semgrepjava/setup.py +13 -0
  47. checkmate/contrib/plugins/javascript/__init__.py +0 -0
  48. checkmate/contrib/plugins/javascript/semgrepeslint/__init__.py +0 -0
  49. checkmate/contrib/plugins/javascript/semgrepeslint/analyzer.py +95 -0
  50. checkmate/contrib/plugins/javascript/semgrepeslint/issues_data.py +6 -0
  51. checkmate/contrib/plugins/javascript/semgrepeslint/setup.py +13 -0
  52. checkmate/contrib/plugins/perl/__init__.py +0 -0
  53. checkmate/contrib/plugins/perl/graudit/__init__.py +0 -0
  54. checkmate/contrib/plugins/perl/graudit/analyzer.py +70 -0
  55. checkmate/contrib/plugins/perl/graudit/issues_data.py +8 -0
  56. checkmate/contrib/plugins/perl/graudit/setup.py +13 -0
  57. checkmate/contrib/plugins/python/__init__.py +0 -0
  58. checkmate/contrib/plugins/python/bandit/__init__.py +0 -0
  59. checkmate/contrib/plugins/python/bandit/analyzer.py +74 -0
  60. checkmate/contrib/plugins/python/bandit/issues_data.py +426 -0
  61. checkmate/contrib/plugins/python/bandit/setup.py +13 -0
  62. checkmate/contrib/plugins/ruby/__init__.py +0 -0
  63. checkmate/contrib/plugins/ruby/brakeman/__init__.py +0 -0
  64. checkmate/contrib/plugins/ruby/brakeman/analyzer.py +96 -0
  65. checkmate/contrib/plugins/ruby/brakeman/issues_data.py +518 -0
  66. checkmate/contrib/plugins/ruby/brakeman/setup.py +13 -0
  67. checkmate/helpers/__init__.py +0 -0
  68. checkmate/helpers/facts.py +26 -0
  69. checkmate/helpers/hashing.py +68 -0
  70. checkmate/helpers/issue.py +101 -0
  71. checkmate/helpers/settings.py +14 -0
  72. checkmate/lib/__init__.py +1 -0
  73. checkmate/lib/analysis/__init__.py +3 -0
  74. checkmate/lib/analysis/base.py +103 -0
  75. checkmate/lib/code/__init__.py +3 -0
  76. checkmate/lib/code/environment.py +809 -0
  77. checkmate/lib/models.py +515 -0
  78. checkmate/lib/stats/__init__.py +1 -0
  79. checkmate/lib/stats/helpers.py +19 -0
  80. checkmate/lib/stats/mapreduce.py +29 -0
  81. checkmate/management/__init__.py +1 -0
  82. checkmate/management/commands/__init__.py +18 -0
  83. checkmate/management/commands/alembic.py +32 -0
  84. checkmate/management/commands/analyze.py +42 -0
  85. checkmate/management/commands/analyzers.py +1 -0
  86. checkmate/management/commands/base.py +66 -0
  87. checkmate/management/commands/compare.py +0 -0
  88. checkmate/management/commands/export.py +0 -0
  89. checkmate/management/commands/info.py +0 -0
  90. checkmate/management/commands/init.py +103 -0
  91. checkmate/management/commands/issues.py +478 -0
  92. checkmate/management/commands/props/__init__.py +1 -0
  93. checkmate/management/commands/props/delete.py +29 -0
  94. checkmate/management/commands/props/get.py +30 -0
  95. checkmate/management/commands/props/set.py +29 -0
  96. checkmate/management/commands/reset.py +53 -0
  97. checkmate/management/commands/shell.py +19 -0
  98. checkmate/management/commands/snapshots.py +22 -0
  99. checkmate/management/commands/stats.py +21 -0
  100. checkmate/management/commands/summary.py +19 -0
  101. checkmate/management/commands/sync.py +63 -0
  102. checkmate/management/commands/trend.py +1 -0
  103. checkmate/management/commands/watch.py +27 -0
  104. checkmate/management/decorators.py +1 -0
  105. checkmate/management/helpers.py +140 -0
  106. checkmate/scripts/__init__.py +18 -0
  107. checkmate/scripts/manage.py +121 -0
  108. checkmate/settings/__init__.py +2 -0
  109. checkmate/settings/base.py +127 -0
  110. checkmate/settings/defaults.py +133 -0
  111. checkmate5-4.0.67.dist-info/LICENSE.txt +4095 -0
  112. checkmate5-4.0.67.dist-info/METADATA +15 -0
  113. checkmate5-4.0.67.dist-info/RECORD +116 -0
  114. checkmate5-4.0.67.dist-info/WHEEL +5 -0
  115. checkmate5-4.0.67.dist-info/entry_points.txt +2 -0
  116. checkmate5-4.0.67.dist-info/top_level.txt +1 -0
checkmate/lib/code/environment.py
@@ -0,0 +1,809 @@
+ # -*- coding: utf-8 -*-
+
+
+ import sys
+ import re
+ import time
+ import traceback
+ import logging
+ import copy
+ import hashlib
+
+ from checkmate.helpers.issue import group_issues_by_fingerprint
+ from checkmate.management.helpers import (filter_filenames_by_analyzers,
+                                           filter_filenames_by_checkignore)
+ from checkmate.helpers.hashing import Hasher
+ from checkmate.lib.analysis.base import BaseAnalyzer
+ from checkmate.lib.models import (Issue,
+                                   IssueOccurrence,
+                                   Diff,
+                                   Snapshot,
+                                   FileRevision,
+                                   DiffFileRevision,
+                                   DiffIssueOccurrence)
+
+ from collections import defaultdict
+ from functools import reduce
+
+ import os
+
+ logger = logging.getLogger(__name__)
+
+
+ class AnalysisTimeAnalyzer(BaseAnalyzer):
+
+     def summarize(self, items):
+
+         stats = defaultdict(lambda: 0.0)
+
+         for item in items:
+             for analyzer, duration in list(item.items()):
+                 stats[analyzer] += duration
+
+         return dict(stats)
+
+
+ def apply_filter(filename, patterns):
+     return reduce(lambda x, y: x or y, [True if re.search(pattern, filename)
+                                         else False for pattern in patterns], False)
+
+
+ def diff_objects(objects_a, objects_b, key, comparator=None, with_unchanged=False):
+     """
+     Returns a "diff" between two lists of objects.
+
+     :param key: The key that identifies objects with identical location in each set,
+                 such as files with the same path or code objects with the same URL.
+     :param comparator: Comparison functions that decides if two objects are identical.
+     """
+
+     objects_by_key = {'a': defaultdict(list),
+                       'b': defaultdict(list)}
+
+     for name, objects in ('a', objects_a), ('b', objects_b):
+         d = objects_by_key[name]
+         for obj in objects:
+             d[key(obj)].append(obj)
+
+     added_objects = [obj for key, objs in list(objects_by_key['b'].items())
+                      if key not in objects_by_key['a'] for obj in objs]
+
+     deleted_objects = [obj for key, objs in list(objects_by_key['a'].items())
+                        if key not in objects_by_key['b'] for obj in objs]
+
+     joint_keys = [key for key in objects_by_key['a']
+                   if key in objects_by_key['b']]
+
+     modified_objects = []
+
+     # we go through the keys that exist in both object sets
+     for key in joint_keys:
+         objects_a = objects_by_key['a'][key]
+         objects_b = objects_by_key['b'][key]
+
+         if len(objects_a) > 1 or len(objects_b) > 1:
+
+             # this is an ambiguous situation: we have more than one object for the same
+             # key, so we have to decide which ones have been added or not
+             # we try to remove identical objects from the set
+
+             objects_a_copy = objects_a[:]
+             objects_b_copy = objects_b[:]
+
+             # for the next step, we need a comparator
+             if comparator:
+                 # we iterate through the list and try to find different objects...
+                 for obj_a in objects_a:
+                     for obj_b in objects_b_copy:
+                         if comparator(obj_a, obj_b) == 0:
+                             # these objects are identical, we remove them from both sets...
+                             objects_a_copy.remove(obj_a)
+                             objects_b_copy.remove(obj_b)
+                             break
+
+             # here we cannot distinguish objects...
+             if len(objects_b_copy) > len(objects_a_copy):
+                 # we arbitrarily mark the last objects in objects_b as added
+                 added_objects.extend(objects_b_copy[len(objects_a_copy):])
+             elif len(objects_a_copy) > len(objects_b_copy):
+                 # we arbitrarily mark the last objects in objects_a as deleted
+                 deleted_objects.extend(objects_a_copy[len(objects_b_copy):])
+         else:
+             if comparator and comparator(objects_a[0], objects_b[0]) != 0:
+                 # these objects are different
+                 modified_objects.append(objects_a[0])
+
+     result = {
+         'added': added_objects,
+         'deleted': deleted_objects,
+         'modified': modified_objects,
+     }
+
+     if with_unchanged:
+         unchanged_objects = [objects_b_by_key[key]
+                              for key in joint_keys
+                              if not objects_b_by_key[key] in modified_objects]
+         result['unchanged'] = unchanged_objects
+
+     return result
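
# --- Editor's illustration (not part of environment.py) --------------------
# A minimal, self-contained sketch of the keyed-diff idea implemented by
# diff_objects() above: objects are bucketed by a key (here the file path)
# and classified as added, deleted or modified via a comparator on the hash.
# The FileStub type and the sample data are hypothetical.
from collections import namedtuple

FileStub = namedtuple("FileStub", ["path", "hash"])

old = [FileStub("a.py", "111"), FileStub("b.py", "222")]
new = [FileStub("a.py", "111"), FileStub("b.py", "999"), FileStub("c.py", "333")]

old_by_path = {f.path: f for f in old}
new_by_path = {f.path: f for f in new}

added = [f for p, f in new_by_path.items() if p not in old_by_path]
deleted = [f for p, f in old_by_path.items() if p not in new_by_path]
modified = [old_by_path[p] for p in old_by_path
            if p in new_by_path and old_by_path[p].hash != new_by_path[p].hash]

print(added, deleted, modified)
# Expected: c.py added, nothing deleted, b.py modified.
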
+
+
+ def file_revision_key(file_revision):
+     return file_revision.path
+
+
+ def file_revision_comparator(file_revision_a, file_revision_b):
+     return 0 if file_revision_a.hash == file_revision_b.hash else -1
+
+
+ def issue_occurrence_key(issue_occurrence):
+     try:
+         return issue_occurrence.file_revision.path+":"+issue_occurrence.issue.analyzer +\
+             ":"+issue_occurrence.issue.code+":"+issue_occurrence.issue.fingerprint
+     except AttributeError:
+         return issue_occurrence.file_revision.path+":"+issue_occurrence.issue.analyzer+":"+issue_occurrence.issue.code
+
+
+ def issue_occurrence_comparator(issue_occurrence_a, issue_occurrence_b):
+     if issue_occurrence_key(issue_occurrence_a) != issue_occurrence_key(issue_occurrence_b):
+         return -1
+     if issue_occurrence_a.from_row != issue_occurrence_b.from_row or\
+        issue_occurrence_a.to_row != issue_occurrence_b.to_row or\
+        issue_occurrence_a.from_column != issue_occurrence_b.from_column or\
+        issue_occurrence_a.to_column != issue_occurrence_b.to_column:
+         return -1
+
+     return 0
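
# --- Editor's illustration (not part of environment.py) --------------------
# Sketch of how the occurrence key above composes path, analyzer, code and
# fingerprint, and how the comparator additionally requires identical
# positions. The SimpleNamespace objects stand in for the real model
# instances and are hypothetical.
from types import SimpleNamespace

issue = SimpleNamespace(analyzer="bandit", code="B602", fingerprint="deadbeef")
fr = SimpleNamespace(path="app/views.py")
occ_a = SimpleNamespace(file_revision=fr, issue=issue,
                        from_row=10, to_row=10, from_column=4, to_column=30)
occ_b = SimpleNamespace(file_revision=fr, issue=issue,
                        from_row=12, to_row=12, from_column=4, to_column=30)

key = occ_a.file_revision.path + ":" + occ_a.issue.analyzer + ":" + \
    occ_a.issue.code + ":" + occ_a.issue.fingerprint
print(key)  # app/views.py:bandit:B602:deadbeef

# Same key, different rows: the comparator above would treat these as
# different occurrences (return -1), so one would show up in a diff.
same_position = (occ_a.from_row, occ_a.to_row) == (occ_b.from_row, occ_b.to_row)
print(same_position)  # False
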
+
+
+ class CodeEnvironment(object):
+
+     """
+     Represents the full code environment of the project, including all code files.
+
+     Responsibilities of the code environment:
+
+     -Manage a list of file revisions
+     -Manage a list of analyzers and aggregators
+     -Manage project parameters that passed on to the analyzers
+     """
+
+     def __init__(self,
+                  project,
+                  global_settings,
+                  project_settings,
+                  raise_on_analysis_error=False,
+                  env=None,
+                  file_revisions=None,
+                  ):
+         self.project = project
+         # global settings dictionary
+         self.global_settings = global_settings
+         self.project_settings = project_settings
+         self.raise_on_analysis_error = raise_on_analysis_error
+         self._env = env if env is not None else {}
+         self._file_revisions = file_revisions
+
+         self._active_analyzers = None
+         self._active_aggregators = None
+         self._analyzer_cache = {}
+
+     def create_analysis_error(self, exception_str, location=None):
+         issue_doc = {
+             'code': 'AnalysisError',
+             'data': {
+                 'exception': exception_str
+             },
+             'location': location,
+             'fingerprint': hashlib.sha256(exception_str.encode()).hexdigest(),
+         }
+         return issue_doc
+
+     @property
+     def file_revisions(self):
+         return self._file_revisions
+
+     @file_revisions.setter
+     def file_revisions(self, file_revisions):
+         self._file_revisions = file_revisions
+         # we reset the analyzers and aggregators, as they depend
+         # on the file revision information...
+         self._active_analyzers = None
+         self._active_aggregators = None
+         self._analyzer_cache = {}
+
+     @property
+     def env(self):
+         return self._env
+
+     @property
+     def analyzers(self):
+         if self._active_analyzers is None:
+             self._active_analyzers = self.get_active_analyzers()
+         return self._active_analyzers
+
+     @property
+     def aggregators(self):
+         if self._active_aggregators is None:
+             self._active_aggregators = self.get_active_aggregators()
+         return self._active_aggregators
+
+     def get_language(self, file_revision):
+         for language, language_pattern in list(self.global_settings.language_patterns.items()):
+             if 'patterns' in language_pattern and \
+                     apply_filter(file_revision['path'], language_pattern['patterns']):
+                 return language
+         return None
+
+     def filter_file_revisions(self, file_revisions):
+
+         def analyzer_filter(filenames): return filter_filenames_by_analyzers(filenames,
+                                                                              list(
+                                                                                  self.global_settings.analyzers.values()),
+                                                                              self.global_settings.language_patterns)
+
+         filters = [analyzer_filter]
+
+         if 'ignore' in self.project_settings:
+             checkignore = self.project_settings['ignore']
+             filters.append(lambda filenames: filter_filenames_by_checkignore(
+                 filenames, checkignore))
+
+         file_revisions_by_path = {fr.path: fr for fr in file_revisions}
+         filtered_paths = list(file_revisions_by_path.keys())
+
+         for path_filter in filters:
+             filtered_paths = path_filter(filtered_paths)
+
+         return [file_revisions_by_path[path] for path in filtered_paths]
+
+     def _get_active_objects(self, objs, disabled_by_default=False, obj_type='analyzers'):
+         active_objs = {}
+         project_settings = self.project_settings
+         project_obj_settings = project_settings.get(obj_type, {})
+         for name, params in list(objs.items()):
+             if 'enable' in project_obj_settings and not name in project_obj_settings['enable']:
+                 continue
+             if 'disable' in project_obj_settings and name in project_obj_settings['disable']:
+                 continue
+             if not name in project_obj_settings and disabled_by_default:
+                 continue
+             obj_settings = params.copy()
+             if not 'settings' in obj_settings:
+                 obj_settings['settings'] = {}
+             project_obj_settings = project_obj_settings.get(name, {})
+             if 'settings' in project_obj_settings:
+                 obj_settings['settings'].update(
+                     project_obj_settings['settings'])
+             active_objs[name] = obj_settings
+         return active_objs
+
+     def get_active_aggregators(self, disabled_by_default=False):
+         return self._get_active_objects(self.global_settings.aggregators, disabled_by_default=disabled_by_default, obj_type='aggregators')
+
+     def get_active_analyzers(self, disabled_by_default=False):
+         return self._get_active_objects(self.global_settings.analyzers, disabled_by_default=disabled_by_default, obj_type='analyzers')
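
# --- Editor's illustration (not part of environment.py) --------------------
# _get_active_objects() above merges global analyzer definitions with the
# project's 'analyzers' settings: 'enable'/'disable' lists switch analyzers
# on or off, and per-analyzer 'settings' dicts are merged over the defaults.
# A self-contained sketch of that selection logic; the analyzer names and
# settings below are made up.
global_analyzers = {
    "bandit": {"language": "python", "settings": {"level": "low"}},
    "brakeman": {"language": "ruby", "settings": {}},
}
project_settings = {
    "analyzers": {
        "disable": ["brakeman"],
        "bandit": {"settings": {"level": "high"}},
    }
}

active = {}
conf = project_settings.get("analyzers", {})
for name, params in global_analyzers.items():
    if "enable" in conf and name not in conf["enable"]:
        continue
    if "disable" in conf and name in conf["disable"]:
        continue
    merged = dict(params, settings=dict(params.get("settings", {})))
    merged["settings"].update(conf.get(name, {}).get("settings", {}))
    active[name] = merged

print(active)  # only 'bandit' remains, with 'level' overridden to 'high'
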
+
+     def init_analyzer(self, name, parameters):
+         class_str = parameters['class']
+
+         if class_str in self._analyzer_cache:
+             return self._analyzer_cache[class_str]
+
+         if isinstance(class_str, six.string_types):
+             (module_name, separator, class_name) = class_str.rpartition(".")
+             module = __import__(module_name, globals(),
+                                 locals(), [str(class_name)], -1)
+             analyzer_class = getattr(module, class_name)
+         else:
+             analyzer_class = class_str
+
+         try:
+             analyzer = analyzer_class(self,
+                                       settings=parameters.get('settings'),
+                                       ignore=parameters.get('ignore')
+                                       )
+         except:
+             logger.error("Cannot initialize analyzer {}".format(name))
+             logger.error(traceback.format_exc())
+             analyzer = None
+         self._analyzer_cache[class_str] = analyzer
+         return analyzer
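
# --- Editor's illustration (not part of environment.py) --------------------
# init_analyzer() above resolves an analyzer class from a dotted-path string
# ("package.module.ClassName"). Note that the packaged code references `six`
# without importing it in this module and passes level=-1 to __import__,
# which only worked on Python 2. A minimal Python 3 equivalent of the same
# lookup, shown purely for illustration, would use importlib:
import importlib


def load_class(dotted_path):
    """Return the class referred to by 'package.module.ClassName'."""
    module_name, _, class_name = dotted_path.rpartition(".")
    module = importlib.import_module(module_name)
    return getattr(module, class_name)


# Example with a standard-library class:
OrderedDict = load_class("collections.OrderedDict")
print(OrderedDict)  # <class 'collections.OrderedDict'>
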
+
+     def diff_snapshots(self, snapshot_a, snapshot_b, save=True, diff=None):
+         """
+         Returns a list of
+         """
+
+         file_revisions_a = snapshot_a.file_revisions
+         file_revisions_b = snapshot_b.file_revisions
+
+         file_revisions_diff = diff_objects(file_revisions_a,
+                                            file_revisions_b,
+                                            file_revision_key,
+                                            file_revision_comparator)
+
+         # We just generate code objects and issues
+         # for the modified file revisions, to save time when diffing.
+
+         logger.debug("Generating list of modified file revisions...")
+         modified_file_revisions_by_path = {}
+         for fr_type in ('modified', 'added', 'deleted'):
+             for fr in file_revisions_diff[fr_type]:
+                 if not fr.path in modified_file_revisions_by_path:
+                     modified_file_revisions_by_path[fr.path] = fr
+
+         logger.debug("Generating list of modified issues...")
+
+         modified_file_revisions_a = [fr for fr in file_revisions_a
+                                      if fr.path in modified_file_revisions_by_path]
+         modified_file_revisions_b = [fr for fr in file_revisions_b
+                                      if fr.path in modified_file_revisions_by_path]
+
+         if modified_file_revisions_a:
+             # to do: check the file revisions chunk-wise to avoid DB query errors
+             issue_occurrences_a = self.project.backend.filter(IssueOccurrence,
+                                                               {
+                                                                   'file_revision': {'$in': modified_file_revisions_a}
+                                                               },
+                                                               include=('file_revision', 'issue'))
+         else:
+             issue_occurrences_a = []
+
+         if modified_file_revisions_b:
+             # to do: check the file revisions chunk-wise to avoid DB query errors
+             issue_occurrences_b = self.project.backend.filter(IssueOccurrence,
+                                                               {
+                                                                   'file_revision': {'$in': modified_file_revisions_b}
+                                                               },
+                                                               include=('file_revision', 'issue'))
+         else:
+             issue_occurrences_b = []
+
+         logger.debug("Diffing issues (%d in A, %d in B)" % (len(issue_occurrences_a),
+                                                             len(issue_occurrences_b)))
+
+         issue_occurrences_diff = diff_objects(issue_occurrences_a,
+                                               issue_occurrences_b,
+                                               issue_occurrence_key,
+                                               issue_occurrence_comparator)
+
+         logger.debug("Diffing summary...")
+         summary_diff = self.diff_summaries(snapshot_a, snapshot_b)
+
+         if diff is None:
+             diff = Diff({'summary': summary_diff,
+                          'snapshot_a': snapshot_a,
+                          'project': self.project,
+                          'configuration': self.project.configuration,
+                          'snapshot_b': snapshot_b})
+             # we generate the hash value for this diff
+             hasher = Hasher()
+             hasher.add(diff.snapshot_a.hash)
+             hasher.add(diff.snapshot_b.hash)
+             diff.hash = hasher.digest.hexdigest()
+         elif save:
+             with self.project.backend.transaction():
+                 self.project.backend.filter(
+                     DiffFileRevision, {'diff': diff}).delete()
+                 self.project.backend.filter(
+                     DiffIssueOccurrence, {'diff': diff}).delete()
+         if save:
+             with self.project.backend.transaction():
+                 self.project.backend.save(diff)
+
+         diff_file_revisions = []
+
+         with self.project.backend.transaction():
+             for key, file_revisions in list(file_revisions_diff.items()):
+                 for file_revision in file_revisions:
+                     hasher = Hasher()
+                     hasher.add(file_revision.hash)
+                     hasher.add(diff.hash)
+                     hasher.add(key)
+                     diff_file_revision = DiffFileRevision({
+                         'diff': diff,
+                         'file_revision': file_revision,
+                         'hash': hasher.digest.hexdigest(),
+                         'key': key})
+                     if save:
+                         self.project.backend.save(diff_file_revision)
+                     diff_file_revisions.append(diff_file_revision)
+
+         diff_issue_occurrences = []
+         mapping = {'deleted': 'fixed', 'added': 'added'}
+         with self.project.backend.transaction():
+             for key, issue_occurrences in list(issue_occurrences_diff.items()):
+                 if not key in mapping:
+                     continue
+                 for issue_occurrence in issue_occurrences:
+                     hasher = Hasher()
+                     hasher.add(issue_occurrence.hash)
+                     hasher.add(diff.hash)
+                     hasher.add(mapping[key])
+                     diff_issue_occurrence = DiffIssueOccurrence({
+                         'diff': diff,
+                         'hash': hasher.digest.hexdigest(),
+                         'issue_occurrence': issue_occurrence,
+                         'key': mapping[key]
+                     })
+                     if save:
+                         self.project.backend.save(diff_issue_occurrence)
+                     diff_issue_occurrences.append(diff_issue_occurrence)
+
+         return diff, diff_file_revisions, diff_issue_occurrences
+
+     def diff_summaries(self, snapshot_a, snapshot_b):
+
+         summary = {}
+
+         if not hasattr(snapshot_a, 'summary') or not hasattr(snapshot_b, 'summary'):
+             return summary
+
+         languages = set(list(snapshot_a.summary.keys()) +
+                         list(snapshot_b.summary.keys()))
+
+         for language in languages:
+
+             summary[language] = {}
+
+             if not language in snapshot_a.summary or not language in snapshot_b.summary:
+                 continue
+
+             language_summary_a = snapshot_a.summary[language]
+             language_summary_b = snapshot_b.summary[language]
+
+             for analyzer_name, analyzer_params in list(self.analyzers.items()):
+
+                 if not analyzer_name in language_summary_a \
+                         or not analyzer_name in language_summary_b:
+                     continue
+
+                 summary[language][analyzer_name] = {}
+
+                 analyzer = self.init_analyzer(analyzer_name, analyzer_params)
+                 if analyzer is None:
+                     continue
+
+                 for key in language_summary_a[analyzer_name]:
+                     if not key in language_summary_b[analyzer_name]:
+                         continue
+                     result = analyzer.diff_summary(language_summary_a[analyzer_name][key],
+                                                    language_summary_b[analyzer_name][key]
+                                                    )
+                     if result:
+                         summary[language][analyzer_name][key] = result
+
+         return summary
+
+     def summarize(self,
+                   file_revisions,
+                   significance_limit=0.01,
+                   include_analysis_time=True):
+
+         if not file_revisions:
+             return {}
+
+         results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
+         file_revisions_by_key = defaultdict(lambda: {})
+
+         for aggregator in list(self.aggregators.values()):
+             for file_revision in file_revisions:
+                 keys = aggregator['mapper'](file_revision)
+                 for key in keys:
+                     if not file_revision['path'] in file_revisions_by_key[key]:
+                         file_revisions_by_key[key][file_revision['path']
+                                                    ] = file_revision
+
+         for language in set([analyzer['language'] for analyzer in list(self.analyzers.values())]):
+             for analyzer_name, analyzer_params in list({name: analyzer
+                                                          for name, analyzer in list(self.analyzers.items())
+                                                          if analyzer['language'] == language}.items()):
+
+                 analyzer = self.init_analyzer(analyzer_name, analyzer_params)
+                 if analyzer is None:
+                     continue
+
+                 for key in file_revisions_by_key:
+                     try:
+                         if hasattr(analyzer, 'summarize_all'):
+                             # If the analyzer has a `summarize_all` function we call it with the
+                             # results from ALL analyzers and its own name.
+                             results[language][analyzer_name][key] = analyzer\
+                                 .summarize_all([f['results']
+                                                 for f in list(file_revisions_by_key[key].values())
+                                                 if 'results' in f and f['language'] == language], analyzer_name)
+                         else:
+                             results[language][analyzer_name][key] = analyzer.summarize([
+                                 f['results'][analyzer_name]
+                                 for f in list(file_revisions_by_key[key].values())
+                                 if 'results' in f and f['language'] == language
+                                 and analyzer_name in f['results']])
+                     except Exception as e:
+                         traceback.print_exc()
+                         logger.error("Could not summarize results for analyzers %s and key %s" %
+                                      (analyzer_name, key))
+                         raise
+                         continue
+
+             results[language] = dict(results[language])
+
+         results = dict(results)
+         return results
+
+     def analyze_file_revisions(self, file_revisions):
+         filtered_file_revisions = self.filter_file_revisions(file_revisions)
+
+         for file_revision in tqdm(filtered_file_revisions):
+             logger.info("Analyzing file revision "+file_revision['path'])
+             file_revision.language = self.get_language(file_revision)
+             one = self.analyze_file_revision(file_revision,
+                                              {analyzer_name: analyzer_params
+                                               for analyzer_name, analyzer_params in list(self.analyzers.items())
+                                               if analyzer_params['language'] == file_revision.language})
+
+             data = list(self.analyzers.items())  # <-- Line 556
+             for analyzer_name, analyzer_params in data:
+                 if analyzer_name == "trojansource":
+                     two = self.analyze_file_revision(file_revision, {"trojansource": analyzer_params})
+                 if analyzer_name == "trufflehog3":
+                     three = self.analyze_file_revision(file_revision, {"trufflehog3": analyzer_params})
+                 if analyzer_name == "yara":
+                     four = self.analyze_file_revision(file_revision, {"yara": analyzer_params})
+                 if analyzer_name == "gptanalyzer":
+                     five = self.analyze_file_revision(file_revision, {"gptanalyzer": analyzer_params})
+                 if analyzer_name == "privategptanalyzer":
+                     six = self.analyze_file_revision(file_revision, {"privategptanalyzer": analyzer_params})
+
+             file_revision.results = {**one, **two, **three, **four}
+
+         return filtered_file_revisions
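
# --- Editor's illustration (not part of environment.py) --------------------
# analyze_file_revisions() above merges the per-analyzer result dicts with
# {**one, **two, **three, **four}; this appears to assume the "trojansource",
# "trufflehog3" and "yara" analyzers are always configured, since the names
# are otherwise unbound. The merge itself is plain dict unpacking; the
# sample results below are made up:
base = {"bandit": {"issues": []}, "analysis_time": {"bandit": 0.12}}
extra = {"yara": {"issues": []}, "analysis_time": {"yara": 0.03}}

merged = {**base, **extra}
print(merged["analysis_time"])
# Duplicate keys such as 'analysis_time' are taken from the right-most dict,
# so earlier timing entries are overwritten rather than combined.
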
+
+     def analyze_file_revision(self, file_revision, analyzers):
+
+         analysis_time = {}
+         results = {}
+
+         for analyzer_name, analyzer_params in list(analyzers.items()):
+             try:
+
+                 analyzer = self.init_analyzer(analyzer_name, analyzer_params)
+                 if analyzer is None:
+                     continue
+
+                 start = time.time()
+                 analyzer_results = analyzer.analyze(file_revision)
+                 stop = time.time()
+
+                 analysis_time[analyzer_name] = stop-start
+
+                 if analyzer_results:
+                     results[analyzer_name] = analyzer_results
+                 else:
+                     results[analyzer_name] = {}
+
+             except Exception as e:
+                 if self.raise_on_analysis_error:
+                     raise
+                 issue = self.create_analysis_error(
+                     'An exception occurred during the analysis of this file.')
+                 logger.error(traceback.format_exc())
+                 results[analyzer_name] = {'issues': [issue]}
+
+         results['analysis_time'] = dict(analysis_time)
+
+         return results
+
+     def analyze(self, file_revisions, save_if_empty=False, snapshot=None):
+         """
+         Handling dependencies:
+
+         * First, genreate a list of file revisions for this snapshot
+         * Then, check which ones of of them already exist
+         * For the existing ones, check their dependencies
+         * If any of the dependencies are outdated, add the dependent file revision to the analyze list
+
+         How to handle hashes? Dependencies should be included in the hash sum.
+
+         * Just load the files based on their SHA values
+         * Check if dependencies match with the current set based on SHA values
+         * If not, re-analyze the file revision
+         * After analysis, calculate the hash value based on path, SHA and dependencies
+         """
+
+         logger.debug("Analyzing code environment...")
+
+         if snapshot is None:
+             snapshot = Snapshot()
+             snapshot.configuration = self.project.configuration
+
+         file_revisions_by_pk = dict([(fr.hash, fr) for fr in file_revisions])
+
+         filtered_file_revisions = self.filter_file_revisions(file_revisions)
+         filtered_file_revisions_by_pk = dict(
+             [(fr.hash, fr) for fr in filtered_file_revisions])
+
+         excluded_file_revisions = [file_revisions_by_pk[pk]
+                                    for pk in list(file_revisions_by_pk.keys())
+                                    if not pk in filtered_file_revisions_by_pk
+                                    ]
+
+         logger.info("Excluding %d file revisions" %
+                     len(excluded_file_revisions))
+
+         file_revisions = filtered_file_revisions
+         file_revisions_by_pk = filtered_file_revisions_by_pk
+
+         max_file_revisions = 10000
+
+         if len(file_revisions) > max_file_revisions:
+
+             logger.warning("Too many file revisions (%d) in snapshot, truncating at %d" %
+                            (len(file_revisions), max_file_revisions))
+             file_revisions_by_pk = dict(sorted(list(file_revisions_by_pk.items()),
+                                                key=lambda x: x[0])[:max_file_revisions])
+             file_revisions = list(file_revisions_by_pk.values())
+
+         i = 0
+         chunk_size = 50
+         existing_file_revisions = []
+         file_revisions_by_pk_keys = list(file_revisions_by_pk.keys())
+
+         # we only check 50 keys at a time and then incrementally save them
+         while i < len(file_revisions_by_pk_keys):
+             file_revisions_by_pk_chunk = file_revisions_by_pk_keys[i:i+chunk_size]
+             if not file_revisions_by_pk_chunk:
+                 break
+             existing_file_revisions.extend(list(self.project.backend.filter(FileRevision, {
+                 'project': self.project,
+                 'hash': {'$in': file_revisions_by_pk_chunk}
+             })))
+             i += chunk_size
+
+         existing_file_revisions_by_pk = dict(
+             [(fr.hash, fr) for fr in existing_file_revisions])
+         new_file_revisions = [file_revision for file_revision in file_revisions
+                               if not file_revision.hash in existing_file_revisions_by_pk]
+
+         new_file_revisions = []
+
+         for file_revision in file_revisions:
+             if not file_revision.hash in existing_file_revisions_by_pk:
+                 file_revision.configuration = self.project.configuration
+                 new_file_revisions.append(file_revision)
+             elif existing_file_revisions_by_pk[file_revision.hash].configuration != self.project.configuration:
+                 # we replace the pk and configuration values of the new file_revision object, so that
+                 # it will overwrite the old version...
+                 file_revision.pk = existing_file_revisions_by_pk[file_revision.hash].pk
+                 file_revision.configuration = self.project.configuration
+                 new_file_revisions.append(file_revision)
+
+         file_revisions_dict = {}
+
+         for file_revision in existing_file_revisions+new_file_revisions:
+             file_revisions_dict[file_revision.path] = file_revision
+
+         logger.info("Analyzing %d new file revisions (%d are already analyzed)" % (
+             len(new_file_revisions),
+             len(existing_file_revisions)
+         ))
+         i = 0
+
+         # We set the project information in the snapshot.
+         snapshot.project = self.project
+         snapshot.file_revisions = list(file_revisions_dict.values())
+         self.env['snapshot'] = snapshot
+
+         try:
+             while i < len(new_file_revisions):
+                 j = i+10 if i + \
+                     10 < len(new_file_revisions) else len(new_file_revisions)
+                 logger.info("Analyzing and saving: %d - %d (%d remaining)" %
+                             (i, j, len(new_file_revisions) - i))
+                 file_revisions_slice = new_file_revisions[i:j]
+                 analyzed_file_revisions = self.analyze_file_revisions(
+                     file_revisions_slice)
+                 logger.info("Annotating and saving file revisions...")
+                 self.save_file_revisions(snapshot, analyzed_file_revisions)
+                 i += 10
+             logger.info("Summarizing file revisions...")
+             snapshot.summary = self.summarize(
+                 list(file_revisions_dict.values()))
+         finally:
+             del self.env['snapshot']
+
+         snapshot.analyzed = True
+
+         logger.info("Saving snapshot...")
+
+         with self.project.backend.transaction():
+             self.project.backend.save(snapshot)
+
+         logger.info("Done analyzing snapshot %s" % snapshot.pk)
+
+         return snapshot
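
# --- Editor's illustration (not part of environment.py) --------------------
# analyze() above checks which file revisions already exist by querying the
# backend in chunks of 50 hashes ({'hash': {'$in': chunk}}) rather than with
# one huge $in query. A self-contained sketch of the same chunking pattern,
# with a plain function standing in for the backend query (hypothetical):
def chunked(items, size):
    """Yield consecutive slices of `items` with at most `size` elements."""
    for i in range(0, len(items), size):
        yield items[i:i + size]


def fake_backend_lookup(hashes):
    # Stand-in for self.project.backend.filter(FileRevision, {'hash': {'$in': hashes}})
    known = {"h2", "h5"}
    return [h for h in hashes if h in known]


all_hashes = [f"h{i}" for i in range(7)]
existing = []
for chunk in chunked(all_hashes, 3):
    existing.extend(fake_backend_lookup(chunk))
print(existing)  # ['h2', 'h5']
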
+
+     def save_file_revisions(self, snapshot, file_revisions):
+         """
+         We convert various items in the file revision to documents,
+         so that we can easily search and retrieve them...
+         """
+
+         annotations = defaultdict(list)
+
+         for file_revision in file_revisions:
+             issues_results = {}
+
+             for analyzer_name, results in list(file_revision.results.items()):
+
+                 if 'issues' in results:
+                     issues_results[analyzer_name] = results['issues']
+                     del results['issues']
+                     if len(issues_results) > 1000:
+                         issues_results[analyzer_name] = [{
+                             'code': 'TooManyIssues',
+                             'analyzer': analyzer_name,
+                         }]
+
+             with self.project.backend.transaction():
+                 self.project.backend.save(file_revision)
+
+             def location_sorter(issue):
+                 if issue['location'] and issue['location'][0] and issue['location'][0][0]:
+                     return issue['location'][0][0][0]
+                 return 0
+
+             with self.project.backend.transaction():
+                 for analyzer_name, issues in list(issues_results.items()):
+                     grouped_issues = group_issues_by_fingerprint(issues)
+                     for issue_dict in grouped_issues:
+
+                         hasher = Hasher()
+                         hasher.add(analyzer_name)
+                         hasher.add(issue_dict['code'])
+                         hasher.add(issue_dict['fingerprint'])
+                         issue_dict['hash'] = hasher.digest.hexdigest()
+
+                         try:
+                             # we check if the issue already exists
+                             issue = self.project.backend.get(Issue, {'hash': issue_dict['hash'],
+                                                                      'project': self.project
+                                                                      })
+                         except Issue.DoesNotExist:
+                             # if not, we create it
+                             d = issue_dict.copy()
+                             d['analyzer'] = analyzer_name
+                             if 'location' in d:
+                                 del d['location']
+                             if 'occurrences' in d:
+                                 del d['occurrences']
+                             issue = Issue(d)
+                             issue.project = self.project
+                             self.project.backend.save(issue)
+
+                         for occurrence in issue_dict['occurrences']:
+                             hasher = Hasher()
+                             hasher.add(file_revision.hash)
+                             hasher.add(issue.hash)
+                             hasher.add(occurrence.get('from_row'))
+                             hasher.add(occurrence.get('from_column'))
+                             hasher.add(occurrence.get('to_row'))
+                             hasher.add(occurrence.get('to_column'))
+                             hasher.add(occurrence.get('sequence'))
+                             occurrence['hash'] = hasher.digest.hexdigest()
+
+                             try:
+                                 # we check if the occurrence already exists
+                                 occurrence = self.project.backend.get(IssueOccurrence, {'hash': occurrence['hash'],
+                                                                                         'issue': issue
+                                                                                         })
+                             except IssueOccurrence.DoesNotExist:
+                                 # if not, we create it
+                                 occurrence = IssueOccurrence(occurrence)
+                                 occurrence.issue = issue
+                                 occurrence.file_revision = file_revision
+                                 self.project.backend.save(occurrence)
+                             annotations['occurrences'].append(occurrence)
+                         annotations['issues'].append(issue)
+
+         return annotations
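
# --- Editor's illustration (not part of environment.py) --------------------
# save_file_revisions() above derives a stable issue hash from the analyzer
# name, issue code and fingerprint via checkmate's Hasher helper. A rough,
# self-contained approximation of that idea using hashlib (the exact Hasher
# serialization in checkmate.helpers.hashing may differ):
import hashlib


def issue_hash(analyzer, code, fingerprint):
    h = hashlib.sha256()
    for part in (analyzer, code, fingerprint):
        h.update(str(part).encode("utf-8"))
    return h.hexdigest()


print(issue_hash("bandit", "B602", "deadbeef"))
# The same triple always yields the same hash, which is what lets the code
# above look up an existing Issue before creating a new one.
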