argus-alm 0.14.2__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (118)
  1. argus/_version.py +21 -0
  2. argus/backend/.gitkeep +0 -0
  3. argus/backend/__init__.py +0 -0
  4. argus/backend/cli.py +57 -0
  5. argus/backend/controller/__init__.py +0 -0
  6. argus/backend/controller/admin.py +20 -0
  7. argus/backend/controller/admin_api.py +355 -0
  8. argus/backend/controller/api.py +589 -0
  9. argus/backend/controller/auth.py +67 -0
  10. argus/backend/controller/client_api.py +109 -0
  11. argus/backend/controller/main.py +316 -0
  12. argus/backend/controller/notification_api.py +72 -0
  13. argus/backend/controller/notifications.py +13 -0
  14. argus/backend/controller/planner_api.py +194 -0
  15. argus/backend/controller/team.py +129 -0
  16. argus/backend/controller/team_ui.py +19 -0
  17. argus/backend/controller/testrun_api.py +513 -0
  18. argus/backend/controller/view_api.py +188 -0
  19. argus/backend/controller/views_widgets/__init__.py +0 -0
  20. argus/backend/controller/views_widgets/graphed_stats.py +54 -0
  21. argus/backend/controller/views_widgets/graphs.py +68 -0
  22. argus/backend/controller/views_widgets/highlights.py +135 -0
  23. argus/backend/controller/views_widgets/nemesis_stats.py +26 -0
  24. argus/backend/controller/views_widgets/summary.py +43 -0
  25. argus/backend/db.py +98 -0
  26. argus/backend/error_handlers.py +41 -0
  27. argus/backend/events/event_processors.py +34 -0
  28. argus/backend/models/__init__.py +0 -0
  29. argus/backend/models/argus_ai.py +24 -0
  30. argus/backend/models/github_issue.py +60 -0
  31. argus/backend/models/plan.py +24 -0
  32. argus/backend/models/result.py +187 -0
  33. argus/backend/models/runtime_store.py +58 -0
  34. argus/backend/models/view_widgets.py +25 -0
  35. argus/backend/models/web.py +403 -0
  36. argus/backend/plugins/__init__.py +0 -0
  37. argus/backend/plugins/core.py +248 -0
  38. argus/backend/plugins/driver_matrix_tests/controller.py +66 -0
  39. argus/backend/plugins/driver_matrix_tests/model.py +429 -0
  40. argus/backend/plugins/driver_matrix_tests/plugin.py +21 -0
  41. argus/backend/plugins/driver_matrix_tests/raw_types.py +62 -0
  42. argus/backend/plugins/driver_matrix_tests/service.py +61 -0
  43. argus/backend/plugins/driver_matrix_tests/udt.py +42 -0
  44. argus/backend/plugins/generic/model.py +86 -0
  45. argus/backend/plugins/generic/plugin.py +15 -0
  46. argus/backend/plugins/generic/types.py +14 -0
  47. argus/backend/plugins/loader.py +39 -0
  48. argus/backend/plugins/sct/controller.py +224 -0
  49. argus/backend/plugins/sct/plugin.py +37 -0
  50. argus/backend/plugins/sct/resource_setup.py +177 -0
  51. argus/backend/plugins/sct/service.py +682 -0
  52. argus/backend/plugins/sct/testrun.py +288 -0
  53. argus/backend/plugins/sct/udt.py +100 -0
  54. argus/backend/plugins/sirenada/model.py +118 -0
  55. argus/backend/plugins/sirenada/plugin.py +16 -0
  56. argus/backend/service/admin.py +26 -0
  57. argus/backend/service/argus_service.py +696 -0
  58. argus/backend/service/build_system_monitor.py +185 -0
  59. argus/backend/service/client_service.py +127 -0
  60. argus/backend/service/event_service.py +18 -0
  61. argus/backend/service/github_service.py +233 -0
  62. argus/backend/service/jenkins_service.py +269 -0
  63. argus/backend/service/notification_manager.py +159 -0
  64. argus/backend/service/planner_service.py +608 -0
  65. argus/backend/service/release_manager.py +229 -0
  66. argus/backend/service/results_service.py +690 -0
  67. argus/backend/service/stats.py +610 -0
  68. argus/backend/service/team_manager_service.py +82 -0
  69. argus/backend/service/test_lookup.py +172 -0
  70. argus/backend/service/testrun.py +489 -0
  71. argus/backend/service/user.py +308 -0
  72. argus/backend/service/views.py +219 -0
  73. argus/backend/service/views_widgets/__init__.py +0 -0
  74. argus/backend/service/views_widgets/graphed_stats.py +180 -0
  75. argus/backend/service/views_widgets/highlights.py +374 -0
  76. argus/backend/service/views_widgets/nemesis_stats.py +34 -0
  77. argus/backend/template_filters.py +27 -0
  78. argus/backend/tests/__init__.py +0 -0
  79. argus/backend/tests/client_service/__init__.py +0 -0
  80. argus/backend/tests/client_service/test_submit_results.py +79 -0
  81. argus/backend/tests/conftest.py +180 -0
  82. argus/backend/tests/results_service/__init__.py +0 -0
  83. argus/backend/tests/results_service/test_best_results.py +178 -0
  84. argus/backend/tests/results_service/test_cell.py +65 -0
  85. argus/backend/tests/results_service/test_chartjs_additional_functions.py +259 -0
  86. argus/backend/tests/results_service/test_create_chartjs.py +220 -0
  87. argus/backend/tests/results_service/test_result_metadata.py +100 -0
  88. argus/backend/tests/results_service/test_results_service.py +203 -0
  89. argus/backend/tests/results_service/test_validation_rules.py +213 -0
  90. argus/backend/tests/view_widgets/__init__.py +0 -0
  91. argus/backend/tests/view_widgets/test_highlights_api.py +532 -0
  92. argus/backend/util/common.py +65 -0
  93. argus/backend/util/config.py +38 -0
  94. argus/backend/util/encoders.py +56 -0
  95. argus/backend/util/logsetup.py +80 -0
  96. argus/backend/util/module_loaders.py +30 -0
  97. argus/backend/util/send_email.py +91 -0
  98. argus/client/base.py +1 -3
  99. argus/client/driver_matrix_tests/cli.py +17 -8
  100. argus/client/generic/cli.py +4 -2
  101. argus/client/generic/client.py +1 -0
  102. argus/client/generic_result.py +48 -9
  103. argus/client/sct/client.py +1 -3
  104. argus/client/sirenada/client.py +4 -1
  105. argus/client/tests/__init__.py +0 -0
  106. argus/client/tests/conftest.py +19 -0
  107. argus/client/tests/test_package.py +45 -0
  108. argus/client/tests/test_results.py +224 -0
  109. argus/common/sct_types.py +3 -0
  110. argus/common/sirenada_types.py +1 -1
  111. {argus_alm-0.14.2.dist-info → argus_alm-0.15.2.dist-info}/METADATA +43 -19
  112. argus_alm-0.15.2.dist-info/RECORD +122 -0
  113. {argus_alm-0.14.2.dist-info → argus_alm-0.15.2.dist-info}/WHEEL +2 -1
  114. argus_alm-0.15.2.dist-info/entry_points.txt +3 -0
  115. argus_alm-0.15.2.dist-info/top_level.txt +1 -0
  116. argus_alm-0.14.2.dist-info/RECORD +0 -20
  117. argus_alm-0.14.2.dist-info/entry_points.txt +0 -4
  118. {argus_alm-0.14.2.dist-info → argus_alm-0.15.2.dist-info/licenses}/LICENSE +0 -0
argus/backend/plugins/sct/service.py (new file)
@@ -0,0 +1,682 @@
+ import base64
+ from dataclasses import dataclass
+ from datetime import datetime
+ import logging
+ import math
+ import re
+ from time import time
+ from xml.etree import ElementTree
+ from flask import g
+ from argus.backend.models.github_issue import GithubIssue, IssueLink
+ from argus.backend.models.web import ArgusEventTypes, ErrorEventEmbeddings, CriticalEventEmbeddings
+ from argus.backend.plugins.sct.testrun import SCTJunitReports, SCTTestRun, SubtestType
+ from argus.common.sct_types import GeminiResultsRequest, PerformanceResultsRequest, ResourceUpdateRequest
+ from argus.backend.plugins.sct.udt import (
+     CloudInstanceDetails,
+     CloudResource,
+     EventsBySeverity,
+     NemesisRunInfo,
+     NodeDescription,
+     PackageVersion,
+     PerformanceHDRHistogram,
+ )
+ from argus.backend.service.event_service import EventService
+ from argus.backend.util.common import chunk, get_build_number
+ from argus.common.enums import NemesisStatus, ResourceState, TestStatus
+
+ LOGGER = logging.getLogger(__name__)
+ MAX_SIMILARS = 20
+
+
+ class SCTServiceException(Exception):
+     pass
+
+
+ @dataclass(init=True, repr=True)
+ class NemesisSubmissionRequest:
+     name: str
+     class_name: str
+     start_time: int
+     node_name: str
+     node_ip: str
+     node_shards: int
+
+
+ @dataclass(init=True, repr=True)
+ class NemesisFinalizationRequest:
+     name: str
+     start_time: int
+     status: str
+     message: str
+
+
+ @dataclass(init=True, repr=True)
+ class EventSubmissionRequest:
+     severity: str
+     total_events: int
+     messages: list[str]
+
+
+ class SCTService:
+
+     @staticmethod
+     def submit_packages(run_id: str, packages: list[dict]) -> str:
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             for package_dict in packages:
+                 package = PackageVersion(**package_dict)
+                 if "target" in package.name:
+                     SCTService.process_target_version(run, package)
+                 if package not in run.packages:
+                     run.packages.append(package)
+             run.save()
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "added"
+
+     @staticmethod
+     def process_target_version(run: SCTTestRun, package: PackageVersion):
+         if run.version_source and "upgrade-target" in run.version_source and package.name == "scylla-server-target":
+             return
+         run.version_source = package.name
+         run.scylla_version = package.version
+
+     @staticmethod
+     def set_sct_runner(run_id: str, public_ip: str, private_ip: str, region: str, backend: str, name: str = None):
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             details = CloudInstanceDetails(
+                 public_ip=public_ip,
+                 private_ip=private_ip,
+                 provider=backend,
+                 region=region,
+             )
+             run.sct_runner_host = details
+             resource = CloudResource(name=name or "sct-runner", resource_type="sct-runner", instance_info=details)
+             run.allocated_resources.append(resource)
+             run.save()
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "updated"
+
+     @staticmethod
+     def submit_screenshots(run_id: str, screenshot_links: list[str]) -> str:
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             for link in screenshot_links:
+                 run.add_screenshot(link)
+             run.save()
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "submitted"
+
+     @staticmethod
+     def submit_gemini_results(run_id: str, gemini_data: GeminiResultsRequest) -> str:
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             run.subtest_name = SubtestType.GEMINI.value
+             run.oracle_nodes_count = gemini_data.get("oracle_nodes_count")
+             run.oracle_node_ami_id = gemini_data.get("oracle_node_ami_id")
+             run.oracle_node_instance_type = gemini_data.get("oracle_node_instance_type")
+             run.oracle_node_scylla_version = gemini_data.get("oracle_node_scylla_version")
+             run.gemini_command = gemini_data.get("gemini_command")
+             run.gemini_version = gemini_data.get("gemini_version")
+             run.gemini_status = gemini_data.get("gemini_status")
+             run.gemini_seed = str(gemini_data.get("gemini_seed"))
+             run.gemini_write_ops = gemini_data.get("gemini_write_ops")
+             run.gemini_write_errors = gemini_data.get("gemini_write_errors")
+             run.gemini_read_ops = gemini_data.get("gemini_read_ops")
+             run.gemini_read_errors = gemini_data.get("gemini_read_errors")
+             run.save()
+
+             if run.gemini_status != "PASSED":
+                 run.status = TestStatus.FAILED
+                 EventService.create_run_event(kind=ArgusEventTypes.TestRunStatusChanged, body={
+                     "message": "[{username}] Setting run status to {status} due to Gemini reporting following status: {gemini_status}",
+                     "username": g.user.username,
+                     "status": TestStatus.FAILED.value,
+                     "gemini_status": run.gemini_status,
+                 }, user_id=g.user.id, run_id=run_id, release_id=run.release_id, test_id=run.test_id)
+                 run.save()
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "submitted"
+
+     @staticmethod
+     def submit_performance_results(run_id: str, performance_results: PerformanceResultsRequest):
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             run.subtest_name = SubtestType.PERFORMANCE.value
+             run.perf_op_rate_average = performance_results.get("perf_op_rate_average")
+             run.perf_op_rate_total = performance_results.get("perf_op_rate_total")
+             run.perf_avg_latency_99th = performance_results.get("perf_avg_latency_99th")
+             run.perf_avg_latency_mean = performance_results.get("perf_avg_latency_mean")
+             run.perf_total_errors = performance_results.get("perf_total_errors")
+             run.stress_cmd = performance_results.get("stress_cmd")
+             run.test_name = performance_results.get("test_name")
+             run.save()
+
+             is_latency_test = "latency" in run.test_name
+             threshold_negative = -10
+
+             def cmp(lhs, rhs):
+                 delta = rhs - lhs
+                 change = int(math.fabs(delta) * 100 / rhs)
+                 return change if delta >= 0 else change * -1
+
+             previous_runs = SCTTestRun.get_perf_results_for_test_name(run.build_id, run.start_time, run.test_name)
+             metrics_to_check = ["perf_avg_latency_99th",
+                                 "perf_avg_latency_mean"] if is_latency_test else ["perf_op_rate_total"]
+
+             older_runs_by_version = {}
+             for prev_run in previous_runs:
+                 if not older_runs_by_version.get(prev_run["scylla_version"]):
+                     older_runs_by_version[prev_run["scylla_version"]] = []
+                 older_runs_by_version[prev_run["scylla_version"]].append(prev_run)
+
+             regression_found = False
+             regression_info = {
+                 "version": None,
+                 "delta": None,
+                 "id": None,
+                 "metric": None,
+                 "job_url": None,
+             }
+
+             if performance_results["histograms"]:
+                 for histogram in performance_results["histograms"]:
+                     run.histograms = {k: PerformanceHDRHistogram(**v) for k, v in histogram.items()}
+
+             for version, runs in older_runs_by_version.items():
+                 for metric in metrics_to_check:
+                     best_run = sorted(runs, reverse=(not is_latency_test), key=lambda v: v[metric])[0]
+                     last_run = runs[0]
+
+                     metric_to_best = cmp(run[metric], best_run[metric])
+                     metric_to_last = cmp(run[metric], last_run[metric])
+                     if metric_to_last < threshold_negative:
+                         regression_found = True
+                         regression_info["metric"] = metric
+                         regression_info["version"] = version
+                         regression_info["job_url"] = last_run["build_job_url"]
+                         regression_info["id"] = str(last_run["id"])
+                         regression_info["delta"] = metric_to_last
+                         break
+
+                     if metric_to_best < threshold_negative:
+                         regression_found = True
+                         regression_info["metric"] = metric
+                         regression_info["version"] = version
+                         regression_info["job_url"] = best_run["build_job_url"]
+                         regression_info["id"] = str(best_run["id"])
+                         regression_info["delta"] = metric_to_best
+                         break
+
+                 if regression_found:
+                     break
+
+             if regression_found:
+                 run.status = TestStatus.FAILED.value
+                 run.save()
+                 EventService.create_run_event(kind=ArgusEventTypes.TestRunStatusChanged, body={
+                     "message": "[{username}] Setting run status to {status} due to performance metric '{metric}' falling "
+                     "below allowed threshold ({threshold_negative}): {delta}% compared to "
+                     "<a href='/test/{test_id}/runs?additionalRuns[]={base_run_id}&additionalRuns[]={previous_run_id}'>This {version} (#{build_number}) run</a>",
+                     "username": g.user.username,
+                     "status": TestStatus.FAILED.value,
+                     "metric": regression_info["metric"],
+                     "threshold_negative": threshold_negative,
+                     "delta": regression_info["delta"],
+                     "test_id": str(run.test_id),
+                     "base_run_id": str(run.id),
+                     "previous_run_id": regression_info["id"],
+                     "version": regression_info["version"],
+                     "build_number": get_build_number(regression_info["job_url"])
+                 }, user_id=g.user.id, run_id=run_id, release_id=run.release_id, test_id=run.test_id)
+             else:
+                 # NOTE: This will override status set by SCT Events.
+                 run.status = TestStatus.PASSED.value
+                 run.save()
+
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "submitted"
+
+     @staticmethod
+     def get_performance_history_for_test(run_id: str):
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             rows = run.get_perf_results_for_test_name(
+                 build_id=run.build_id, start_time=run.start_time, test_name=run.test_name)
+             return rows
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+     @staticmethod
+     def create_resource(run_id: str, resource_details: dict) -> str:
+         instance_details = CloudInstanceDetails(**resource_details.pop("instance_details"))
+         resource = CloudResource(**resource_details, instance_info=instance_details)
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             run.get_resources().append(resource)
+             run.save()
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "created"
+
+     @staticmethod
+     def update_resource_shards(run_id: str, resource_name: str, new_shards: int) -> str:
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             resource = next(res for res in run.get_resources() if res.name == resource_name)
+             resource.get_instance_info().shards_amount = new_shards
+             run.save()
+         except StopIteration as exception:
+             LOGGER.error("Resource %s not found in run %s", resource_name, run_id)
+             raise SCTServiceException("Resource not found", resource_name) from exception
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "updated"
+
+     @staticmethod
+     def update_resource(run_id: str, resource_name: str, update_data: ResourceUpdateRequest) -> str:
+         try:
+             fields_updated = {}
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             resource = next(res for res in run.get_resources() if res.name == resource_name)
+             instance_info = update_data.pop("instance_info", None)
+             resource.state = ResourceState(update_data.get("state", resource.state)).value
+             if instance_info:
+                 resource_instance_info = resource.get_instance_info()
+                 for k, v in instance_info.items():
+                     if k in resource_instance_info.keys():
+                         resource_instance_info[k] = v
+                         fields_updated[k] = v
+             run.save()
+         except StopIteration as exception:
+             LOGGER.error("Resource %s not found in run %s", resource_name, run_id)
+             raise SCTServiceException("Resource not found", resource_name) from exception
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return {
+             "state": "updated",
+             "fields": fields_updated
+         }
+
+     @staticmethod
+     def terminate_resource(run_id: str, resource_name: str, reason: str) -> str:
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             if "sct-runner" in resource_name:  # FIXME: Temp solution until sct-runner name is propagated on submit
+                 resource = next(res for res in run.get_resources() if "sct-runner" in res.name)
+             else:
+                 resource = next(res for res in run.get_resources() if res.name == resource_name)
+             resource.get_instance_info().termination_reason = reason
+             resource.get_instance_info().termination_time = int(time())
+             resource.state = ResourceState.TERMINATED.value
+             run.save()
+         except StopIteration as exception:
+             LOGGER.error("Resource %s not found in run %s", resource_name, run_id)
+             raise SCTServiceException("Resource not found", resource_name) from exception
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "terminated"
+
+     @staticmethod
+     def submit_nemesis(run_id: str, nemesis_details: dict) -> str:
+         nem_req = NemesisSubmissionRequest(**nemesis_details)
+         node_desc = NodeDescription(name=nem_req.node_name, ip=nem_req.node_ip, shards=nem_req.node_shards)
+         nemesis_info = NemesisRunInfo(
+             class_name=nem_req.class_name,
+             name=nem_req.name,
+             start_time=int(nem_req.start_time),
+             end_time=0,
+             duration=0,
+             stack_trace="",
+             status=NemesisStatus.RUNNING.value,
+             target_node=node_desc,
+         )
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             run.add_nemesis(nemesis_info)
+             run.save()
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "created"
+
+     @staticmethod
+     def finalize_nemesis(run_id: str, nemesis_details: dict) -> str:
+         nem_req = NemesisFinalizationRequest(**nemesis_details)
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             nemesis = next(nem for nem in run.get_nemeses() if nem.name ==
+                            nem_req.name and nem.start_time == nem_req.start_time)
+             nemesis.status = NemesisStatus(nem_req.status).value
+             nemesis.stack_trace = nem_req.message
+             nemesis.end_time = int(time())
+             run.save()
+         except StopIteration as exception:
+             LOGGER.error("Nemesis %s (%s) not found for run %s", nem_req.name, nem_req.start_time, run_id)
+             raise SCTServiceException("Nemesis not found", (nem_req.name, nem_req.start_time)) from exception
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "updated"
+
+     @staticmethod
+     def submit_events(run_id: str, events: list[dict]) -> str:
+         wrapped_events = [EventSubmissionRequest(**ev) for ev in events]
+         try:
+             run: SCTTestRun = SCTTestRun.get(id=run_id)
+             for event in wrapped_events:
+                 wrapper = EventsBySeverity(severity=event.severity,
+                                            event_amount=event.total_events, last_events=event.messages)
+                 run.get_events().append(wrapper)
+             coredumps = SCTService.locate_coredumps(run, run.get_events())
+             run.submit_logs(coredumps)
+             run.save()
+         except SCTTestRun.DoesNotExist as exception:
+             LOGGER.error("Run %s not found for SCTTestRun", run_id)
+             raise SCTServiceException("Run not found", run_id) from exception
+
+         return "added"
+
+     @staticmethod
+     def locate_coredumps(run: SCTTestRun, events: list[EventsBySeverity]) -> list[dict]:
+         flat_messages: list[str] = []
+         links = []
+         for es in events:
+             flat_messages.extend(es.last_events)
+         coredump_events = filter(lambda v: "coredumpevent" in v.lower(), flat_messages)
+         for idx, event in enumerate(coredump_events):
+             core_pattern = r"corefile_url=(?P<url>.+)$"
+             ts_pattern = r"^(?P<ts>\d{4}-\d{2}-\d{2} ([\d:]*)\.\d{3})"
+             node_name_pattern = r"node=(?P<name>.+)$"
+             core_url_match = re.search(core_pattern, event, re.MULTILINE)
+             node_name_match = re.search(node_name_pattern, event, re.MULTILINE)
+             ts_match = re.search(ts_pattern, event)
+             if core_url_match:
+                 node_name = node_name_match.group("name") if node_name_match else f"unknown-node-{idx}"
+                 split_name = node_name.split(" ")
+                 node_name = split_name[1] if len(split_name) >= 2 else node_name
+                 url = core_url_match.group("url")
+                 timestamp_component = ""
+                 if ts_match:
+                     try:
+                         timestamp = datetime.fromisoformat(ts_match.group("ts"))
+                         timestamp_component = timestamp.strftime("-%Y-%m-%d_%H-%M-%S")
+                     except ValueError:
+                         pass
+                 log_link = {
+                     "log_name": f"core.scylla-{node_name}{timestamp_component}.gz",
+                     "log_link": url
+                 }
+                 links.append(log_link)
+         return links
+
+     @staticmethod
+     def get_similar_events(run_id: str) -> list[dict]:
+         """Get similar events for each event in a test run
+
+         Args:
+             run_id: The test run ID to get similar events for
+
+         Returns:
+             List of dictionaries containing event_index, severity and similars_set for each event
+         """
+         error_embeddings = ErrorEventEmbeddings.filter(run_id=run_id).only(["event_index", "similars_map"]).all()
+         critical_embeddings = CriticalEventEmbeddings.filter(run_id=run_id).only(["event_index", "similars_map"]).all()
+
+         result = []
+         # Process ERROR embeddings
+         for embedding in error_embeddings:
+             result.append(
+                 {
+                     "event_index": embedding.event_index,
+                     "severity": "ERROR",
+                     "similars_set": [str(similar_run_id) for similar_run_id in embedding.similars_map],
+                 }
+             )
+
+         # Process CRITICAL embeddings
+         for embedding in critical_embeddings:
+             result.append(
+                 {
+                     "event_index": embedding.event_index,
+                     "severity": "CRITICAL",
+                     "similars_set": [str(similar_run_id) for similar_run_id in embedding.similars_map],
+                 }
+             )
+
+         return result
+
+     @staticmethod
+     def get_similar_runs_info(run_ids: list[str]):
+         """Get build IDs and issues for a list of run IDs
+
+         Args:
+             run_ids: List of run IDs to fetch information for
+
+         Returns:
+             Dictionary mapping run IDs to their information (build_id, start_time, and issues)
+         """
+         result = {}
+
+         # Step 1: Get issue links for all run_ids in batches
+         all_issue_links = {}
+
+         for batch_run_ids in chunk(run_ids):
+             batch_links = IssueLink.objects.filter(run_id__in=batch_run_ids).all()
+
+             for link in batch_links:
+                 run_id_str = str(link.run_id)
+                 if run_id_str not in all_issue_links:
+                     all_issue_links[run_id_str] = []
+                 all_issue_links[run_id_str].append(link)
+
+         # Step 2: Fetch all unique issue details
+         all_issue_ids = set()
+         for links in all_issue_links.values():
+             all_issue_ids.update(link.issue_id for link in links)
+
+         issues_by_id = {}
+         if all_issue_ids:
+             for batch_issue_ids in chunk(list(all_issue_ids)):
+                 batch_issues = GithubIssue.filter(id__in=batch_issue_ids).all()
+
+                 for issue in batch_issues:
+                     issues_by_id[issue.id] = issue
+
+         # Step 3: Fetch test runs only for run_ids that have issue links (limiting to MAX_SIMILARS runs)
+         runs_with_issues = list(all_issue_links.keys())
+
+         test_runs = {}
+         if runs_with_issues:
+             for run_id in runs_with_issues[:MAX_SIMILARS]:
+                 try:
+                     test_run = SCTTestRun.get(id=run_id)
+                     test_runs[run_id] = test_run
+                 except Exception as e:
+                     LOGGER.debug(f"Failed to fetch test run {run_id}: {str(e)}")
+
+         # Step 4: Assign run and issue details to result for runs with issues
+         for run_id in runs_with_issues:
+             try:
+                 test_run = test_runs.get(run_id)
+                 if not test_run:
+                     continue
+
+                 links = all_issue_links.get(run_id, [])
+                 issues = [issues_by_id[link.issue_id] for link in links if link.issue_id in issues_by_id]
+
+                 try:
+                     build_number = int(
+                         test_run.build_job_url[:-1].split("/")[-1]
+                     )
+                 except Exception as e:
+                     LOGGER.error(
+                         f"Error parsing build number for run {run_id}: {test_run.build_job_url[:-1].split('/')} - {str(e)}")
+                     build_number = -1
+
+                 for pkg_name in ["scylla-server-upgraded", "scylla-server", "scylla-server-target"]:
+                     sut_version = next(
+                         (f"{pkg.version}-{pkg.date}" for pkg in test_run.packages if pkg.name == pkg_name), None)
+                     if sut_version:
+                         break
+
+                 result[run_id] = {
+                     "build_id": f"{test_run.build_id}#{build_number}",
+                     "start_time": test_run.start_time.isoformat(),
+                     "version": sut_version or "unknown",
+                     "issues": [
+                         {
+                             "number": issue.number,
+                             "state": issue.state,
+                             "title": issue.title,
+                             "url": issue.url,
+                         }
+                         for issue in issues
+                     ],
+                 }
+             except Exception as e:
+                 LOGGER.error(f"Error fetching info for run {run_id}: {str(e)}")
+                 result[run_id] = {"build_id": None, "start_time": None, "issues": []}
+
+         # Step 5: If less than MAX_SIMILARS results, fetch MAX_SIMILARS more run details
+         if len(result) < MAX_SIMILARS:
+             remaining_run_ids = [run_id for run_id in run_ids if run_id not in result]
+             additional_needed = min(MAX_SIMILARS - len(result), len(remaining_run_ids))
+
+             if additional_needed > 0:
+                 additional_run_ids = remaining_run_ids[:additional_needed]
+
+                 additional_test_runs = {}
+                 for run_id in additional_run_ids:
+                     try:
+                         test_run = SCTTestRun.get(id=run_id)
+                         additional_test_runs[run_id] = test_run
+                     except Exception as e:
+                         LOGGER.debug(f"Failed to fetch additional test run {run_id}: {str(e)}")
+
+                 for run_id in additional_run_ids:
+                     try:
+                         test_run = additional_test_runs.get(run_id)
+                         if not test_run:
+                             continue
+
+                         try:
+                             build_number = int(
+                                 test_run.build_job_url[:-1].split("/")[-1]
+                             )
+                         except Exception as e:
+                             LOGGER.error(
+                                 f"Error parsing build number for run {run_id}: {test_run.build_job_url[:-1].split('/')} - {str(e)}")
+                             build_number = -1
+
+                         for pkg_name in ["scylla-server-upgraded", "scylla-server", "scylla-server-target"]:
+                             sut_version = next(
+                                 (f"{pkg.version}-{pkg.date}" for pkg in test_run.packages if pkg.name == pkg_name), None)
+                             if sut_version:
+                                 break
+
+                         result[run_id] = {
+                             "build_id": f"{test_run.build_id}#{build_number}",
+                             "start_time": test_run.start_time.isoformat(),
+                             "version": sut_version or "unknown",
+                             "issues": [],
+                         }
+                     except Exception as e:
+                         LOGGER.error(f"Error fetching info for run {run_id}: {str(e)}")
+                         result[run_id] = {"build_id": None, "start_time": None, "issues": []}
+
+         return result
+
+     @staticmethod
+     def get_scylla_version_kernels_report(release_name: str):
+         all_release_runs = SCTTestRun.get_version_data_for_release(release_name=release_name)
+         kernels_by_version = {}
+         kernel_metadata = {}
+         for run in all_release_runs:
+             packages = run["packages"]
+             if not packages:
+                 continue
+             scylla_pkgs = {p["name"]: p for p in packages if "scylla-server" in p["name"]}
+             scylla_pkg = scylla_pkgs["scylla-server-upgraded"] if scylla_pkgs.get(
+                 "scylla-server-upgraded") else scylla_pkgs.get("scylla-server")
+             version = f"{scylla_pkg['version']}-{scylla_pkg['date']}.{scylla_pkg['revision_id']}" if scylla_pkgs else "unknown"
+             kernel_packages = [p for p in packages if "kernel" in p["name"]]
+             kernel_package = kernel_packages[0] if len(kernel_packages) > 0 else None
+             if not kernel_package:
+                 continue
+             version_list = set(kernels_by_version.get(version, []))
+             version_list.add(kernel_package["version"])
+             kernels_by_version[version] = list(version_list)
+             metadata = kernel_metadata.get(
+                 kernel_package.version,
+                 {
+                     "passed": 0,
+                     "failed": 0,
+                     "aborted": 0,
+                 }
+             )
+             if run["status"] in ["passed", "failed", "aborted", "test_error"]:
+                 metadata[run["status"]] += 1
+             kernel_metadata[kernel_package["version"]] = metadata
+
+         return {
+             "versions": kernels_by_version,
+             "metadata": kernel_metadata
+         }
+
+     @staticmethod
+     def junit_submit(run_id: str, file_name: str, content: str) -> bool:
+         try:
+             report = SCTJunitReports.get(test_id=run_id, file_name=file_name)
+             if report:
+                 raise SCTServiceException(f"Report {file_name} already exists.", file_name)
+         except SCTJunitReports.DoesNotExist:
+             pass
+         report = SCTJunitReports()
+         report.test_id = run_id
+         report.file_name = file_name
+
+         xml_content = str(base64.decodebytes(bytes(content, encoding="utf-8")), encoding="utf-8")
+         try:
+             _ = ElementTree.fromstring(xml_content)
+         except Exception:
+             raise SCTServiceException(f"Malformed JUnit report submitted")
+
+         report.report = xml_content
+         report.save()
+
+         return True
+
+     @staticmethod
+     def junit_get_all(run_id: str) -> list[SCTJunitReports]:
+         return list(SCTJunitReports.filter(test_id=run_id).all())
+
+     @staticmethod
+     def junit_get_single(run_id: str, file_name: str) -> SCTJunitReports:
+         return SCTJunitReports.get(test_id=run_id, file_name=file_name)
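
For illustration only, here is a minimal standalone sketch (not part of the package diff above) of how the regexes in locate_coredumps pull the corefile URL, node name and timestamp out of a raw CoreDumpEvent message. The sample message below is hypothetical and only approximates the SCT event format; the extraction steps mirror the method as shipped.

import re
from datetime import datetime

# Hypothetical CoreDumpEvent message, shaped roughly like the SCT events scanned above.
event = (
    "2024-05-14 10:22:31.123 <2024-05-14 10:22:31.000>: (CoreDumpEvent Severity.ERROR) "
    "node=Node longevity-node-1 [34.1.2.3 | 10.0.0.5]\n"
    "corefile_url=https://example.com/core.scylla.gz"
)

core_url_match = re.search(r"corefile_url=(?P<url>.+)$", event, re.MULTILINE)
node_name_match = re.search(r"node=(?P<name>.+)$", event, re.MULTILINE)
ts_match = re.search(r"^(?P<ts>\d{4}-\d{2}-\d{2} ([\d:]*)\.\d{3})", event)

if core_url_match:
    # "node=Node longevity-node-1 [...]" -> keep the second whitespace-separated token
    node_name = node_name_match.group("name") if node_name_match else "unknown-node-0"
    parts = node_name.split(" ")
    node_name = parts[1] if len(parts) >= 2 else node_name
    timestamp_component = ""
    if ts_match:
        timestamp = datetime.fromisoformat(ts_match.group("ts"))
        timestamp_component = timestamp.strftime("-%Y-%m-%d_%H-%M-%S")
    print({
        "log_name": f"core.scylla-{node_name}{timestamp_component}.gz",
        "log_link": core_url_match.group("url"),
    })
# -> {'log_name': 'core.scylla-longevity-node-1-2024-05-14_10-22-31.gz',
#     'log_link': 'https://example.com/core.scylla.gz'}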