argus-alm 0.14.2__py3-none-any.whl → 0.15.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- argus/_version.py +21 -0
- argus/backend/.gitkeep +0 -0
- argus/backend/__init__.py +0 -0
- argus/backend/cli.py +57 -0
- argus/backend/controller/__init__.py +0 -0
- argus/backend/controller/admin.py +20 -0
- argus/backend/controller/admin_api.py +355 -0
- argus/backend/controller/api.py +589 -0
- argus/backend/controller/auth.py +67 -0
- argus/backend/controller/client_api.py +109 -0
- argus/backend/controller/main.py +316 -0
- argus/backend/controller/notification_api.py +72 -0
- argus/backend/controller/notifications.py +13 -0
- argus/backend/controller/planner_api.py +194 -0
- argus/backend/controller/team.py +129 -0
- argus/backend/controller/team_ui.py +19 -0
- argus/backend/controller/testrun_api.py +513 -0
- argus/backend/controller/view_api.py +188 -0
- argus/backend/controller/views_widgets/__init__.py +0 -0
- argus/backend/controller/views_widgets/graphed_stats.py +54 -0
- argus/backend/controller/views_widgets/graphs.py +68 -0
- argus/backend/controller/views_widgets/highlights.py +135 -0
- argus/backend/controller/views_widgets/nemesis_stats.py +26 -0
- argus/backend/controller/views_widgets/summary.py +43 -0
- argus/backend/db.py +98 -0
- argus/backend/error_handlers.py +41 -0
- argus/backend/events/event_processors.py +34 -0
- argus/backend/models/__init__.py +0 -0
- argus/backend/models/argus_ai.py +24 -0
- argus/backend/models/github_issue.py +60 -0
- argus/backend/models/plan.py +24 -0
- argus/backend/models/result.py +187 -0
- argus/backend/models/runtime_store.py +58 -0
- argus/backend/models/view_widgets.py +25 -0
- argus/backend/models/web.py +403 -0
- argus/backend/plugins/__init__.py +0 -0
- argus/backend/plugins/core.py +248 -0
- argus/backend/plugins/driver_matrix_tests/controller.py +66 -0
- argus/backend/plugins/driver_matrix_tests/model.py +429 -0
- argus/backend/plugins/driver_matrix_tests/plugin.py +21 -0
- argus/backend/plugins/driver_matrix_tests/raw_types.py +62 -0
- argus/backend/plugins/driver_matrix_tests/service.py +61 -0
- argus/backend/plugins/driver_matrix_tests/udt.py +42 -0
- argus/backend/plugins/generic/model.py +86 -0
- argus/backend/plugins/generic/plugin.py +15 -0
- argus/backend/plugins/generic/types.py +14 -0
- argus/backend/plugins/loader.py +39 -0
- argus/backend/plugins/sct/controller.py +224 -0
- argus/backend/plugins/sct/plugin.py +37 -0
- argus/backend/plugins/sct/resource_setup.py +177 -0
- argus/backend/plugins/sct/service.py +682 -0
- argus/backend/plugins/sct/testrun.py +288 -0
- argus/backend/plugins/sct/udt.py +100 -0
- argus/backend/plugins/sirenada/model.py +118 -0
- argus/backend/plugins/sirenada/plugin.py +16 -0
- argus/backend/service/admin.py +26 -0
- argus/backend/service/argus_service.py +696 -0
- argus/backend/service/build_system_monitor.py +185 -0
- argus/backend/service/client_service.py +127 -0
- argus/backend/service/event_service.py +18 -0
- argus/backend/service/github_service.py +233 -0
- argus/backend/service/jenkins_service.py +269 -0
- argus/backend/service/notification_manager.py +159 -0
- argus/backend/service/planner_service.py +608 -0
- argus/backend/service/release_manager.py +229 -0
- argus/backend/service/results_service.py +690 -0
- argus/backend/service/stats.py +610 -0
- argus/backend/service/team_manager_service.py +82 -0
- argus/backend/service/test_lookup.py +172 -0
- argus/backend/service/testrun.py +489 -0
- argus/backend/service/user.py +308 -0
- argus/backend/service/views.py +219 -0
- argus/backend/service/views_widgets/__init__.py +0 -0
- argus/backend/service/views_widgets/graphed_stats.py +180 -0
- argus/backend/service/views_widgets/highlights.py +374 -0
- argus/backend/service/views_widgets/nemesis_stats.py +34 -0
- argus/backend/template_filters.py +27 -0
- argus/backend/tests/__init__.py +0 -0
- argus/backend/tests/client_service/__init__.py +0 -0
- argus/backend/tests/client_service/test_submit_results.py +79 -0
- argus/backend/tests/conftest.py +180 -0
- argus/backend/tests/results_service/__init__.py +0 -0
- argus/backend/tests/results_service/test_best_results.py +178 -0
- argus/backend/tests/results_service/test_cell.py +65 -0
- argus/backend/tests/results_service/test_chartjs_additional_functions.py +259 -0
- argus/backend/tests/results_service/test_create_chartjs.py +220 -0
- argus/backend/tests/results_service/test_result_metadata.py +100 -0
- argus/backend/tests/results_service/test_results_service.py +203 -0
- argus/backend/tests/results_service/test_validation_rules.py +213 -0
- argus/backend/tests/view_widgets/__init__.py +0 -0
- argus/backend/tests/view_widgets/test_highlights_api.py +532 -0
- argus/backend/util/common.py +65 -0
- argus/backend/util/config.py +38 -0
- argus/backend/util/encoders.py +56 -0
- argus/backend/util/logsetup.py +80 -0
- argus/backend/util/module_loaders.py +30 -0
- argus/backend/util/send_email.py +91 -0
- argus/client/base.py +1 -3
- argus/client/driver_matrix_tests/cli.py +17 -8
- argus/client/generic/cli.py +4 -2
- argus/client/generic/client.py +1 -0
- argus/client/generic_result.py +48 -9
- argus/client/sct/client.py +1 -3
- argus/client/sirenada/client.py +4 -1
- argus/client/tests/__init__.py +0 -0
- argus/client/tests/conftest.py +19 -0
- argus/client/tests/test_package.py +45 -0
- argus/client/tests/test_results.py +224 -0
- argus/common/sct_types.py +3 -0
- argus/common/sirenada_types.py +1 -1
- {argus_alm-0.14.2.dist-info → argus_alm-0.15.2.dist-info}/METADATA +43 -19
- argus_alm-0.15.2.dist-info/RECORD +122 -0
- {argus_alm-0.14.2.dist-info → argus_alm-0.15.2.dist-info}/WHEEL +2 -1
- argus_alm-0.15.2.dist-info/entry_points.txt +3 -0
- argus_alm-0.15.2.dist-info/top_level.txt +1 -0
- argus_alm-0.14.2.dist-info/RECORD +0 -20
- argus_alm-0.14.2.dist-info/entry_points.txt +0 -4
- {argus_alm-0.14.2.dist-info → argus_alm-0.15.2.dist-info/licenses}/LICENSE +0 -0
--- /dev/null
+++ b/argus/backend/plugins/sct/service.py
@@ -0,0 +1,682 @@
+import base64
+from dataclasses import dataclass
+from datetime import datetime
+import logging
+import math
+import re
+from time import time
+from xml.etree import ElementTree
+from flask import g
+from argus.backend.models.github_issue import GithubIssue, IssueLink
+from argus.backend.models.web import ArgusEventTypes, ErrorEventEmbeddings, CriticalEventEmbeddings
+from argus.backend.plugins.sct.testrun import SCTJunitReports, SCTTestRun, SubtestType
+from argus.common.sct_types import GeminiResultsRequest, PerformanceResultsRequest, ResourceUpdateRequest
+from argus.backend.plugins.sct.udt import (
+    CloudInstanceDetails,
+    CloudResource,
+    EventsBySeverity,
+    NemesisRunInfo,
+    NodeDescription,
+    PackageVersion,
+    PerformanceHDRHistogram,
+)
+from argus.backend.service.event_service import EventService
+from argus.backend.util.common import chunk, get_build_number
+from argus.common.enums import NemesisStatus, ResourceState, TestStatus
+
+LOGGER = logging.getLogger(__name__)
+MAX_SIMILARS = 20
+
+
+class SCTServiceException(Exception):
+    pass
+
+
+@dataclass(init=True, repr=True)
+class NemesisSubmissionRequest:
+    name: str
+    class_name: str
+    start_time: int
+    node_name: str
+    node_ip: str
+    node_shards: int
+
+
+@dataclass(init=True, repr=True)
+class NemesisFinalizationRequest:
+    name: str
+    start_time: int
+    status: str
+    message: str
+
+
+@dataclass(init=True, repr=True)
+class EventSubmissionRequest:
+    severity: str
+    total_events: int
+    messages: list[str]
+
+
+class SCTService:
+
+    @staticmethod
+    def submit_packages(run_id: str, packages: list[dict]) -> str:
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            for package_dict in packages:
+                package = PackageVersion(**package_dict)
+                if "target" in package.name:
+                    SCTService.process_target_version(run, package)
+                if package not in run.packages:
+                    run.packages.append(package)
+            run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "added"
+
+    @staticmethod
+    def process_target_version(run: SCTTestRun, package: PackageVersion):
+        if run.version_source and "upgrade-target" in run.version_source and package.name == "scylla-server-target":
+            return
+        run.version_source = package.name
+        run.scylla_version = package.version
+
+    @staticmethod
+    def set_sct_runner(run_id: str, public_ip: str, private_ip: str, region: str, backend: str, name: str = None):
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            details = CloudInstanceDetails(
+                public_ip=public_ip,
+                private_ip=private_ip,
+                provider=backend,
+                region=region,
+            )
+            run.sct_runner_host = details
+            resource = CloudResource(name=name or "sct-runner", resource_type="sct-runner", instance_info=details)
+            run.allocated_resources.append(resource)
+            run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "updated"
+
+    @staticmethod
+    def submit_screenshots(run_id: str, screenshot_links: list[str]) -> str:
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            for link in screenshot_links:
+                run.add_screenshot(link)
+            run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "submitted"
+
+    @staticmethod
+    def submit_gemini_results(run_id: str, gemini_data: GeminiResultsRequest) -> str:
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            run.subtest_name = SubtestType.GEMINI.value
+            run.oracle_nodes_count = gemini_data.get("oracle_nodes_count")
+            run.oracle_node_ami_id = gemini_data.get("oracle_node_ami_id")
+            run.oracle_node_instance_type = gemini_data.get("oracle_node_instance_type")
+            run.oracle_node_scylla_version = gemini_data.get("oracle_node_scylla_version")
+            run.gemini_command = gemini_data.get("gemini_command")
+            run.gemini_version = gemini_data.get("gemini_version")
+            run.gemini_status = gemini_data.get("gemini_status")
+            run.gemini_seed = str(gemini_data.get("gemini_seed"))
+            run.gemini_write_ops = gemini_data.get("gemini_write_ops")
+            run.gemini_write_errors = gemini_data.get("gemini_write_errors")
+            run.gemini_read_ops = gemini_data.get("gemini_read_ops")
+            run.gemini_read_errors = gemini_data.get("gemini_read_errors")
+            run.save()
+
+            if run.gemini_status != "PASSED":
+                run.status = TestStatus.FAILED
+                EventService.create_run_event(kind=ArgusEventTypes.TestRunStatusChanged, body={
+                    "message": "[{username}] Setting run status to {status} due to Gemini reporting following status: {gemini_status}",
+                    "username": g.user.username,
+                    "status": TestStatus.FAILED.value,
+                    "gemini_status": run.gemini_status,
+                }, user_id=g.user.id, run_id=run_id, release_id=run.release_id, test_id=run.test_id)
+                run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "submitted"
+
+    @staticmethod
+    def submit_performance_results(run_id: str, performance_results: PerformanceResultsRequest):
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            run.subtest_name = SubtestType.PERFORMANCE.value
+            run.perf_op_rate_average = performance_results.get("perf_op_rate_average")
+            run.perf_op_rate_total = performance_results.get("perf_op_rate_total")
+            run.perf_avg_latency_99th = performance_results.get("perf_avg_latency_99th")
+            run.perf_avg_latency_mean = performance_results.get("perf_avg_latency_mean")
+            run.perf_total_errors = performance_results.get("perf_total_errors")
+            run.stress_cmd = performance_results.get("stress_cmd")
+            run.test_name = performance_results.get("test_name")
+            run.save()
+
+            is_latency_test = "latency" in run.test_name
+            threshold_negative = -10
+
+            def cmp(lhs, rhs):
+                delta = rhs - lhs
+                change = int(math.fabs(delta) * 100 / rhs)
+                return change if delta >= 0 else change * -1
+
+            previous_runs = SCTTestRun.get_perf_results_for_test_name(run.build_id, run.start_time, run.test_name)
+            metrics_to_check = ["perf_avg_latency_99th",
+                                "perf_avg_latency_mean"] if is_latency_test else ["perf_op_rate_total"]
+
+            older_runs_by_version = {}
+            for prev_run in previous_runs:
+                if not older_runs_by_version.get(prev_run["scylla_version"]):
+                    older_runs_by_version[prev_run["scylla_version"]] = []
+                older_runs_by_version[prev_run["scylla_version"]].append(prev_run)
+
+            regression_found = False
+            regression_info = {
+                "version": None,
+                "delta": None,
+                "id": None,
+                "metric": None,
+                "job_url": None,
+            }
+
+            if performance_results["histograms"]:
+                for histogram in performance_results["histograms"]:
+                    run.histograms = {k: PerformanceHDRHistogram(**v) for k, v in histogram.items()}
+
+            for version, runs in older_runs_by_version.items():
+                for metric in metrics_to_check:
+                    best_run = sorted(runs, reverse=(not is_latency_test), key=lambda v: v[metric])[0]
+                    last_run = runs[0]
+
+                    metric_to_best = cmp(run[metric], best_run[metric])
+                    metric_to_last = cmp(run[metric], last_run[metric])
+                    if metric_to_last < threshold_negative:
+                        regression_found = True
+                        regression_info["metric"] = metric
+                        regression_info["version"] = version
+                        regression_info["job_url"] = last_run["build_job_url"]
+                        regression_info["id"] = str(last_run["id"])
+                        regression_info["delta"] = metric_to_last
+                        break
+
+                    if metric_to_best < threshold_negative:
+                        regression_found = True
+                        regression_info["metric"] = metric
+                        regression_info["version"] = version
+                        regression_info["job_url"] = best_run["build_job_url"]
+                        regression_info["id"] = str(best_run["id"])
+                        regression_info["delta"] = metric_to_best
+                        break
+
+                if regression_found:
+                    break
+
+            if regression_found:
+                run.status = TestStatus.FAILED.value
+                run.save()
+                EventService.create_run_event(kind=ArgusEventTypes.TestRunStatusChanged, body={
+                    "message": "[{username}] Setting run status to {status} due to performance metric '{metric}' falling "
+                               "below allowed threshold ({threshold_negative}): {delta}% compared to "
+                               "<a href='/test/{test_id}/runs?additionalRuns[]={base_run_id}&additionalRuns[]={previous_run_id}'>This {version} (#{build_number}) run</a>",
+                    "username": g.user.username,
+                    "status": TestStatus.FAILED.value,
+                    "metric": regression_info["metric"],
+                    "threshold_negative": threshold_negative,
+                    "delta": regression_info["delta"],
+                    "test_id": str(run.test_id),
+                    "base_run_id": str(run.id),
+                    "previous_run_id": regression_info["id"],
+                    "version": regression_info["version"],
+                    "build_number": get_build_number(regression_info["job_url"])
+                }, user_id=g.user.id, run_id=run_id, release_id=run.release_id, test_id=run.test_id)
+            else:
+                # NOTE: This will override status set by SCT Events.
+                run.status = TestStatus.PASSED.value
+                run.save()
+
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "submitted"
+
+    @staticmethod
+    def get_performance_history_for_test(run_id: str):
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            rows = run.get_perf_results_for_test_name(
+                build_id=run.build_id, start_time=run.start_time, test_name=run.test_name)
+            return rows
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+    @staticmethod
+    def create_resource(run_id: str, resource_details: dict) -> str:
+        instance_details = CloudInstanceDetails(**resource_details.pop("instance_details"))
+        resource = CloudResource(**resource_details, instance_info=instance_details)
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            run.get_resources().append(resource)
+            run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "created"
+
+    @staticmethod
+    def update_resource_shards(run_id: str, resource_name: str, new_shards: int) -> str:
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            resource = next(res for res in run.get_resources() if res.name == resource_name)
+            resource.get_instance_info().shards_amount = new_shards
+            run.save()
+        except StopIteration as exception:
+            LOGGER.error("Resource %s not found in run %s", resource_name, run_id)
+            raise SCTServiceException("Resource not found", resource_name) from exception
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "updated"
+
+    @staticmethod
+    def update_resource(run_id: str, resource_name: str, update_data: ResourceUpdateRequest) -> str:
+        try:
+            fields_updated = {}
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            resource = next(res for res in run.get_resources() if res.name == resource_name)
+            instance_info = update_data.pop("instance_info", None)
+            resource.state = ResourceState(update_data.get("state", resource.state)).value
+            if instance_info:
+                resource_instance_info = resource.get_instance_info()
+                for k, v in instance_info.items():
+                    if k in resource_instance_info.keys():
+                        resource_instance_info[k] = v
+                        fields_updated[k] = v
+            run.save()
+        except StopIteration as exception:
+            LOGGER.error("Resource %s not found in run %s", resource_name, run_id)
+            raise SCTServiceException("Resource not found", resource_name) from exception
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return {
+            "state": "updated",
+            "fields": fields_updated
+        }
+
+    @staticmethod
+    def terminate_resource(run_id: str, resource_name: str, reason: str) -> str:
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            if "sct-runner" in resource_name:  # FIXME: Temp solution until sct-runner name is propagated on submit
+                resource = next(res for res in run.get_resources() if "sct-runner" in res.name)
+            else:
+                resource = next(res for res in run.get_resources() if res.name == resource_name)
+            resource.get_instance_info().termination_reason = reason
+            resource.get_instance_info().termination_time = int(time())
+            resource.state = ResourceState.TERMINATED.value
+            run.save()
+        except StopIteration as exception:
+            LOGGER.error("Resource %s not found in run %s", resource_name, run_id)
+            raise SCTServiceException("Resource not found", resource_name) from exception
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "terminated"
+
+    @staticmethod
+    def submit_nemesis(run_id: str, nemesis_details: dict) -> str:
+        nem_req = NemesisSubmissionRequest(**nemesis_details)
+        node_desc = NodeDescription(name=nem_req.node_name, ip=nem_req.node_ip, shards=nem_req.node_shards)
+        nemesis_info = NemesisRunInfo(
+            class_name=nem_req.class_name,
+            name=nem_req.name,
+            start_time=int(nem_req.start_time),
+            end_time=0,
+            duration=0,
+            stack_trace="",
+            status=NemesisStatus.RUNNING.value,
+            target_node=node_desc,
+        )
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            run.add_nemesis(nemesis_info)
+            run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "created"
+
+    @staticmethod
+    def finalize_nemesis(run_id: str, nemesis_details: dict) -> str:
+        nem_req = NemesisFinalizationRequest(**nemesis_details)
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            nemesis = next(nem for nem in run.get_nemeses() if nem.name ==
+                           nem_req.name and nem.start_time == nem_req.start_time)
+            nemesis.status = NemesisStatus(nem_req.status).value
+            nemesis.stack_trace = nem_req.message
+            nemesis.end_time = int(time())
+            run.save()
+        except StopIteration as exception:
+            LOGGER.error("Nemesis %s (%s) not found for run %s", nem_req.name, nem_req.start_time, run_id)
+            raise SCTServiceException("Nemesis not found", (nem_req.name, nem_req.start_time)) from exception
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "updated"
+
+    @staticmethod
+    def submit_events(run_id: str, events: list[dict]) -> str:
+        wrapped_events = [EventSubmissionRequest(**ev) for ev in events]
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            for event in wrapped_events:
+                wrapper = EventsBySeverity(severity=event.severity,
+                                           event_amount=event.total_events, last_events=event.messages)
+                run.get_events().append(wrapper)
+            coredumps = SCTService.locate_coredumps(run, run.get_events())
+            run.submit_logs(coredumps)
+            run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "added"
+
+    @staticmethod
+    def locate_coredumps(run: SCTTestRun, events: list[EventsBySeverity]) -> list[dict]:
+        flat_messages: list[str] = []
+        links = []
+        for es in events:
+            flat_messages.extend(es.last_events)
+        coredump_events = filter(lambda v: "coredumpevent" in v.lower(), flat_messages)
+        for idx, event in enumerate(coredump_events):
+            core_pattern = r"corefile_url=(?P<url>.+)$"
+            ts_pattern = r"^(?P<ts>\d{4}-\d{2}-\d{2} ([\d:]*)\.\d{3})"
+            node_name_pattern = r"node=(?P<name>.+)$"
+            core_url_match = re.search(core_pattern, event, re.MULTILINE)
+            node_name_match = re.search(node_name_pattern, event, re.MULTILINE)
+            ts_match = re.search(ts_pattern, event)
+            if core_url_match:
+                node_name = node_name_match.group("name") if node_name_match else f"unknown-node-{idx}"
+                split_name = node_name.split(" ")
+                node_name = split_name[1] if len(split_name) >= 2 else node_name
+                url = core_url_match.group("url")
+                timestamp_component = ""
+                if ts_match:
+                    try:
+                        timestamp = datetime.fromisoformat(ts_match.group("ts"))
+                        timestamp_component = timestamp.strftime("-%Y-%m-%d_%H-%M-%S")
+                    except ValueError:
+                        pass
+                log_link = {
+                    "log_name": f"core.scylla-{node_name}{timestamp_component}.gz",
+                    "log_link": url
+                }
+                links.append(log_link)
+        return links
+
+    @staticmethod
+    def get_similar_events(run_id: str) -> list[dict]:
+        """Get similar events for each event in a test run
+
+        Args:
+            run_id: The test run ID to get similar events for
+
+        Returns:
+            List of dictionaries containing event_index, severity and similars_set for each event
+        """
+        error_embeddings = ErrorEventEmbeddings.filter(run_id=run_id).only(["event_index", "similars_map"]).all()
+        critical_embeddings = CriticalEventEmbeddings.filter(run_id=run_id).only(["event_index", "similars_map"]).all()
+
+        result = []
+        # Process ERROR embeddings
+        for embedding in error_embeddings:
+            result.append(
+                {
+                    "event_index": embedding.event_index,
+                    "severity": "ERROR",
+                    "similars_set": [str(similar_run_id) for similar_run_id in embedding.similars_map],
+                }
+            )
+
+        # Process CRITICAL embeddings
+        for embedding in critical_embeddings:
+            result.append(
+                {
+                    "event_index": embedding.event_index,
+                    "severity": "CRITICAL",
+                    "similars_set": [str(similar_run_id) for similar_run_id in embedding.similars_map],
+                }
+            )
+
+        return result
+
+    @staticmethod
+    def get_similar_runs_info(run_ids: list[str]):
+        """Get build IDs and issues for a list of run IDs
+
+        Args:
+            run_ids: List of run IDs to fetch information for
+
+        Returns:
+            Dictionary mapping run IDs to their information (build_id, start_time, and issues)
+        """
+        result = {}
+
+        # Step 1: Get issue links for all run_ids in batches
+        all_issue_links = {}
+
+        for batch_run_ids in chunk(run_ids):
+            batch_links = IssueLink.objects.filter(run_id__in=batch_run_ids).all()
+
+            for link in batch_links:
+                run_id_str = str(link.run_id)
+                if run_id_str not in all_issue_links:
+                    all_issue_links[run_id_str] = []
+                all_issue_links[run_id_str].append(link)
+
+        # Step 2: Fetch all unique issue details
+        all_issue_ids = set()
+        for links in all_issue_links.values():
+            all_issue_ids.update(link.issue_id for link in links)
+
+        issues_by_id = {}
+        if all_issue_ids:
+            for batch_issue_ids in chunk(list(all_issue_ids)):
+                batch_issues = GithubIssue.filter(id__in=batch_issue_ids).all()
+
+                for issue in batch_issues:
+                    issues_by_id[issue.id] = issue
+
+        # Step 3: Fetch test runs only for run_ids that have issue links (limiting to MAX_SIMILARS runs)
+        runs_with_issues = list(all_issue_links.keys())
+
+        test_runs = {}
+        if runs_with_issues:
+            for run_id in runs_with_issues[:MAX_SIMILARS]:
+                try:
+                    test_run = SCTTestRun.get(id=run_id)
+                    test_runs[run_id] = test_run
+                except Exception as e:
+                    LOGGER.debug(f"Failed to fetch test run {run_id}: {str(e)}")
+
+        # Step 4: Assign run and issue details to result for runs with issues
+        for run_id in runs_with_issues:
+            try:
+                test_run = test_runs.get(run_id)
+                if not test_run:
+                    continue
+
+                links = all_issue_links.get(run_id, [])
+                issues = [issues_by_id[link.issue_id] for link in links if link.issue_id in issues_by_id]
+
+                try:
+                    build_number = int(
+                        test_run.build_job_url[:-1].split("/")[-1]
+                    )
+                except Exception as e:
+                    LOGGER.error(
+                        f"Error parsing build number for run {run_id}: {test_run.build_job_url[:-1].split('/')} - {str(e)}")
+                    build_number = -1
+
+                for pkg_name in ["scylla-server-upgraded", "scylla-server", "scylla-server-target"]:
+                    sut_version = next(
+                        (f"{pkg.version}-{pkg.date}" for pkg in test_run.packages if pkg.name == pkg_name), None)
+                    if sut_version:
+                        break
+
+                result[run_id] = {
+                    "build_id": f"{test_run.build_id}#{build_number}",
+                    "start_time": test_run.start_time.isoformat(),
+                    "version": sut_version or "unknown",
+                    "issues": [
+                        {
+                            "number": issue.number,
+                            "state": issue.state,
+                            "title": issue.title,
+                            "url": issue.url,
+                        }
+                        for issue in issues
+                    ],
+                }
+            except Exception as e:
+                LOGGER.error(f"Error fetching info for run {run_id}: {str(e)}")
+                result[run_id] = {"build_id": None, "start_time": None, "issues": []}
+
+        # Step 5: If less than MAX_SIMILARS results, fetch MAX_SIMILARS more run details
+        if len(result) < MAX_SIMILARS:
+            remaining_run_ids = [run_id for run_id in run_ids if run_id not in result]
+            additional_needed = min(MAX_SIMILARS - len(result), len(remaining_run_ids))
+
+            if additional_needed > 0:
+                additional_run_ids = remaining_run_ids[:additional_needed]
+
+                additional_test_runs = {}
+                for run_id in additional_run_ids:
+                    try:
+                        test_run = SCTTestRun.get(id=run_id)
+                        additional_test_runs[run_id] = test_run
+                    except Exception as e:
+                        LOGGER.debug(f"Failed to fetch additional test run {run_id}: {str(e)}")
+
+                for run_id in additional_run_ids:
+                    try:
+                        test_run = additional_test_runs.get(run_id)
+                        if not test_run:
+                            continue
+
+                        try:
+                            build_number = int(
+                                test_run.build_job_url[:-1].split("/")[-1]
+                            )
+                        except Exception as e:
+                            LOGGER.error(
+                                f"Error parsing build number for run {run_id}: {test_run.build_job_url[:-1].split('/')} - {str(e)}")
+                            build_number = -1
+
+                        for pkg_name in ["scylla-server-upgraded", "scylla-server", "scylla-server-target"]:
+                            sut_version = next(
+                                (f"{pkg.version}-{pkg.date}" for pkg in test_run.packages if pkg.name == pkg_name), None)
+                            if sut_version:
+                                break
+
+                        result[run_id] = {
+                            "build_id": f"{test_run.build_id}#{build_number}",
+                            "start_time": test_run.start_time.isoformat(),
+                            "version": sut_version or "unknown",
+                            "issues": [],
+                        }
+                    except Exception as e:
+                        LOGGER.error(f"Error fetching info for run {run_id}: {str(e)}")
+                        result[run_id] = {"build_id": None, "start_time": None, "issues": []}
+
+        return result
+
+    @staticmethod
+    def get_scylla_version_kernels_report(release_name: str):
+        all_release_runs = SCTTestRun.get_version_data_for_release(release_name=release_name)
+        kernels_by_version = {}
+        kernel_metadata = {}
+        for run in all_release_runs:
+            packages = run["packages"]
+            if not packages:
+                continue
+            scylla_pkgs = {p["name"]: p for p in packages if "scylla-server" in p["name"]}
+            scylla_pkg = scylla_pkgs["scylla-server-upgraded"] if scylla_pkgs.get(
+                "scylla-server-upgraded") else scylla_pkgs.get("scylla-server")
+            version = f"{scylla_pkg['version']}-{scylla_pkg['date']}.{scylla_pkg['revision_id']}" if scylla_pkgs else "unknown"
+            kernel_packages = [p for p in packages if "kernel" in p["name"]]
+            kernel_package = kernel_packages[0] if len(kernel_packages) > 0 else None
+            if not kernel_package:
+                continue
+            version_list = set(kernels_by_version.get(version, []))
+            version_list.add(kernel_package["version"])
+            kernels_by_version[version] = list(version_list)
+            metadata = kernel_metadata.get(
+                kernel_package["version"],
+                {
+                    "passed": 0,
+                    "failed": 0,
+                    "aborted": 0,
+                }
+            )
+            if run["status"] in ["passed", "failed", "aborted", "test_error"]:
+                metadata[run["status"]] += 1
+            kernel_metadata[kernel_package["version"]] = metadata
+
+        return {
+            "versions": kernels_by_version,
+            "metadata": kernel_metadata
+        }
+
+    @staticmethod
+    def junit_submit(run_id: str, file_name: str, content: str) -> bool:
+        try:
+            report = SCTJunitReports.get(test_id=run_id, file_name=file_name)
+            if report:
+                raise SCTServiceException(f"Report {file_name} already exists.", file_name)
+        except SCTJunitReports.DoesNotExist:
+            pass
+        report = SCTJunitReports()
+        report.test_id = run_id
+        report.file_name = file_name
+
+        xml_content = str(base64.decodebytes(bytes(content, encoding="utf-8")), encoding="utf-8")
+        try:
+            _ = ElementTree.fromstring(xml_content)
+        except Exception:
raise SCTServiceException(f"Malformed JUnit report submitted")
+
+        report.report = xml_content
+        report.save()
+
+        return True
+
+    @staticmethod
+    def junit_get_all(run_id: str) -> list[SCTJunitReports]:
+        return list(SCTJunitReports.filter(test_id=run_id).all())
+
+    @staticmethod
+    def junit_get_single(run_id: str, file_name: str) -> SCTJunitReports:
+        return SCTJunitReports.get(test_id=run_id, file_name=file_name)
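
For context on the regression check added in submit_performance_results: the inline cmp helper computes a signed percent change of the new run's metric against a previous run's value, and the run is failed when that change drops below the hardcoded -10% threshold. A minimal, self-contained sketch of that arithmetic follows; the latency figures are made up for illustration and are not taken from any real run.

import math

THRESHOLD_NEGATIVE = -10  # percent, as hardcoded in submit_performance_results


def cmp(lhs: float, rhs: float) -> int:
    # Signed percent change of the new run's metric (lhs) relative to a
    # previous run's metric (rhs); negative means the new value exceeds
    # the old one, which for latency metrics is a regression.
    delta = rhs - lhs
    change = int(math.fabs(delta) * 100 / rhs)
    return change if delta >= 0 else change * -1


# Hypothetical p99 latencies in ms: the new run is 15% slower than the
# best previous run, which is past the threshold, so it would be flagged.
new_p99, best_p99 = 4.6, 4.0
delta = cmp(new_p99, best_p99)
print(delta, delta < THRESHOLD_NEGATIVE)  # -15 True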
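Similarly, locate_coredumps extracts named log links from CoreDumpEvent messages by regex. The standalone sketch below reuses the same three patterns from the diff; the event text is a fabricated example of the shape those patterns expect, not real SCT output.

import re
from datetime import datetime

# Patterns as used in locate_coredumps.
CORE_PATTERN = r"corefile_url=(?P<url>.+)$"
TS_PATTERN = r"^(?P<ts>\d{4}-\d{2}-\d{2} ([\d:]*)\.\d{3})"
NODE_NAME_PATTERN = r"node=(?P<name>.+)$"

# Fabricated multi-line event message.
event = (
    "2024-05-01 12:34:56.789 <CoreDumpEvent severity=ERROR>\n"
    "node=Node longevity-node-1 [13.37.0.1]\n"
    "corefile_url=https://storage.example.com/core.scylla.1234.gz"
)

url = re.search(CORE_PATTERN, event, re.MULTILINE).group("url")
name = re.search(NODE_NAME_PATTERN, event, re.MULTILINE).group("name")
name = name.split(" ")[1]  # "Node longevity-node-1 [...]" -> "longevity-node-1"
ts = datetime.fromisoformat(re.search(TS_PATTERN, event).group("ts"))

log_name = f"core.scylla-{name}{ts.strftime('-%Y-%m-%d_%H-%M-%S')}.gz"
print({"log_name": log_name, "log_link": url})
# {'log_name': 'core.scylla-longevity-node-1-2024-05-01_12-34-56.gz',
#  'log_link': 'https://storage.example.com/core.scylla.1234.gz'}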