argus-alm 0.11.3__py3-none-any.whl → 0.11.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- argus/backend/controller/api.py +12 -0
- argus/backend/controller/main.py +1 -3
- argus/backend/controller/team.py +1 -1
- argus/backend/controller/testrun_api.py +16 -0
- argus/backend/events/event_processors.py +1 -0
- argus/backend/models/web.py +1 -0
- argus/backend/plugins/core.py +17 -1
- argus/backend/plugins/driver_matrix_tests/model.py +5 -1
- argus/backend/plugins/sct/controller.py +47 -0
- argus/backend/plugins/sct/plugin.py +3 -1
- argus/backend/plugins/sct/service.py +176 -2
- argus/backend/plugins/sct/testrun.py +78 -21
- argus/backend/plugins/sct/types.py +38 -0
- argus/backend/plugins/sct/udt.py +13 -0
- argus/backend/service/argus_service.py +16 -32
- argus/backend/service/build_system_monitor.py +21 -3
- argus/backend/service/event_service.py +18 -0
- argus/backend/service/stats.py +69 -15
- argus/backend/service/testrun.py +59 -21
- argus/backend/util/encoders.py +3 -0
- argus/backend/util/enums.py +1 -0
- argus/client/driver_matrix_tests/client.py +19 -6
- argus/client/sct/client.py +50 -4
- {argus_alm-0.11.3.dist-info → argus_alm-0.11.6.dist-info}/METADATA +1 -1
- {argus_alm-0.11.3.dist-info → argus_alm-0.11.6.dist-info}/RECORD +27 -25
- {argus_alm-0.11.3.dist-info → argus_alm-0.11.6.dist-info}/WHEEL +1 -1
- {argus_alm-0.11.3.dist-info → argus_alm-0.11.6.dist-info}/LICENSE +0 -0
argus/backend/controller/api.py
CHANGED
@@ -472,3 +472,15 @@ def resolve_artifact_size():
             "artifactSize": length,
         }
     }
+
+
+@bp.route("/user/jobs")
+@api_login_required
+def user_jobs():
+    service = ArgusService()
+    result = list(service.get_jobs_for_user(user=g.user))
+
+    return {
+        "status": "ok",
+        "response": result
+    }
argus/backend/controller/main.py
CHANGED
@@ -241,9 +241,7 @@ def update_password():
 @bp.route("/profile/jobs", methods=["GET"])
 @login_required
 def profile_jobs():
-
-    jobs = service.get_jobs_for_user(g.user)
-    return render_template("profile_jobs.html.j2", runs=jobs)
+    return render_template("profile_jobs.html.j2")
 
 
 @bp.route("/profile/schedules", methods=["GET"])
argus/backend/controller/team.py
CHANGED
@@ -107,7 +107,7 @@ def user_teams(user_id: str):
 @api_login_required
 def user_jobs(user_id: str):
     user = User.get(id=UUID(user_id))
-    result = ArgusService().get_jobs_for_user(user)
+    result = list(ArgusService().get_jobs_for_user(user))
 
     return {
         "status": "ok",
argus/backend/controller/testrun_api.py
CHANGED
@@ -269,3 +269,19 @@ def sct_terminate_stuck_runs():
             "total": result
         }
     }
+
+
+@bp.route("/ignore_jobs", methods=["POST"])
+@api_login_required
+def ignore_jobs():
+    payload = get_payload(request)
+    service = TestRunService()
+
+    result = service.ignore_jobs(test_id=payload["testId"], reason=payload["reason"])
+
+    return {
+        "status": "ok",
+        "response": {
+            "affectedJobs": result
+        }
+    }
argus/backend/events/event_processors.py
CHANGED
@@ -30,4 +30,5 @@ EVENT_PROCESSORS = {
     ArgusEventTypes.TestRunIssueAdded: event_process_issue_added,
     ArgusEventTypes.TestRunIssueRemoved: event_process_issue_added,
     ArgusEventTypes.TestRunInvestigationStatusChanged: event_process_investigation_status_changed,
+    ArgusEventTypes.TestRunBatchInvestigationStatusChange: event_process_investigation_status_changed,
 }
argus/backend/models/web.py
CHANGED
@@ -190,6 +190,7 @@ class ArgusEventTypes(str, Enum):
     AssigneeChanged = "ARGUS_ASSIGNEE_CHANGE"
     TestRunStatusChanged = "ARGUS_TEST_RUN_STATUS_CHANGE"
     TestRunInvestigationStatusChanged = "ARGUS_TEST_RUN_INVESTIGATION_STATUS_CHANGE"
+    TestRunBatchInvestigationStatusChange = "ARGUS_TEST_RUN_INVESTIGATION_BATCH_STATUS_CHANGE"
     TestRunCommentPosted = "ARGUS_TEST_RUN_COMMENT_POSTED"
     TestRunCommentUpdated = "ARGUS_TEST_RUN_COMMENT_UPDATED"
     TestRunCommentDeleted = "ARGUS_TEST_RUN_COMMENT_DELETED"
argus/backend/plugins/core.py
CHANGED
@@ -109,11 +109,27 @@ class PluginModelBase(Model):
     def get_jobs_assigned_to_user(cls, user: User):
         cluster = ScyllaCluster.get()
         query = cluster.prepare("SELECT build_id, start_time, release_id, group_id, assignee, "
-                                f"test_id, id, status, investigation_status, build_job_url FROM {cls.table_name()} WHERE assignee = ?")
+                                f"test_id, id, status, investigation_status, build_job_url, scylla_version FROM {cls.table_name()} WHERE assignee = ?")
         rows = cluster.session.execute(query=query, parameters=(user.id,))
 
         return list(rows)
 
+    @classmethod
+    def get_jobs_meta_by_test_id(cls, test_id: UUID):
+        cluster = ScyllaCluster.get()
+        query = cluster.prepare(f"SELECT build_id, start_time, id, test_id, release_id, group_id, status, investigation_status FROM {cls.table_name()} WHERE test_id = ?")
+        rows = cluster.session.execute(query=query, parameters=(test_id,))
+
+        return list(rows)
+
+    @classmethod
+    def prepare_investigation_status_update_query(cls, build_id: str, start_time: datetime, new_status: TestInvestigationStatus):
+        cluster = ScyllaCluster.get()
+        query = cluster.prepare(f"UPDATE {cls.table_name()} SET investigation_status = ? WHERE build_id = ? AND start_time = ?")
+        bound_query = query.bind(values=(new_status.value, build_id, start_time))
+
+        return bound_query
+
     @classmethod
     def get_stats_for_release(cls, release: ArgusRelease):
         cluster = ScyllaCluster.get()
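The new prepare_investigation_status_update_query helper returns a bound UPDATE statement instead of executing it, which suggests callers collect several of them and run them in one go (the new ignore_jobs endpoint and the TestRunBatchInvestigationStatusChange event point the same way, presumably via TestRunService.ignore_jobs, which is not shown in this diff). A minimal sketch of such a consumer, assuming the python cassandra-driver BatchStatement, a TestInvestigationStatus.IGNORED member, and dict-style row access as used elsewhere in this diff:

    from cassandra.query import BatchStatement

    # hypothetical: mark every run of one test as ignored in a single round trip
    batch = BatchStatement()
    for job in SCTTestRun.get_jobs_meta_by_test_id(test_id):
        bound = SCTTestRun.prepare_investigation_status_update_query(
            build_id=job["build_id"],
            start_time=job["start_time"],
            new_status=TestInvestigationStatus.IGNORED,  # assumed enum member
        )
        batch.add(bound)
    ScyllaCluster.get().session.execute(batch)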
argus/backend/plugins/driver_matrix_tests/model.py
CHANGED
@@ -58,6 +58,10 @@ class DriverTestRun(PluginModelBase):
         run.build_id = req.job_name
         run.build_job_url = req.job_url
         run.assign_categories()
+        try:
+            run.assignee = run.get_scheduled_assignee()
+        except Exception:  # pylint: disable=broad-except
+            run.assignee = None
         for key, value in req.test_environment.items():
             env_info = EnvironmentInfo()
             env_info.key = key
@@ -102,7 +106,7 @@
                 collection.suites.append(suite)
             run.test_collection.append(collection)
 
-        run.status = run._determine_run_status()
+        run.status = run._determine_run_status().value
         run.save()
         return run
 
argus/backend/plugins/sct/controller.py
CHANGED
@@ -31,6 +31,23 @@ def sct_submit_screenshots(run_id: str):
     }
 
 
+@bp.route("/<string:run_id>/sct_runner/set", methods=["POST"])
+@api_login_required
+def sct_set_runner(run_id: str):
+    payload = get_payload(request)
+    result = SCTService.set_sct_runner(
+        run_id=run_id,
+        public_ip=payload["public_ip"],
+        private_ip=payload["private_ip"],
+        region=payload["region"],
+        backend=payload["backend"]
+    )
+    return {
+        "status": "ok",
+        "response": result
+    }
+
+
 @bp.route("/<string:run_id>/resource/create", methods=["POST"])
 @api_login_required
 def sct_resource_create(run_id: str):
@@ -97,6 +114,36 @@ def sct_events_submit(run_id: str):
     }
 
 
+@bp.route("/<string:run_id>/gemini/submit", methods=["POST"])
+@api_login_required
+def sct_gemini_results_submit(run_id: str):
+    payload = get_payload(request)
+    result = SCTService.submit_gemini_results(run_id=run_id, gemini_data=payload["gemini_data"])
+    return {
+        "status": "ok",
+        "response": result
+    }
+
+@bp.route("/<string:run_id>/performance/submit", methods=["POST"])
+@api_login_required
+def sct_performance_results_submit(run_id: str):
+    payload = get_payload(request)
+    result = SCTService.submit_performance_results(run_id=run_id, performance_results=payload["performance_results"])
+    return {
+        "status": "ok",
+        "response": result
+    }
+
+@bp.route("/<string:run_id>/performance/history", methods=["GET"])
+@api_login_required
+def sct_get_performance_history(run_id: str):
+    result = SCTService.get_performance_history_for_test(run_id=run_id)
+    return {
+        "status": "ok",
+        "response": result
+    }
+
+
 @bp.route("/release/<string:release_name>/kernels", methods=["GET"])
 @api_login_required
 def sct_get_kernel_report(release_name: str):
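All of these views wrap an SCTService call in the same {"status": "ok", "response": ...} envelope. A sketch of how a test runner might hit the new performance endpoint over HTTP; the base URL, blueprint prefix, and auth header are assumptions (the supported path is the bundled client in argus/client/sct/client.py), while the route suffix and the performance_results payload key come from this diff:

    import requests

    ARGUS_URL = "https://argus.example.com/api/v1/client/sct"  # hypothetical deployment and prefix
    run_id = "0f9bcf36-0000-4000-8000-000000000000"            # hypothetical run id

    response = requests.post(
        f"{ARGUS_URL}/{run_id}/performance/submit",
        headers={"Authorization": "token <api-token>"},  # assumed auth scheme
        json={
            "performance_results": {
                "test_name": "perf-latency-read-test",
                "stress_cmd": "cassandra-stress read duration=50m",
                "perf_op_rate_average": 10000.0,
                "perf_op_rate_total": 20000.0,
                "perf_avg_latency_99th": 3.5,
                "perf_avg_latency_mean": 1.2,
                "perf_total_errors": "0",
                "histograms": None,
            }
        },
    )
    response.raise_for_status()  # expected body: {"status": "ok", "response": "submitted"}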
argus/backend/plugins/sct/plugin.py
CHANGED
@@ -12,6 +12,7 @@ from argus.backend.plugins.sct.udt import (
     NemesisRunInfo,
     NodeDescription,
     PackageVersion,
+    PerformanceHDRHistogram,
 )
 
 
@@ -31,5 +32,6 @@ class PluginInfo(PluginInfoBase):
         CloudSetupDetails,
         CloudNodesInfo,
         CloudInstanceDetails,
-        PackageVersion
+        PackageVersion,
+        PerformanceHDRHistogram,
     ]
argus/backend/plugins/sct/service.py
CHANGED
@@ -1,7 +1,12 @@
 from dataclasses import dataclass
+from functools import reduce
 import logging
+import math
 from time import time
-from
+from flask import g
+from argus.backend.models.web import ArgusEventTypes
+from argus.backend.plugins.sct.testrun import SCTTestRun, SubtestType
+from argus.backend.plugins.sct.types import GeminiResultsRequest, PerformanceResultsRequest
 from argus.backend.plugins.sct.udt import (
     CloudInstanceDetails,
     CloudResource,
@@ -9,8 +14,11 @@ from argus.backend.plugins.sct.udt import (
     NemesisRunInfo,
     NodeDescription,
     PackageVersion,
+    PerformanceHDRHistogram,
 )
-from argus.backend.
+from argus.backend.service.event_service import EventService
+from argus.backend.util.common import get_build_number
+from argus.backend.util.enums import NemesisStatus, ResourceState, TestStatus
 
 LOGGER = logging.getLogger(__name__)
 
@@ -60,6 +68,24 @@ class SCTService:
 
         return "added"
 
+
+    @staticmethod
+    def set_sct_runner(run_id: str, public_ip: str, private_ip: str, region: str, backend: str):
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            run.sct_runner_host = CloudInstanceDetails(
+                public_ip=public_ip,
+                private_ip=private_ip,
+                provider=backend,
+                region=region,
+            )
+            run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "updated"
+
     @staticmethod
     def submit_screenshots(run_id: str, screenshot_links: list[str]) -> str:
         try:
@@ -73,6 +99,154 @@ class SCTService:
 
         return "submitted"
 
+    @staticmethod
+    def submit_gemini_results(run_id: str, gemini_data: GeminiResultsRequest) -> str:
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            run.subtest_name = SubtestType.GEMINI.value
+            run.oracle_nodes_count = gemini_data.get("oracle_nodes_count")
+            run.oracle_node_ami_id = gemini_data.get("oracle_node_ami_id")
+            run.oracle_node_instance_type = gemini_data.get("oracle_node_instance_type")
+            run.oracle_node_scylla_version = gemini_data.get("oracle_node_scylla_version")
+            run.gemini_command = gemini_data.get("gemini_command")
+            run.gemini_version = gemini_data.get("gemini_version")
+            run.gemini_status = gemini_data.get("gemini_status")
+            run.gemini_seed = str(gemini_data.get("gemini_seed"))
+            run.gemini_write_ops = gemini_data.get("gemini_write_ops")
+            run.gemini_write_errors = gemini_data.get("gemini_write_errors")
+            run.gemini_read_ops = gemini_data.get("gemini_read_ops")
+            run.gemini_read_errors = gemini_data.get("gemini_read_errors")
+            run.save()
+
+            if run.gemini_status != "PASSED":
+                run.status = TestStatus.FAILED
+                EventService.create_run_event(kind=ArgusEventTypes.TestRunStatusChanged, body={
+                    "message": "[{username}] Setting run status to {status} due to Gemini reporting following status: {gemini_status}",
+                    "username": g.user.username,
+                    "status": TestStatus.FAILED.value,
+                    "gemini_status": run.gemini_status,
+                }, user_id=g.user.id, run_id=run_id, release_id=run.release_id, test_id=run.test_id)
+                run.save()
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "submitted"
+
+    @staticmethod
+    def submit_performance_results(run_id: str, performance_results: PerformanceResultsRequest):
+        # pylint: disable=too-many-statements
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            run.subtest_name = SubtestType.PERFORMANCE.value
+            run.perf_op_rate_average = performance_results.get("perf_op_rate_average")
+            run.perf_op_rate_total = performance_results.get("perf_op_rate_total")
+            run.perf_avg_latency_99th = performance_results.get("perf_avg_latency_99th")
+            run.perf_avg_latency_mean = performance_results.get("perf_avg_latency_mean")
+            run.perf_total_errors = performance_results.get("perf_total_errors")
+            run.stress_cmd = performance_results.get("stress_cmd")
+            run.test_name = performance_results.get("test_name")
+            run.save()
+
+            is_latency_test = "latency" in run.test_name
+            threshold_negative = -10
+
+            def cmp(lhs, rhs):
+                delta = rhs - lhs
+                change = int(math.fabs(delta) * 100 / rhs)
+                return change if delta >= 0 else change * -1
+
+            previous_runs = SCTTestRun.get_perf_results_for_test_name(run.build_id, run.start_time, run.test_name)
+            metrics_to_check = ["perf_avg_latency_99th", "perf_avg_latency_mean"] if is_latency_test else ["perf_op_rate_total"]
+
+            older_runs_by_version = {}
+            for prev_run in previous_runs:
+                if not older_runs_by_version.get(prev_run["scylla_version"]):
+                    older_runs_by_version[prev_run["scylla_version"]] = []
+                older_runs_by_version[prev_run["scylla_version"]].append(prev_run)
+
+            regression_found = False
+            regression_info = {
+                "version": None,
+                "delta": None,
+                "id": None,
+                "metric": None,
+                "job_url": None,
+            }
+
+            if performance_results["histograms"]:
+                for histogram in performance_results["histograms"]:
+                    run.histograms = { k: PerformanceHDRHistogram(**v) for k, v in histogram.items() }
+
+            for version, runs in older_runs_by_version.items():
+                for metric in metrics_to_check:
+                    # pylint: disable=cell-var-from-loop
+                    best_run = sorted(runs, reverse=(not is_latency_test), key=lambda v: v[metric])[0]
+                    last_run = runs[0]
+
+                    metric_to_best = cmp(run[metric], best_run[metric])
+                    metric_to_last = cmp(run[metric], last_run[metric])
+                    if metric_to_last < threshold_negative:
+                        regression_found = True
+                        regression_info["metric"] = metric
+                        regression_info["version"] = version
+                        regression_info["job_url"] = last_run["build_job_url"]
+                        regression_info["id"] = str(last_run["id"])
+                        regression_info["delta"] = metric_to_last
+                        break
+
+                    if metric_to_best < threshold_negative:
+                        regression_found = True
+                        regression_info["metric"] = metric
+                        regression_info["version"] = version
+                        regression_info["job_url"] = best_run["build_job_url"]
+                        regression_info["id"] = str(best_run["id"])
+                        regression_info["delta"] = metric_to_best
+                        break
+
+                if regression_found:
+                    break
+
+            if regression_found:
+                run.status = TestStatus.FAILED.value
+                run.save()
+                EventService.create_run_event(kind=ArgusEventTypes.TestRunStatusChanged, body={
+                    "message": "[{username}] Setting run status to {status} due to performance metric '{metric}' falling "
+                               "below allowed threshold ({threshold_negative}): {delta}% compared to "
+                               "<a href='/test/{test_id}/runs?additionalRuns[]={base_run_id}&additionalRuns[]={previous_run_id}'>This {version} (#{build_number}) run</a>",
+                    "username": g.user.username,
+                    "status": TestStatus.FAILED.value,
+                    "metric": regression_info["metric"],
+                    "threshold_negative": threshold_negative,
+                    "delta": regression_info["delta"],
+                    "test_id": str(run.test_id),
+                    "base_run_id": str(run.id),
+                    "previous_run_id": regression_info["id"],
+                    "version": regression_info["version"],
+                    "build_number": get_build_number(regression_info["job_url"])
+                }, user_id=g.user.id, run_id=run_id, release_id=run.release_id, test_id=run.test_id)
+            else:
+                # NOTE: This will override status set by SCT Events.
+                run.status = TestStatus.PASSED.value
+                run.save()
+
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+        return "submitted"
+
+    @staticmethod
+    def get_performance_history_for_test(run_id: str):
+        try:
+            run: SCTTestRun = SCTTestRun.get(id=run_id)
+            rows = run.get_perf_results_for_test_name(build_id=run.build_id, start_time=run.start_time, test_name=run.test_name)
+            return rows
+        except SCTTestRun.DoesNotExist as exception:
+            LOGGER.error("Run %s not found for SCTTestRun", run_id)
+            raise SCTServiceException("Run not found", run_id) from exception
+
+
     @staticmethod
     def create_resource(run_id: str, resource_details: dict) -> str:
         instance_details = CloudInstanceDetails(**resource_details.pop("instance_details"))
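In submit_performance_results, cmp() expresses the difference between the submitted run's metric (lhs) and an older run's metric (rhs) as a signed integer percentage of the older value; whenever that figure falls below threshold_negative (-10) against either the most recent or the best previous run for a given Scylla version, the run is marked FAILED and a TestRunStatusChanged event is emitted. A worked example of that arithmetic with made-up 99th-percentile latency values (milliseconds):

    import math

    def cmp(lhs, rhs):
        # same helper as in submit_performance_results above:
        # lhs is the new run's metric, rhs an older run's metric
        delta = rhs - lhs
        change = int(math.fabs(delta) * 100 / rhs)
        return change if delta >= 0 else change * -1

    threshold_negative = -10

    # hypothetical perf_avg_latency_99th values: new run vs. a previous run
    print(cmp(5.6, 4.0))                       # -40: new latency is 40% above the old one
    print(cmp(3.8, 4.0))                       # 5: new latency is 5% below the old one
    print(cmp(5.6, 4.0) < threshold_negative)  # True  -> flagged as a regression
    print(cmp(3.8, 4.0) < threshold_negative)  # False -> no regression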
argus/backend/plugins/sct/testrun.py
CHANGED
@@ -1,6 +1,8 @@
+from enum import Enum
 import logging
 from datetime import datetime
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from typing import Optional
 from uuid import UUID
 
 from cassandra.cqlengine import columns
@@ -16,6 +18,7 @@ from argus.backend.plugins.sct.udt import (
     EventsBySeverity,
     NemesisRunInfo,
     PackageVersion,
+    PerformanceHDRHistogram
 )
 
 LOGGER = logging.getLogger(__name__)
@@ -31,6 +34,11 @@ SCT_REGION_PROPERTY_MAP = {
 }
 
 
+class SubtestType(str, Enum):
+    GEMINI = "gemini"
+    PERFORMANCE = "performance"
+
+
 @dataclass(init=True, repr=True, frozen=True)
 class SCTTestRunSubmissionRequest():
     schema_version: str
@@ -39,9 +47,9 @@ class SCTTestRunSubmissionRequest():
     job_url: str
     started_by: str
     commit_id: str
-
-
-
+    sct_config: dict | None
+    runner_public_ip: Optional[str] = field(default=None)
+    runner_private_ip: Optional[str] = field(default=None)
 
 
 class SCTTestRun(PluginModelBase):
@@ -49,6 +57,7 @@ class SCTTestRun(PluginModelBase):
     _plugin_name = "scylla-cluster-tests"
 
     # Test Details
+    test_name = columns.Text()
     scm_revision_id = columns.Text()
     started_by = columns.Text()
     config_files = columns.List(value_type=columns.Text())
@@ -69,6 +78,33 @@
     nemesis_data = columns.List(value_type=columns.UserDefinedType(user_type=NemesisRunInfo))
     screenshots = columns.List(value_type=columns.Text())
 
+    # Subtest
+    subtest_name = columns.Text()
+
+    # Gemini-related fields
+    oracle_nodes_count = columns.Integer()
+    oracle_node_ami_id = columns.Text()
+    oracle_node_instance_type = columns.Text()
+    oracle_node_scylla_version = columns.Text()
+    gemini_command = columns.Text()
+    gemini_version = columns.Text()
+    gemini_status = columns.Text()
+    gemini_seed = columns.Text()
+    gemini_write_ops = columns.Integer()
+    gemini_write_errors = columns.Integer()
+    gemini_read_ops = columns.Integer()
+    gemini_read_errors = columns.Integer()
+
+    # Performance fields
+    perf_op_rate_average = columns.Double()
+    perf_op_rate_total = columns.Double()
+    perf_avg_latency_99th = columns.Double()
+    perf_avg_latency_mean = columns.Double()
+    perf_total_errors = columns.Double()
+    stress_cmd = columns.Text()
+
+    histograms = columns.List(value_type=columns.Map(key_type=columns.Text(), value_type=columns.UserDefinedType(user_type=PerformanceHDRHistogram)))
+
     @classmethod
     def _stats_query(cls) -> str:
         return ("SELECT id, test_id, group_id, release_id, status, start_time, build_job_url, build_id, "
@@ -102,7 +138,17 @@
         return list(rows)
 
     @classmethod
-    def
+    def get_perf_results_for_test_name(cls, build_id: str, start_time: float, test_name: str):
+        cluster = ScyllaCluster.get()
+        query = cluster.prepare(f"SELECT build_id, packages, scylla_version, test_name, perf_op_rate_average, perf_op_rate_total, "
+                                "perf_avg_latency_99th, perf_avg_latency_mean, perf_total_errors, id, start_time, build_job_url"
+                                f" FROM {cls.table_name()} WHERE build_id = ? AND start_time < ? AND test_name = ? ALLOW FILTERING")
+        rows = cluster.session.execute(query=query, parameters=(build_id, start_time, test_name))
+
+        return list(rows)
+
+    @classmethod
+    def init_sct_run(cls, req: SCTTestRunSubmissionRequest):
         run = cls()
         run.build_id = req.job_name
         run.assign_categories()
@@ -116,24 +162,35 @@
         run.started_by = req.started_by
         run.build_job_url = req.job_url
 
-
-        region_key = SCT_REGION_PROPERTY_MAP.get(backend, SCT_REGION_PROPERTY_MAP["default"])
-        raw_regions = req.sct_config.get(region_key) or "undefined_region"
-        regions = raw_regions.split() if isinstance(raw_regions, str) else raw_regions
-        primary_region = regions[0]
-
-        run.cloud_setup = ResourceSetup.get_resource_setup(backend=backend, sct_config=req.sct_config)
+        return run
 
-
-
-
-
-
-
+    @classmethod
+    def from_sct_config(cls, req: SCTTestRunSubmissionRequest):
+        try:
+            run = cls.get(id=req.run_id)
+        except cls.DoesNotExist:
+            run = cls.init_sct_run(req)
+            run.save()
+
+        if req.sct_config:
+            backend = req.sct_config.get("cluster_backend")
+            region_key = SCT_REGION_PROPERTY_MAP.get(backend, SCT_REGION_PROPERTY_MAP["default"])
+            raw_regions = req.sct_config.get(region_key) or "undefined_region"
+            regions = raw_regions.split() if isinstance(raw_regions, str) else raw_regions
+            primary_region = regions[0]
+            if req.runner_public_ip:  # NOTE: Legacy support, not needed otherwise
+                run.sct_runner_host = CloudInstanceDetails(
+                    public_ip=req.runner_public_ip,
+                    private_ip=req.runner_private_ip,
+                    provider=backend,
+                    region=primary_region,
+                )
+            run.cloud_setup = ResourceSetup.get_resource_setup(backend=backend, sct_config=req.sct_config)
+
+            run.config_files = req.sct_config.get("config_files")
+            run.region_name = regions
+            run.save()
 
-        run.config_files = req.sct_config.get("config_files")
-        run.region_name = regions
-        run.save()
         return run
 
     def get_resources(self) -> list[CloudResource]:
argus/backend/plugins/sct/types.py
ADDED
@@ -0,0 +1,38 @@
+from typing import TypedDict
+
+class RawHDRHistogram(TypedDict):
+    start_time: int
+    percentile_90: float
+    percentile_50: float
+    percentile_99_999: float
+    percentile_95: float
+    end_time: float
+    percentile_99_99: float
+    percentile_99: float
+    stddev: float
+    percentile_99_9: float
+
+class GeminiResultsRequest(TypedDict):
+    oracle_nodes_count: int
+    oracle_node_ami_id: str
+    oracle_node_instance_type: str
+    oracle_node_scylla_version: str
+    gemini_command: str
+    gemini_version: str
+    gemini_status: str
+    gemini_seed: str
+    gemini_write_ops: int
+    gemini_write_errors: int
+    gemini_read_ops: int
+    gemini_read_errors: int
+
+class PerformanceResultsRequest(TypedDict):
+    test_name: str
+    stress_cmd: str
+    perf_op_rate_average: float
+    perf_op_rate_total: float
+    perf_avg_latency_99th: float
+    perf_avg_latency_mean: float
+    perf_total_errors: str
+
+    histograms: list[dict[str, RawHDRHistogram]] | None
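These TypedDicts describe the JSON payloads the new gemini/submit and performance/submit endpoints expect under the gemini_data and performance_results keys. A sketch of a conforming gemini_data value; the keys come from the TypedDict above, all values are made up:

    from argus.backend.plugins.sct.types import GeminiResultsRequest

    # illustrative values only; gemini_seed is stringified again server-side
    gemini_data: GeminiResultsRequest = {
        "oracle_nodes_count": 1,
        "oracle_node_ami_id": "ami-00000000",
        "oracle_node_instance_type": "i3.large",
        "oracle_node_scylla_version": "5.1.0",
        "gemini_command": "gemini --duration 2h --concurrency 16",
        "gemini_version": "1.7.5",
        "gemini_status": "PASSED",
        "gemini_seed": "12345",
        "gemini_write_ops": 1000000,
        "gemini_write_errors": 0,
        "gemini_read_ops": 1000000,
        "gemini_read_errors": 0,
    }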
argus/backend/plugins/sct/udt.py
CHANGED
@@ -78,3 +78,16 @@ class NemesisRunInfo(UserType):
     start_time = columns.Integer()
     end_time = columns.Integer()
     stack_trace = columns.Text()
+
+
+class PerformanceHDRHistogram(UserType):
+    start_time = columns.Integer()
+    percentile_90 = columns.Float()
+    percentile_50 = columns.Float()
+    percentile_99_999 = columns.Float()
+    percentile_95 = columns.Float()
+    end_time = columns.Float()
+    percentile_99_99 = columns.Float()
+    percentile_99 = columns.Float()
+    stddev = columns.Float()
+    percentile_99_9 = columns.Float()