ai-testing-swarm 0.1.14__tar.gz → 0.1.16__tar.gz
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- {ai_testing_swarm-0.1.14/src/ai_testing_swarm.egg-info → ai_testing_swarm-0.1.16}/PKG-INFO +35 -2
- ai_testing_swarm-0.1.14/PKG-INFO → ai_testing_swarm-0.1.16/README.md +30 -12
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/pyproject.toml +9 -1
- ai_testing_swarm-0.1.16/src/ai_testing_swarm/__init__.py +1 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/cli.py +57 -17
- ai_testing_swarm-0.1.16/src/ai_testing_swarm/core/auth_matrix.py +93 -0
- ai_testing_swarm-0.1.16/src/ai_testing_swarm/core/risk.py +161 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/orchestrator.py +48 -5
- ai_testing_swarm-0.1.16/src/ai_testing_swarm/reporting/dashboard.py +218 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/reporting/report_writer.py +123 -3
- ai_testing_swarm-0.1.16/src/ai_testing_swarm/reporting/trend.py +110 -0
- ai_testing_swarm-0.1.14/README.md → ai_testing_swarm-0.1.16/src/ai_testing_swarm.egg-info/PKG-INFO +45 -1
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/SOURCES.txt +6 -0
- ai_testing_swarm-0.1.16/src/ai_testing_swarm.egg-info/requires.txt +8 -0
- ai_testing_swarm-0.1.16/tests/test_batch2_trend_and_auth.py +73 -0
- ai_testing_swarm-0.1.16/tests/test_risk_scoring_and_gate.py +41 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/tests/test_swarm_api.py +8 -5
- ai_testing_swarm-0.1.14/src/ai_testing_swarm/__init__.py +0 -1
- ai_testing_swarm-0.1.14/src/ai_testing_swarm.egg-info/requires.txt +0 -3
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/setup.cfg +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/__init__.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/execution_agent.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/learning_agent.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/llm_reasoning_agent.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/release_gate_agent.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/test_planner_agent.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/test_writer_agent.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/ui_agent.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/__init__.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/api_client.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/config.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/curl_parser.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/openai_client.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/openapi_loader.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/openapi_validator.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/safety.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/reporting/__init__.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/dependency_links.txt +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/entry_points.txt +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/top_level.txt +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/tests/test_openapi_loader.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/tests/test_openapi_validator.py +0 -0
- {ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/tests/test_policy_expected_negatives.py +0 -0
{ai_testing_swarm-0.1.14/src/ai_testing_swarm.egg-info → ai_testing_swarm-0.1.16}/PKG-INFO

@@ -1,13 +1,17 @@
 Metadata-Version: 2.4
 Name: ai-testing-swarm
-Version: 0.1.14
+Version: 0.1.16
 Summary: AI-powered testing swarm
 Author-email: Arif Shah <ashah7775@gmail.com>
 License: MIT
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
+Requires-Dist: requests>=2.28
+Requires-Dist: PyYAML>=6.0
 Provides-Extra: openapi
 Requires-Dist: jsonschema>=4.0; extra == "openapi"
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
 
 # AI Testing Swarm
 
@@ -68,7 +72,9 @@ A report is written under:
 - `./ai_swarm_reports/<METHOD>_<endpoint>/<METHOD>_<endpoint>_<timestamp>.<json|md|html>`
 
 Reports include:
-- per-test results
+- per-test results (including deterministic `risk_score` 0..100)
+- endpoint-level risk gate (`PASS`/`WARN`/`BLOCK`)
+- trend vs previous run for the same endpoint (risk delta + regressions)
 - summary counts by status code / failure type
 - optional AI summary (if enabled)
 
@@ -138,6 +144,28 @@ Then generates broad coverage across:
 
 ---
 
+## Auth matrix runner (multiple tokens/headers)
+
+To run the *same* request under multiple auth contexts (e.g., user/admin tokens), create `auth_matrix.yaml`:
+
+```yaml
+cases:
+  - name: user
+    headers:
+      Authorization: "Bearer USER_TOKEN"
+  - name: admin
+    headers:
+      Authorization: "Bearer ADMIN_TOKEN"
+```
+
+Run:
+
+```bash
+ai-test --input request.json --auth-matrix auth_matrix.yaml
+```
+
+Each auth case is written as a separate report using a `run_label` suffix (e.g. `__auth-user`).
+
 ## Safety mode (recommended for CI/demos)
 
 Mutation testing can be noisy and may accidentally stress a real environment.
@@ -189,6 +217,11 @@ Reports include:
 - `summary.counts_by_failure_type`
 - `summary.counts_by_status_code`
 - `summary.slow_tests` (based on SLA)
+- `meta.endpoint_risk_score` + `meta.gate_status`
+- `trend.*` (previous comparison if a prior report exists)
+
+A static dashboard index is generated at:
+- `./ai_swarm_reports/index.html` (latest JSON report per endpoint, sorted by regressions/risk)
 
 SLA threshold:
 - `AI_SWARM_SLA_MS` (default: `2000`)
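The gate fields described above lend themselves to a CI check. A minimal sketch, assuming the JSON report exposes the documented `meta.gate_status` / `meta.endpoint_risk_score` fields under a top-level `meta` key (the exact report layout is not shown in this diff):

```python
# ci_gate.py - sketch: fail the pipeline when the newest report is gated BLOCK.
# Assumes JSON reports under ./ai_swarm_reports carry a top-level "meta" object;
# "newest" here means last in name order (timestamps are in the filenames).
import json
import sys
from pathlib import Path

reports = sorted(Path("ai_swarm_reports").rglob("*.json"))
if not reports:
    sys.exit("no reports found")

meta = json.loads(reports[-1].read_text(encoding="utf-8")).get("meta", {})
print(f"risk={meta.get('endpoint_risk_score')} gate={meta.get('gate_status')}")
if meta.get("gate_status") == "BLOCK":
    sys.exit(1)  # non-zero exit fails the CI job
```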
ai_testing_swarm-0.1.14/PKG-INFO → ai_testing_swarm-0.1.16/README.md

@@ -1,14 +1,3 @@
-Metadata-Version: 2.4
-Name: ai-testing-swarm
-Version: 0.1.14
-Summary: AI-powered testing swarm
-Author-email: Arif Shah <ashah7775@gmail.com>
-License: MIT
-Requires-Python: >=3.9
-Description-Content-Type: text/markdown
-Provides-Extra: openapi
-Requires-Dist: jsonschema>=4.0; extra == "openapi"
-
 # AI Testing Swarm
 
 AI Testing Swarm is a **super-advanced, mutation-driven API testing framework** (with optional OpenAPI + OpenAI augmentation) built on top of **pytest**.
@@ -68,7 +57,9 @@ A report is written under:
 - `./ai_swarm_reports/<METHOD>_<endpoint>/<METHOD>_<endpoint>_<timestamp>.<json|md|html>`
 
 Reports include:
-- per-test results
+- per-test results (including deterministic `risk_score` 0..100)
+- endpoint-level risk gate (`PASS`/`WARN`/`BLOCK`)
+- trend vs previous run for the same endpoint (risk delta + regressions)
 - summary counts by status code / failure type
 - optional AI summary (if enabled)
 
@@ -138,6 +129,28 @@ Then generates broad coverage across:
 
 ---
 
+## Auth matrix runner (multiple tokens/headers)
+
+To run the *same* request under multiple auth contexts (e.g., user/admin tokens), create `auth_matrix.yaml`:
+
+```yaml
+cases:
+  - name: user
+    headers:
+      Authorization: "Bearer USER_TOKEN"
+  - name: admin
+    headers:
+      Authorization: "Bearer ADMIN_TOKEN"
+```
+
+Run:
+
+```bash
+ai-test --input request.json --auth-matrix auth_matrix.yaml
+```
+
+Each auth case is written as a separate report using a `run_label` suffix (e.g. `__auth-user`).
+
 ## Safety mode (recommended for CI/demos)
 
 Mutation testing can be noisy and may accidentally stress a real environment.
@@ -189,6 +202,11 @@ Reports include:
 - `summary.counts_by_failure_type`
 - `summary.counts_by_status_code`
 - `summary.slow_tests` (based on SLA)
+- `meta.endpoint_risk_score` + `meta.gate_status`
+- `trend.*` (previous comparison if a prior report exists)
+
+A static dashboard index is generated at:
+- `./ai_swarm_reports/index.html` (latest JSON report per endpoint, sorted by regressions/risk)
 
 SLA threshold:
 - `AI_SWARM_SLA_MS` (default: `2000`)
{ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ai-testing-swarm"
-version = "0.1.14"
+version = "0.1.16"
 description = "AI-powered testing swarm"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -13,10 +13,18 @@ authors = [
 ]
 license = { text = "MIT" }
 
+dependencies = [
+    "requests>=2.28",
+    "PyYAML>=6.0",
+]
+
 [project.optional-dependencies]
 openapi = [
     "jsonschema>=4.0",
 ]
+dev = [
+    "pytest>=8.0",
+]
 
 [project.scripts]
 ai-test = "ai_testing_swarm.cli:main"
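A quick way to confirm an installed environment picked up the new version and runtime dependencies, using only the standard library's `importlib.metadata` (a sketch; output format varies slightly by packaging tool):

```python
# Inspect the installed ai-testing-swarm distribution metadata.
from importlib.metadata import requires, version

print(version("ai-testing-swarm"))  # expected: 0.1.16
for req in requires("ai-testing-swarm") or []:
    # should list requests>=2.28 and PyYAML>=6.0, plus extra-gated entries
    print(req)
```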
ai_testing_swarm-0.1.16/src/ai_testing_swarm/__init__.py (new file)

@@ -0,0 +1 @@
+__version__ = "0.1.16"
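The new module makes the version introspectable at runtime:

```python
# The package-level __version__ attribute added in 0.1.16.
import ai_testing_swarm

print(ai_testing_swarm.__version__)  # "0.1.16"
```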
{ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/cli.py

@@ -107,6 +107,29 @@ def main():
         help="Report format to write (default: json)",
     )
 
+    parser.add_argument(
+        "--auth-matrix",
+        default="",
+        help=(
+            "Optional path to auth_matrix.yaml/json to run the same endpoint under multiple auth headers. "
+            "Each case is reported separately via a run label suffix."
+        ),
+    )
+
+    # Batch1: risk gate thresholds (backward compatible defaults)
+    parser.add_argument(
+        "--gate-warn",
+        type=int,
+        default=30,
+        help="Gate WARN threshold for endpoint risk score (default: 30)",
+    )
+    parser.add_argument(
+        "--gate-block",
+        type=int,
+        default=80,
+        help="Gate BLOCK threshold for endpoint risk score (default: 80)",
+    )
+
     args = parser.parse_args()
 
     # ------------------------------------------------------------
@@ -127,24 +150,41 @@ def main():
         import os
         os.environ["AI_SWARM_PUBLIC_ONLY"] = "1"
 
-    [... 17 removed lines not rendered in the source diff view ...]
+    orch = SwarmOrchestrator()
+
+    def _print_console(decision, results, *, label: str = ""):
+        if label:
+            print(f"\n=== AUTH CASE: {label} ===")
+        print("\n=== RELEASE DECISION ===")
+        print(decision)
+        print("\n=== TEST RESULTS ===")
+        for r in results:
+            response = r.get("response", {})
+            status_code = response.get("status_code")
+            print(f"{r.get('name'):25} {str(status_code):5} {r.get('reason')}")
+
+    if args.auth_matrix:
+        from ai_testing_swarm.core.auth_matrix import load_auth_matrix, merge_auth_headers
+
+        cases = load_auth_matrix(args.auth_matrix)
+        for c in cases:
+            req2 = merge_auth_headers(request, c)
+            decision, results = orch.run(
+                req2,
+                report_format=args.report_format,
+                gate_warn=args.gate_warn,
+                gate_block=args.gate_block,
+                run_label=f"auth-{c.name}",
+            )
+            _print_console(decision, results, label=c.name)
+    else:
+        decision, results = orch.run(
+            request,
+            report_format=args.report_format,
+            gate_warn=args.gate_warn,
+            gate_block=args.gate_block,
         )
+        _print_console(decision, results)
 
 
 if __name__ == "__main__":
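The new flags can also be exercised in-process, which is handy for debugging; a sketch equivalent to `ai-test --input request.json --auth-matrix auth_matrix.yaml --gate-warn 20 --gate-block 70` (the `request.json` and `auth_matrix.yaml` paths are placeholders):

```python
# Drive the CLI entry point directly instead of via the ai-test console script.
import sys

from ai_testing_swarm.cli import main

sys.argv = [
    "ai-test",
    "--input", "request.json",            # base request definition
    "--auth-matrix", "auth_matrix.yaml",  # run once per auth case
    "--gate-warn", "20",                  # stricter than the default 30
    "--gate-block", "70",                 # stricter than the default 80
]
main()
```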
ai_testing_swarm-0.1.16/src/ai_testing_swarm/core/auth_matrix.py (new file)

@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from pathlib import Path
+
+import yaml
+
+
+@dataclass(frozen=True)
+class AuthCase:
+    name: str
+    headers: dict[str, str]
+
+
+def _sanitize_case_name(name: str) -> str:
+    name = str(name or "").strip()
+    if not name:
+        return "case"
+    # Keep it filesystem-friendly.
+    out = []
+    for ch in name:
+        if ch.isalnum() or ch in ("-", "_", "."):
+            out.append(ch)
+        else:
+            out.append("-")
+    return "".join(out).strip("-") or "case"
+
+
+def load_auth_matrix(path: str | Path) -> list[AuthCase]:
+    """Load an auth matrix config (yaml/json).
+
+    Schema:
+      {
+        "cases": [
+          {"name": "user", "headers": {"Authorization": "Bearer ..."}},
+          {"name": "admin", "headers": {"Authorization": "Bearer ..."}}
+        ]
+      }
+
+    Notes:
+    - This is intentionally minimal and explicit.
+    - Headers are merged into the base request headers (case wins).
+    """
+
+    p = Path(path)
+    raw = p.read_text(encoding="utf-8")
+    if p.suffix.lower() in {".yaml", ".yml"}:
+        data = yaml.safe_load(raw) or {}
+    else:
+        data = json.loads(raw)
+
+    cases = data.get("cases") if isinstance(data, dict) else None
+    if not isinstance(cases, list) or not cases:
+        raise ValueError("auth matrix must contain a non-empty 'cases' list")
+
+    out: list[AuthCase] = []
+    for i, c in enumerate(cases):
+        if not isinstance(c, dict):
+            raise ValueError(f"auth case #{i} must be an object")
+        name = _sanitize_case_name(c.get("name") or f"case{i+1}")
+        headers = c.get("headers") or {}
+        if not isinstance(headers, dict):
+            raise ValueError(f"auth case '{name}' headers must be an object")
+        # stringify values (avoid accidental ints)
+        headers2 = {str(k): str(v) for k, v in headers.items() if v is not None}
+        out.append(AuthCase(name=name, headers=headers2))
+
+    # Ensure unique names
+    seen: set[str] = set()
+    uniq: list[AuthCase] = []
+    for c in out:
+        nm = c.name
+        if nm not in seen:
+            uniq.append(c)
+            seen.add(nm)
+        else:
+            j = 2
+            while f"{nm}-{j}" in seen:
+                j += 1
+            new = f"{nm}-{j}"
+            uniq.append(AuthCase(name=new, headers=c.headers))
+            seen.add(new)
+
+    return uniq
+
+
+def merge_auth_headers(request: dict, auth_case: AuthCase) -> dict:
+    req = dict(request)
+    base_headers = dict(req.get("headers") or {})
+    base_headers.update(auth_case.headers or {})
+    req["headers"] = base_headers
+    return req
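A short usage sketch for these helpers. The base request uses the `url`/`headers` keys the code above reads; the `method` key, URL, and matrix file path are illustrative placeholders:

```python
from ai_testing_swarm.core.auth_matrix import load_auth_matrix, merge_auth_headers

base_request = {
    "method": "GET",
    "url": "https://api.example.com/items",
    "headers": {"Accept": "application/json"},
}

for case in load_auth_matrix("auth_matrix.yaml"):
    merged = merge_auth_headers(base_request, case)
    # Case headers win on key conflicts; base_request itself is not mutated.
    print(case.name, merged["headers"])
```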
ai_testing_swarm-0.1.16/src/ai_testing_swarm/core/risk.py (new file)

@@ -0,0 +1,161 @@
+"""Risk scoring for AI Testing Swarm.
+
+Batch1 additions:
+- Compute a numeric risk_score per test result (0..100)
+- Aggregate endpoint risk and drive gate thresholds (PASS/WARN/BLOCK)
+
+Design goals:
+- Backward compatible: consumers can ignore risk_score fields.
+- Deterministic and explainable.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class RiskThresholds:
+    """Thresholds for the endpoint gate.
+
+    Gate is computed from endpoint_risk_score (currently max test risk).
+
+    - PASS: score < warn
+    - WARN: warn <= score < block
+    - BLOCK: score >= block
+    """
+
+    warn: int = 30
+    block: int = 80
+
+
+# Keep aligned with orchestrator/release gate semantics
+EXPECTED_FAILURES: set[str] = {
+    "success",
+    "missing_param",
+    "invalid_param",
+    "security",
+    "method_not_allowed",
+    "not_found",
+    "content_negotiation",
+}
+
+RISKY_FAILURES: set[str] = {
+    "unknown",
+    "missing_param_accepted",
+    "null_param_accepted",
+    "invalid_param_accepted",
+    "headers_accepted",
+    "method_risk",
+}
+
+BLOCKING_FAILURES: set[str] = {
+    "auth_issue",
+    "infra",
+    "security_risk",
+    "server_error",
+}
+
+
+def clamp_int(x: int, lo: int, hi: int) -> int:
+    return lo if x < lo else hi if x > hi else x
+
+
+def compute_test_risk_score(result: dict, *, sla_ms: int | None = None) -> int:
+    """Compute a risk score for a single test result.
+
+    Inputs expected (best-effort):
+    - result['failure_type']
+    - result['status']
+    - result['mutation']['strategy'] (optional)
+    - result['response']['status_code']
+    - result['response']['elapsed_ms']
+    - result['response']['openapi_validation'] (list)
+
+    Returns: int in range 0..100.
+
+    Batch2: strategy-aware weighting.
+    The same failure_type can be more/less severe depending on the test strategy.
+    """
+
+    ft = str(result.get("failure_type") or "unknown")
+    status = str(result.get("status") or "")
+    mutation = result.get("mutation") or {}
+    strategy = str(mutation.get("strategy") or "").strip().lower()
+
+    resp = result.get("response") or {}
+    sc = resp.get("status_code")
+
+    # Base score from semantic classification.
+    if ft in EXPECTED_FAILURES:
+        base = 0
+    elif ft in RISKY_FAILURES:
+        base = 35
+    elif ft in BLOCKING_FAILURES:
+        base = 90
+    else:
+        # Unknown failure types are treated as high risk but not always a hard blocker.
+        base = 60
+
+    # Strategy-aware overrides (only when the strategy is known).
+    # These are designed to stay deterministic and explainable.
+    if strategy == "security" and ft == "security_risk":
+        base = max(base, 100)
+    if strategy in {"missing_param", "null_param", "invalid_param"} and ft.endswith("_accepted"):
+        # Validation bypass signals.
+        base = max(base, 80)
+    if strategy == "headers" and ft == "headers_accepted":
+        base = max(base, 55)
+    if strategy == "method_misuse" and ft == "method_risk":
+        base = max(base, 85)
+    if strategy == "auth" and ft == "auth_issue":
+        # Often indicates environment/config drift rather than product risk.
+        base = min(base, 70)
+
+    # Status-code adjustments (defense in depth)
+    if isinstance(sc, int):
+        if 500 <= sc:
+            base = max(base, 90)
+        elif sc in (401, 403):
+            base = max(base, 80)
+        elif 400 <= sc < 500:
+            # Client errors for negative tests are expected; keep base.
+            base = max(base, base)
+
+    # Explicit FAILED status implies something unexpected.
+    if status.upper() == "FAILED":
+        base = max(base, 70)
+
+    # OpenAPI validation issues are a small additive risk.
+    issues = resp.get("openapi_validation") or []
+    if isinstance(issues, list) and issues:
+        base += 10
+
+    # SLA breach is a small additive risk.
+    if sla_ms is not None:
+        elapsed = resp.get("elapsed_ms")
+        if isinstance(elapsed, int) and elapsed > sla_ms:
+            base += 5
+
+    return clamp_int(int(base), 0, 100)
+
+
+def compute_endpoint_risk_score(results: list[dict]) -> int:
+    """Aggregate endpoint risk score.
+
+    Current policy: endpoint risk is the max risk_score across tests.
+    (This makes gating stable and easy to interpret.)
+    """
+
+    scores = [r.get("risk_score") for r in (results or []) if isinstance(r.get("risk_score"), int)]
+    if not scores:
+        return 0
+    return int(max(scores))
+
+
+def gate_from_score(score: int, thresholds: RiskThresholds) -> str:
+    if score >= thresholds.block:
+        return "BLOCK"
+    if score >= thresholds.warn:
+        return "WARN"
+    return "PASS"
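A worked example of the rules above: a `server_error` (base 90) that also breaches a 2000 ms SLA (+5) scores 95, so the endpoint gate lands on `BLOCK` under the default thresholds. The result dicts here are hand-built stand-ins for what the orchestrator produces:

```python
from ai_testing_swarm.core.risk import (
    RiskThresholds,
    compute_endpoint_risk_score,
    compute_test_risk_score,
    gate_from_score,
)

results = [
    # Expected outcome: base 0, no adjustments -> risk_score 0.
    {"failure_type": "success", "status": "PASSED",
     "response": {"status_code": 200, "elapsed_ms": 120}},
    # Blocking failure (base 90) plus an SLA breach (+5) -> risk_score 95.
    {"failure_type": "server_error", "status": "FAILED",
     "mutation": {"strategy": "invalid_param"},
     "response": {"status_code": 500, "elapsed_ms": 2500}},
]

for r in results:
    r["risk_score"] = compute_test_risk_score(r, sla_ms=2000)

endpoint_score = compute_endpoint_risk_score(results)  # max across tests -> 95
print(endpoint_score, gate_from_score(endpoint_score, RiskThresholds()))  # 95 BLOCK
```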
{ai_testing_swarm-0.1.14 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/orchestrator.py

@@ -5,6 +5,12 @@ from ai_testing_swarm.agents.learning_agent import LearningAgent
 from ai_testing_swarm.agents.release_gate_agent import ReleaseGateAgent
 from ai_testing_swarm.reporting.report_writer import write_report
 from ai_testing_swarm.core.safety import enforce_public_only
+from ai_testing_swarm.core.risk import (
+    RiskThresholds,
+    compute_test_risk_score,
+    compute_endpoint_risk_score,
+    gate_from_score,
+)
 
 import logging
 
@@ -40,8 +46,21 @@ class SwarmOrchestrator:
         self.learner = LearningAgent()
         self.release_gate = ReleaseGateAgent()
 
-    def run(
-    [... 1 removed line not rendered in the source diff view ...]
+    def run(
+        self,
+        request: dict,
+        *,
+        report_format: str = "json",
+        gate_warn: int = 30,
+        gate_block: int = 80,
+        run_label: str | None = None,
+    ):
+        """Runs the full AI testing swarm and returns (decision, results).
+
+        gate_warn/gate_block:
+            Thresholds for PASS/WARN/BLOCK gate based on endpoint risk.
+            (Kept optional for backward compatibility.)
+        """
 
         # Safety hook (currently no-op; kept for backward compatibility)
         enforce_public_only(request["url"])
@@ -78,6 +97,19 @@ class SwarmOrchestrator:
                     else "FAILED"
                 ),
             })
+
+            # Batch1: numeric risk score per test (0..100)
+            try:
+                from ai_testing_swarm.core.config import AI_SWARM_SLA_MS
+
+                execution_result["risk_score"] = compute_test_risk_score(
+                    execution_result,
+                    sla_ms=AI_SWARM_SLA_MS,
+                )
+            except Exception:
+                # Keep backwards compatibility: don't fail the run if scoring breaks.
+                execution_result["risk_score"] = 0
+
             # Optional learning step
             try:
                 self.learner.learn(test_name, classification)
@@ -97,14 +129,25 @@ class SwarmOrchestrator:
             results.append(results_by_name[t["name"]])
 
         # --------------------------------------------------------
-        # 3️⃣ RELEASE DECISION
+        # 3️⃣ RELEASE DECISION + RISK GATE
        # --------------------------------------------------------
         decision = self.release_gate.decide(results)
 
+        thresholds = RiskThresholds(warn=int(gate_warn), block=int(gate_block))
+        endpoint_risk_score = compute_endpoint_risk_score(results)
+        gate_status = gate_from_score(endpoint_risk_score, thresholds)
+
         # --------------------------------------------------------
-        # 4️⃣ WRITE
+        # 4️⃣ WRITE REPORT
         # --------------------------------------------------------
-        meta = {
+        meta = {
+            "decision": decision,
+            "gate_status": gate_status,
+            "gate_thresholds": {"warn": thresholds.warn, "block": thresholds.block},
+            "endpoint_risk_score": endpoint_risk_score,
+        }
+        if run_label:
+            meta["run_label"] = str(run_label)
 
         # Optional AI summary for humans (best-effort)
         try: