ai-testing-swarm 0.1.15__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {ai_testing_swarm-0.1.15/src/ai_testing_swarm.egg-info → ai_testing_swarm-0.1.16}/PKG-INFO +33 -2
  2. ai_testing_swarm-0.1.15/PKG-INFO → ai_testing_swarm-0.1.16/README.md +30 -14
  3. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/pyproject.toml +4 -1
  4. ai_testing_swarm-0.1.16/src/ai_testing_swarm/__init__.py +1 -0
  5. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/cli.py +43 -22
  6. ai_testing_swarm-0.1.16/src/ai_testing_swarm/core/auth_matrix.py +93 -0
  7. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/risk.py +22 -0
  8. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/orchestrator.py +3 -0
  9. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/reporting/dashboard.py +35 -5
  10. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/reporting/report_writer.py +73 -0
  11. ai_testing_swarm-0.1.16/src/ai_testing_swarm/reporting/trend.py +110 -0
  12. ai_testing_swarm-0.1.15/README.md → ai_testing_swarm-0.1.16/src/ai_testing_swarm.egg-info/PKG-INFO +45 -1
  13. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/SOURCES.txt +3 -0
  14. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/requires.txt +3 -0
  15. ai_testing_swarm-0.1.16/tests/test_batch2_trend_and_auth.py +73 -0
  16. ai_testing_swarm-0.1.15/src/ai_testing_swarm/__init__.py +0 -1
  17. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/setup.cfg +0 -0
  18. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/__init__.py +0 -0
  19. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/execution_agent.py +0 -0
  20. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/learning_agent.py +0 -0
  21. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/llm_reasoning_agent.py +0 -0
  22. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/release_gate_agent.py +0 -0
  23. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/test_planner_agent.py +0 -0
  24. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/test_writer_agent.py +0 -0
  25. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/agents/ui_agent.py +0 -0
  26. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/__init__.py +0 -0
  27. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/api_client.py +0 -0
  28. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/config.py +0 -0
  29. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/curl_parser.py +0 -0
  30. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/openai_client.py +0 -0
  31. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/openapi_loader.py +0 -0
  32. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/openapi_validator.py +0 -0
  33. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/core/safety.py +0 -0
  34. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm/reporting/__init__.py +0 -0
  35. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/dependency_links.txt +0 -0
  36. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/entry_points.txt +0 -0
  37. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/src/ai_testing_swarm.egg-info/top_level.txt +0 -0
  38. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/tests/test_openapi_loader.py +0 -0
  39. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/tests/test_openapi_validator.py +0 -0
  40. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/tests/test_policy_expected_negatives.py +0 -0
  41. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/tests/test_risk_scoring_and_gate.py +0 -0
  42. {ai_testing_swarm-0.1.15 → ai_testing_swarm-0.1.16}/tests/test_swarm_api.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-testing-swarm
3
- Version: 0.1.15
3
+ Version: 0.1.16
4
4
  Summary: AI-powered testing swarm
5
5
  Author-email: Arif Shah <ashah7775@gmail.com>
6
6
  License: MIT
@@ -10,6 +10,8 @@ Requires-Dist: requests>=2.28
10
10
  Requires-Dist: PyYAML>=6.0
11
11
  Provides-Extra: openapi
12
12
  Requires-Dist: jsonschema>=4.0; extra == "openapi"
13
+ Provides-Extra: dev
14
+ Requires-Dist: pytest>=8.0; extra == "dev"
13
15
 
14
16
  # AI Testing Swarm
15
17
 
@@ -70,7 +72,9 @@ A report is written under:
70
72
  - `./ai_swarm_reports/<METHOD>_<endpoint>/<METHOD>_<endpoint>_<timestamp>.<json|md|html>`
71
73
 
72
74
  Reports include:
73
- - per-test results
75
+ - per-test results (including deterministic `risk_score` 0..100)
76
+ - endpoint-level risk gate (`PASS`/`WARN`/`BLOCK`)
77
+ - trend vs previous run for the same endpoint (risk delta + regressions)
74
78
  - summary counts by status code / failure type
75
79
  - optional AI summary (if enabled)
76
80
 
@@ -140,6 +144,28 @@ Then generates broad coverage across:
140
144
 
141
145
  ---
142
146
 
147
+ ## Auth matrix runner (multiple tokens/headers)
148
+
149
+ To run the *same* request under multiple auth contexts (e.g., user/admin tokens), create `auth_matrix.yaml`:
150
+
151
+ ```yaml
152
+ cases:
153
+ - name: user
154
+ headers:
155
+ Authorization: "Bearer USER_TOKEN"
156
+ - name: admin
157
+ headers:
158
+ Authorization: "Bearer ADMIN_TOKEN"
159
+ ```
160
+
161
+ Run:
162
+
163
+ ```bash
164
+ ai-test --input request.json --auth-matrix auth_matrix.yaml
165
+ ```
166
+
167
+ Each auth case is written as a separate report using a `run_label` suffix (e.g. `__auth-user`).
168
+
143
169
  ## Safety mode (recommended for CI/demos)
144
170
 
145
171
  Mutation testing can be noisy and may accidentally stress a real environment.
@@ -191,6 +217,11 @@ Reports include:
191
217
  - `summary.counts_by_failure_type`
192
218
  - `summary.counts_by_status_code`
193
219
  - `summary.slow_tests` (based on SLA)
220
+ - `meta.endpoint_risk_score` + `meta.gate_status`
221
+ - `trend.*` (previous comparison if a prior report exists)
222
+
223
+ A static dashboard index is generated at:
224
+ - `./ai_swarm_reports/index.html` (latest JSON report per endpoint, sorted by regressions/risk)
194
225
 
195
226
  SLA threshold:
196
227
  - `AI_SWARM_SLA_MS` (default: `2000`)
@@ -1,16 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: ai-testing-swarm
3
- Version: 0.1.15
4
- Summary: AI-powered testing swarm
5
- Author-email: Arif Shah <ashah7775@gmail.com>
6
- License: MIT
7
- Requires-Python: >=3.9
8
- Description-Content-Type: text/markdown
9
- Requires-Dist: requests>=2.28
10
- Requires-Dist: PyYAML>=6.0
11
- Provides-Extra: openapi
12
- Requires-Dist: jsonschema>=4.0; extra == "openapi"
13
-
14
1
  # AI Testing Swarm
15
2
 
16
3
  AI Testing Swarm is a **super-advanced, mutation-driven API testing framework** (with optional OpenAPI + OpenAI augmentation) built on top of **pytest**.
@@ -70,7 +57,9 @@ A report is written under:
70
57
  - `./ai_swarm_reports/<METHOD>_<endpoint>/<METHOD>_<endpoint>_<timestamp>.<json|md|html>`
71
58
 
72
59
  Reports include:
73
- - per-test results
60
+ - per-test results (including deterministic `risk_score` 0..100)
61
+ - endpoint-level risk gate (`PASS`/`WARN`/`BLOCK`)
62
+ - trend vs previous run for the same endpoint (risk delta + regressions)
74
63
  - summary counts by status code / failure type
75
64
  - optional AI summary (if enabled)
76
65
 
@@ -140,6 +129,28 @@ Then generates broad coverage across:
140
129
 
141
130
  ---
142
131
 
132
+ ## Auth matrix runner (multiple tokens/headers)
133
+
134
+ To run the *same* request under multiple auth contexts (e.g., user/admin tokens), create `auth_matrix.yaml`:
135
+
136
+ ```yaml
137
+ cases:
138
+ - name: user
139
+ headers:
140
+ Authorization: "Bearer USER_TOKEN"
141
+ - name: admin
142
+ headers:
143
+ Authorization: "Bearer ADMIN_TOKEN"
144
+ ```
145
+
146
+ Run:
147
+
148
+ ```bash
149
+ ai-test --input request.json --auth-matrix auth_matrix.yaml
150
+ ```
151
+
152
+ Each auth case is written as a separate report using a `run_label` suffix (e.g. `__auth-user`).
153
+
143
154
  ## Safety mode (recommended for CI/demos)
144
155
 
145
156
  Mutation testing can be noisy and may accidentally stress a real environment.
@@ -191,6 +202,11 @@ Reports include:
191
202
  - `summary.counts_by_failure_type`
192
203
  - `summary.counts_by_status_code`
193
204
  - `summary.slow_tests` (based on SLA)
205
+ - `meta.endpoint_risk_score` + `meta.gate_status`
206
+ - `trend.*` (previous comparison if a prior report exists)
207
+
208
+ A static dashboard index is generated at:
209
+ - `./ai_swarm_reports/index.html` (latest JSON report per endpoint, sorted by regressions/risk)
194
210
 
195
211
  SLA threshold:
196
212
  - `AI_SWARM_SLA_MS` (default: `2000`)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ai-testing-swarm"
7
- version = "0.1.15"
7
+ version = "0.1.16"
8
8
  description = "AI-powered testing swarm"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -22,6 +22,9 @@ dependencies = [
22
22
  openapi = [
23
23
  "jsonschema>=4.0",
24
24
  ]
25
+ dev = [
26
+ "pytest>=8.0",
27
+ ]
25
28
 
26
29
  [project.scripts]
27
30
  ai-test = "ai_testing_swarm.cli:main"
@@ -0,0 +1 @@
1
+ __version__ = "0.1.16"
@@ -107,6 +107,15 @@ def main():
107
107
  help="Report format to write (default: json)",
108
108
  )
109
109
 
110
+ parser.add_argument(
111
+ "--auth-matrix",
112
+ default="",
113
+ help=(
114
+ "Optional path to auth_matrix.yaml/json to run the same endpoint under multiple auth headers. "
115
+ "Each case is reported separately via a run label suffix."
116
+ ),
117
+ )
118
+
110
119
  # Batch1: risk gate thresholds (backward compatible defaults)
111
120
  parser.add_argument(
112
121
  "--gate-warn",
@@ -141,29 +150,41 @@ def main():
141
150
  import os
142
151
  os.environ["AI_SWARM_PUBLIC_ONLY"] = "1"
143
152
 
144
- decision, results = SwarmOrchestrator().run(
145
- request,
146
- report_format=args.report_format,
147
- gate_warn=args.gate_warn,
148
- gate_block=args.gate_block,
149
- )
150
-
151
- # ------------------------------------------------------------
152
- # Console output
153
- # ------------------------------------------------------------
154
- print("\n=== RELEASE DECISION ===")
155
- print(decision)
156
-
157
- print("\n=== TEST RESULTS ===")
158
- for r in results:
159
- response = r.get("response", {})
160
- status_code = response.get("status_code")
161
-
162
- print(
163
- f"{r.get('name'):25} "
164
- f"{str(status_code):5} "
165
- f"{r.get('reason')}"
153
+ orch = SwarmOrchestrator()
154
+
155
+ def _print_console(decision, results, *, label: str = ""):
156
+ if label:
157
+ print(f"\n=== AUTH CASE: {label} ===")
158
+ print("\n=== RELEASE DECISION ===")
159
+ print(decision)
160
+ print("\n=== TEST RESULTS ===")
161
+ for r in results:
162
+ response = r.get("response", {})
163
+ status_code = response.get("status_code")
164
+ print(f"{r.get('name'):25} {str(status_code):5} {r.get('reason')}")
165
+
166
+ if args.auth_matrix:
167
+ from ai_testing_swarm.core.auth_matrix import load_auth_matrix, merge_auth_headers
168
+
169
+ cases = load_auth_matrix(args.auth_matrix)
170
+ for c in cases:
171
+ req2 = merge_auth_headers(request, c)
172
+ decision, results = orch.run(
173
+ req2,
174
+ report_format=args.report_format,
175
+ gate_warn=args.gate_warn,
176
+ gate_block=args.gate_block,
177
+ run_label=f"auth-{c.name}",
178
+ )
179
+ _print_console(decision, results, label=c.name)
180
+ else:
181
+ decision, results = orch.run(
182
+ request,
183
+ report_format=args.report_format,
184
+ gate_warn=args.gate_warn,
185
+ gate_block=args.gate_block,
166
186
  )
187
+ _print_console(decision, results)
167
188
 
168
189
 
169
190
  if __name__ == "__main__":
@@ -0,0 +1,93 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ import yaml
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class AuthCase:
12
+ name: str
13
+ headers: dict[str, str]
14
+
15
+
16
+ def _sanitize_case_name(name: str) -> str:
17
+ name = str(name or "").strip()
18
+ if not name:
19
+ return "case"
20
+ # Keep it filesystem-friendly.
21
+ out = []
22
+ for ch in name:
23
+ if ch.isalnum() or ch in ("-", "_", "."):
24
+ out.append(ch)
25
+ else:
26
+ out.append("-")
27
+ return "".join(out).strip("-") or "case"
28
+
29
+
30
+ def load_auth_matrix(path: str | Path) -> list[AuthCase]:
31
+ """Load an auth matrix config (yaml/json).
32
+
33
+ Schema:
34
+ {
35
+ "cases": [
36
+ {"name": "user", "headers": {"Authorization": "Bearer ..."}},
37
+ {"name": "admin", "headers": {"Authorization": "Bearer ..."}}
38
+ ]
39
+ }
40
+
41
+ Notes:
42
+ - This is intentionally minimal and explicit.
43
+ - Headers are merged into the base request headers (case wins).
44
+ """
45
+
46
+ p = Path(path)
47
+ raw = p.read_text(encoding="utf-8")
48
+ if p.suffix.lower() in {".yaml", ".yml"}:
49
+ data = yaml.safe_load(raw) or {}
50
+ else:
51
+ data = json.loads(raw)
52
+
53
+ cases = data.get("cases") if isinstance(data, dict) else None
54
+ if not isinstance(cases, list) or not cases:
55
+ raise ValueError("auth matrix must contain a non-empty 'cases' list")
56
+
57
+ out: list[AuthCase] = []
58
+ for i, c in enumerate(cases):
59
+ if not isinstance(c, dict):
60
+ raise ValueError(f"auth case #{i} must be an object")
61
+ name = _sanitize_case_name(c.get("name") or f"case{i+1}")
62
+ headers = c.get("headers") or {}
63
+ if not isinstance(headers, dict):
64
+ raise ValueError(f"auth case '{name}' headers must be an object")
65
+ # stringify values (avoid accidental ints)
66
+ headers2 = {str(k): str(v) for k, v in headers.items() if v is not None}
67
+ out.append(AuthCase(name=name, headers=headers2))
68
+
69
+ # Ensure unique names
70
+ seen: set[str] = set()
71
+ uniq: list[AuthCase] = []
72
+ for c in out:
73
+ nm = c.name
74
+ if nm not in seen:
75
+ uniq.append(c)
76
+ seen.add(nm)
77
+ else:
78
+ j = 2
79
+ while f"{nm}-{j}" in seen:
80
+ j += 1
81
+ new = f"{nm}-{j}"
82
+ uniq.append(AuthCase(name=new, headers=c.headers))
83
+ seen.add(new)
84
+
85
+ return uniq
86
+
87
+
88
+ def merge_auth_headers(request: dict, auth_case: AuthCase) -> dict:
89
+ req = dict(request)
90
+ base_headers = dict(req.get("headers") or {})
91
+ base_headers.update(auth_case.headers or {})
92
+ req["headers"] = base_headers
93
+ return req
@@ -67,15 +67,22 @@ def compute_test_risk_score(result: dict, *, sla_ms: int | None = None) -> int:
67
67
  Inputs expected (best-effort):
68
68
  - result['failure_type']
69
69
  - result['status']
70
+ - result['mutation']['strategy'] (optional)
70
71
  - result['response']['status_code']
71
72
  - result['response']['elapsed_ms']
72
73
  - result['response']['openapi_validation'] (list)
73
74
 
74
75
  Returns: int in range 0..100.
76
+
77
+ Batch2: strategy-aware weighting.
78
+ The same failure_type can be more/less severe depending on the test strategy.
75
79
  """
76
80
 
77
81
  ft = str(result.get("failure_type") or "unknown")
78
82
  status = str(result.get("status") or "")
83
+ mutation = result.get("mutation") or {}
84
+ strategy = str(mutation.get("strategy") or "").strip().lower()
85
+
79
86
  resp = result.get("response") or {}
80
87
  sc = resp.get("status_code")
81
88
 
@@ -90,6 +97,21 @@ def compute_test_risk_score(result: dict, *, sla_ms: int | None = None) -> int:
90
97
  # Unknown failure types are treated as high risk but not always a hard blocker.
91
98
  base = 60
92
99
 
100
+ # Strategy-aware overrides (only when the strategy is known).
101
+ # These are designed to stay deterministic and explainable.
102
+ if strategy == "security" and ft == "security_risk":
103
+ base = max(base, 100)
104
+ if strategy in {"missing_param", "null_param", "invalid_param"} and ft.endswith("_accepted"):
105
+ # Validation bypass signals.
106
+ base = max(base, 80)
107
+ if strategy == "headers" and ft == "headers_accepted":
108
+ base = max(base, 55)
109
+ if strategy == "method_misuse" and ft == "method_risk":
110
+ base = max(base, 85)
111
+ if strategy == "auth" and ft == "auth_issue":
112
+ # Often indicates environment/config drift rather than product risk.
113
+ base = min(base, 70)
114
+
93
115
  # Status-code adjustments (defense in depth)
94
116
  if isinstance(sc, int):
95
117
  if 500 <= sc:
@@ -53,6 +53,7 @@ class SwarmOrchestrator:
53
53
  report_format: str = "json",
54
54
  gate_warn: int = 30,
55
55
  gate_block: int = 80,
56
+ run_label: str | None = None,
56
57
  ):
57
58
  """Runs the full AI testing swarm and returns (decision, results).
58
59
 
@@ -145,6 +146,8 @@ class SwarmOrchestrator:
145
146
  "gate_thresholds": {"warn": thresholds.warn, "block": thresholds.block},
146
147
  "endpoint_risk_score": endpoint_risk_score,
147
148
  }
149
+ if run_label:
150
+ meta["run_label"] = str(run_label)
148
151
 
149
152
  # Optional AI summary for humans (best-effort)
150
153
  try:
@@ -26,6 +26,8 @@ class EndpointRow:
26
26
  decision: str
27
27
  report_relpath: str
28
28
  top_risks: list[dict]
29
+ risk_delta: int | None = None
30
+ regression_count: int = 0
29
31
 
30
32
 
31
33
  def _latest_json_report(endpoint_dir: Path) -> Path | None:
@@ -65,6 +67,19 @@ def write_dashboard_index(reports_dir: Path) -> str:
65
67
  if endpoint_risk is None:
66
68
  endpoint_risk = summary.get("endpoint_risk_score", 0)
67
69
 
70
+ trend = rpt.get("trend") or {}
71
+ risk_delta = trend.get("endpoint_risk_delta") if isinstance(trend, dict) else None
72
+ try:
73
+ risk_delta = int(risk_delta) if risk_delta is not None else None
74
+ except Exception:
75
+ risk_delta = None
76
+
77
+ regression_count = 0
78
+ try:
79
+ regression_count = int(trend.get("regression_count") or 0) if isinstance(trend, dict) else 0
80
+ except Exception:
81
+ regression_count = 0
82
+
68
83
  row = EndpointRow(
69
84
  endpoint_dir=child.name,
70
85
  endpoint=str(rpt.get("endpoint") or child.name),
@@ -74,11 +89,13 @@ def write_dashboard_index(reports_dir: Path) -> str:
74
89
  decision=str(meta.get("decision") or ""),
75
90
  report_relpath=str(child.name + "/" + latest.name),
76
91
  top_risks=list(summary.get("top_risks") or []),
92
+ risk_delta=risk_delta,
93
+ regression_count=regression_count,
77
94
  )
78
95
  rows.append(row)
79
96
 
80
- # Sort by risk (desc) then recent
81
- rows.sort(key=lambda r: (r.endpoint_risk_score, r.run_time), reverse=True)
97
+ # Batch2: surface regressions first, then risk (desc), then recency.
98
+ rows.sort(key=lambda r: (int(r.regression_count or 0), r.endpoint_risk_score, r.run_time), reverse=True)
82
99
 
83
100
  # Global top risks across endpoints
84
101
  global_risks = []
@@ -102,16 +119,24 @@ def write_dashboard_index(reports_dir: Path) -> str:
102
119
  cls = {"PASS": "pass", "WARN": "warn", "BLOCK": "block"}.get(gate, "")
103
120
  return f"<span class='gate {cls}'>{_html_escape(gate)}</span>"
104
121
 
122
+ def _delta_badge(d: int | None) -> str:
123
+ if d is None:
124
+ return ""
125
+ cls = "pos" if d > 0 else "neg" if d < 0 else "zero"
126
+ sign = "+" if d > 0 else ""
127
+ return f"<span class='delta {cls}'>{sign}{_html_escape(d)}</span>"
128
+
105
129
  endpoint_rows_html = "".join(
106
130
  "<tr>"
107
131
  f"<td>{badge(r.gate_status)}</td>"
108
- f"<td><code>{_html_escape(r.endpoint_risk_score)}</code></td>"
132
+ f"<td><code>{_html_escape(r.endpoint_risk_score)}</code> {_delta_badge(r.risk_delta)}</td>"
133
+ f"<td><code>{_html_escape(r.regression_count)}</code></td>"
109
134
  f"<td><a href='{_html_escape(r.report_relpath)}'>{_html_escape(r.endpoint)}</a></td>"
110
135
  f"<td><code>{_html_escape(r.run_time)}</code></td>"
111
136
  f"<td><code>{_html_escape(r.decision)}</code></td>"
112
137
  "</tr>"
113
138
  for r in rows
114
- ) or "<tr><td colspan='5'>(no JSON reports found)</td></tr>"
139
+ ) or "<tr><td colspan='6'>(no JSON reports found)</td></tr>"
115
140
 
116
141
  top_risks_html = "".join(
117
142
  "<tr>"
@@ -135,6 +160,10 @@ def write_dashboard_index(reports_dir: Path) -> str:
135
160
  .gate.warn{background:#fff7e6; border-color:#ffab00;}
136
161
  .gate.block{background:#ffebe6; border-color:#ff5630;}
137
162
  .muted{color:#555}
163
+ .delta{display:inline-block; margin-left:6px; padding:1px 8px; border-radius:999px; font-size:12px; border:1px solid #bbb;}
164
+ .delta.pos{background:#ffebe6; border-color:#ff5630;}
165
+ .delta.neg{background:#e6ffed; border-color:#36b37e;}
166
+ .delta.zero{background:#f1f2f4; border-color:#bbb;}
138
167
  """
139
168
 
140
169
  html = f"""<!doctype html>
@@ -153,7 +182,8 @@ def write_dashboard_index(reports_dir: Path) -> str:
153
182
  <thead>
154
183
  <tr>
155
184
  <th>Gate</th>
156
- <th>Risk</th>
185
+ <th>Risk (Δ)</th>
186
+ <th>Regressions</th>
157
187
  <th>Endpoint</th>
158
188
  <th>Run time</th>
159
189
  <th>Decision</th>
@@ -113,6 +113,26 @@ def _render_markdown(report: dict) -> str:
113
113
 
114
114
  lines.append("")
115
115
 
116
+ # Batch2: trend
117
+ trend = report.get("trend") or {}
118
+ if trend.get("has_previous"):
119
+ lines.append("## Trend vs previous run")
120
+ lines.append("")
121
+ lines.append(f"- **Endpoint risk delta:** `{trend.get('endpoint_risk_delta')}` (prev={trend.get('endpoint_risk_prev')})")
122
+ lines.append(f"- **Regressions:** `{trend.get('regression_count')}`")
123
+ regs = trend.get("regressions") or []
124
+ if regs:
125
+ lines.append("")
126
+ lines.append("### Regressed tests")
127
+ for x in regs[:10]:
128
+ lines.append(
129
+ "- "
130
+ f"`{_markdown_escape(x.get('name'))}`: "
131
+ f"{_markdown_escape(x.get('prev_status'))}→{_markdown_escape(x.get('curr_status'))} "
132
+ f"(risk {x.get('prev_risk_score')}→{x.get('curr_risk_score')})"
133
+ )
134
+ lines.append("")
135
+
116
136
  summary = report.get("summary") or {}
117
137
  counts_ft = summary.get("counts_by_failure_type") or {}
118
138
  counts_sc = summary.get("counts_by_status_code") or {}
@@ -174,6 +194,28 @@ def _render_html(report: dict) -> str:
174
194
  failed = [r for r in results if str(r.get("status")) == "FAILED"]
175
195
  top_risky = (risky + failed)[:10]
176
196
 
197
+ trend = report.get("trend") or {}
198
+ trend_html = ""
199
+ if trend.get("has_previous"):
200
+ regs = trend.get("regressions") or []
201
+ items = "".join(
202
+ "<li><code>{}</code>: {}→{} (risk {}→{})</li>".format(
203
+ _html_escape(x.get("name")),
204
+ _html_escape(x.get("prev_status")),
205
+ _html_escape(x.get("curr_status")),
206
+ _html_escape(x.get("prev_risk_score")),
207
+ _html_escape(x.get("curr_risk_score")),
208
+ )
209
+ for x in regs[:10]
210
+ ) or "<li>(none)</li>"
211
+ trend_html = (
212
+ "<h2>Trend vs previous run</h2>"
213
+ f"<div class='meta'><div><b>Endpoint risk delta:</b> <code>{_html_escape(trend.get('endpoint_risk_delta'))}</code> "
214
+ f"(prev <code>{_html_escape(trend.get('endpoint_risk_prev'))}</code>)</div>"
215
+ f"<div><b>Regressions:</b> <code>{_html_escape(trend.get('regression_count'))}</code></div></div>"
216
+ f"<ul>{items}</ul>"
217
+ )
218
+
177
219
  def _kv_list(d: dict) -> str:
178
220
  items = sorted((d or {}).items(), key=lambda kv: (-kv[1], kv[0]))
179
221
  return "".join(f"<li><b>{_html_escape(k)}</b>: {v}</li>" for k, v in items) or "<li>(none)</li>"
@@ -251,6 +293,8 @@ def _render_html(report: dict) -> str:
251
293
  <div><b>Endpoint risk:</b> <code>{_html_escape(meta.get('endpoint_risk_score'))}</code></div>
252
294
  </div>
253
295
 
296
+ {trend_html}
297
+
254
298
  <h2>Summary</h2>
255
299
  <div class='grid'>
256
300
  <div><h3>Counts by failure type</h3><ul>{_kv_list(summary.get('counts_by_failure_type') or {})}</ul></div>
@@ -287,6 +331,15 @@ def write_report(
287
331
  url = request.get("url", "")
288
332
 
289
333
  endpoint_name = extract_endpoint_name(method, url)
334
+
335
+ # Batch2: optional run label (auth-matrix case, environment label, etc.)
336
+ # This keeps reports for the same endpoint separated but still comparable.
337
+ run_label = str((meta or {}).get("run_label") or "").strip()
338
+ if run_label:
339
+ safe = re.sub(r"[^a-zA-Z0-9_.-]", "-", run_label).strip("-")
340
+ if safe:
341
+ endpoint_name = f"{endpoint_name}__{safe}"
342
+
290
343
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
291
344
 
292
345
  endpoint_dir = REPORTS_DIR / endpoint_name
@@ -294,6 +347,18 @@ def write_report(
294
347
 
295
348
  safe_results = _redact_results(results)
296
349
 
350
+ # Batch2: load the previous JSON report for trend comparison (best-effort)
351
+ previous_report = None
352
+ try:
353
+ from ai_testing_swarm.reporting.trend import compute_trend
354
+
355
+ json_candidates = sorted(endpoint_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
356
+ prev_path = json_candidates[0] if json_candidates else None
357
+ if prev_path and prev_path.exists():
358
+ previous_report = json.loads(prev_path.read_text(encoding="utf-8"))
359
+ except Exception:
360
+ previous_report = None
361
+
297
362
  summary = {
298
363
  "counts_by_failure_type": {},
299
364
  "counts_by_status_code": {},
@@ -346,6 +411,14 @@ def write_report(
346
411
  "results": safe_results,
347
412
  }
348
413
 
414
+ # Batch2: attach trend comparison (previous vs current)
415
+ try:
416
+ from ai_testing_swarm.reporting.trend import compute_trend
417
+
418
+ report["trend"] = compute_trend(report, previous_report)
419
+ except Exception:
420
+ report["trend"] = {"has_previous": False, "regressions": [], "regression_count": 0}
421
+
349
422
  report_format = (report_format or "json").lower().strip()
350
423
  if report_format not in {"json", "md", "html"}:
351
424
  report_format = "json"
@@ -0,0 +1,110 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class Regression:
    """Immutable record of one test that got worse between two runs."""

    # Test name used to match the result across both reports.
    name: str
    # Status string of the test in the previous report.
    prev_status: str
    # Status string of the test in the current report.
    curr_status: str
    # Per-test risk score in the previous report.
    prev_risk_score: int
    # Per-test risk score in the current report.
    curr_risk_score: int
13
+
14
+
15
+ def _status_rank(s: str) -> int:
16
+ s = (s or "").upper()
17
+ return {"PASSED": 0, "RISK": 1, "FAILED": 2}.get(s, 1)
18
+
19
+
20
+ def compute_trend(current_report: dict, previous_report: dict | None) -> dict:
21
+ """Compute a best-effort trend comparison.
22
+
23
+ Trend is designed to be resilient to older report shapes.
24
+
25
+ Returns a dict that can be embedded into report['trend'].
26
+ """
27
+
28
+ if not previous_report:
29
+ return {
30
+ "has_previous": False,
31
+ "regressions": [],
32
+ "regression_count": 0,
33
+ "endpoint_risk_prev": None,
34
+ "endpoint_risk_delta": None,
35
+ }
36
+
37
+ cur_meta = current_report.get("meta") or {}
38
+ prev_meta = previous_report.get("meta") or {}
39
+
40
+ cur_risk = cur_meta.get("endpoint_risk_score")
41
+ if cur_risk is None:
42
+ cur_risk = (current_report.get("summary") or {}).get("endpoint_risk_score")
43
+
44
+ prev_risk = prev_meta.get("endpoint_risk_score")
45
+ if prev_risk is None:
46
+ prev_risk = (previous_report.get("summary") or {}).get("endpoint_risk_score")
47
+
48
+ try:
49
+ cur_risk_i = int(cur_risk or 0)
50
+ except Exception:
51
+ cur_risk_i = 0
52
+ try:
53
+ prev_risk_i = int(prev_risk or 0)
54
+ except Exception:
55
+ prev_risk_i = 0
56
+
57
+ cur_results = current_report.get("results") or []
58
+ prev_results = previous_report.get("results") or []
59
+
60
+ prev_by_name = {str(r.get("name")): r for r in prev_results if r.get("name") is not None}
61
+
62
+ regressions: list[Regression] = []
63
+ for r in cur_results:
64
+ name = r.get("name")
65
+ if name is None:
66
+ continue
67
+ name = str(name)
68
+ prev = prev_by_name.get(name)
69
+ if not prev:
70
+ continue
71
+
72
+ prev_status = str(prev.get("status") or "")
73
+ cur_status = str(r.get("status") or "")
74
+ prev_score = prev.get("risk_score")
75
+ cur_score = r.get("risk_score")
76
+ prev_score_i = int(prev_score) if isinstance(prev_score, int) else 0
77
+ cur_score_i = int(cur_score) if isinstance(cur_score, int) else 0
78
+
79
+ worsened_status = _status_rank(cur_status) > _status_rank(prev_status)
80
+ worsened_score = cur_score_i > prev_score_i
81
+
82
+ if worsened_status or worsened_score:
83
+ regressions.append(
84
+ Regression(
85
+ name=name,
86
+ prev_status=prev_status,
87
+ curr_status=cur_status,
88
+ prev_risk_score=prev_score_i,
89
+ curr_risk_score=cur_score_i,
90
+ )
91
+ )
92
+
93
+ regressions.sort(key=lambda x: (x.curr_risk_score - x.prev_risk_score, _status_rank(x.curr_status)), reverse=True)
94
+
95
+ return {
96
+ "has_previous": True,
97
+ "endpoint_risk_prev": prev_risk_i,
98
+ "endpoint_risk_delta": cur_risk_i - prev_risk_i,
99
+ "regression_count": len(regressions),
100
+ "regressions": [
101
+ {
102
+ "name": x.name,
103
+ "prev_status": x.prev_status,
104
+ "curr_status": x.curr_status,
105
+ "prev_risk_score": x.prev_risk_score,
106
+ "curr_risk_score": x.curr_risk_score,
107
+ }
108
+ for x in regressions[:50]
109
+ ],
110
+ }
@@ -1,3 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: ai-testing-swarm
3
+ Version: 0.1.16
4
+ Summary: AI-powered testing swarm
5
+ Author-email: Arif Shah <ashah7775@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: requests>=2.28
10
+ Requires-Dist: PyYAML>=6.0
11
+ Provides-Extra: openapi
12
+ Requires-Dist: jsonschema>=4.0; extra == "openapi"
13
+ Provides-Extra: dev
14
+ Requires-Dist: pytest>=8.0; extra == "dev"
15
+
1
16
  # AI Testing Swarm
2
17
 
3
18
  AI Testing Swarm is a **super-advanced, mutation-driven API testing framework** (with optional OpenAPI + OpenAI augmentation) built on top of **pytest**.
@@ -57,7 +72,9 @@ A report is written under:
57
72
  - `./ai_swarm_reports/<METHOD>_<endpoint>/<METHOD>_<endpoint>_<timestamp>.<json|md|html>`
58
73
 
59
74
  Reports include:
60
- - per-test results
75
+ - per-test results (including deterministic `risk_score` 0..100)
76
+ - endpoint-level risk gate (`PASS`/`WARN`/`BLOCK`)
77
+ - trend vs previous run for the same endpoint (risk delta + regressions)
61
78
  - summary counts by status code / failure type
62
79
  - optional AI summary (if enabled)
63
80
 
@@ -127,6 +144,28 @@ Then generates broad coverage across:
127
144
 
128
145
  ---
129
146
 
147
+ ## Auth matrix runner (multiple tokens/headers)
148
+
149
+ To run the *same* request under multiple auth contexts (e.g., user/admin tokens), create `auth_matrix.yaml`:
150
+
151
+ ```yaml
152
+ cases:
153
+ - name: user
154
+ headers:
155
+ Authorization: "Bearer USER_TOKEN"
156
+ - name: admin
157
+ headers:
158
+ Authorization: "Bearer ADMIN_TOKEN"
159
+ ```
160
+
161
+ Run:
162
+
163
+ ```bash
164
+ ai-test --input request.json --auth-matrix auth_matrix.yaml
165
+ ```
166
+
167
+ Each auth case is written as a separate report using a `run_label` suffix (e.g. `__auth-user`).
168
+
130
169
  ## Safety mode (recommended for CI/demos)
131
170
 
132
171
  Mutation testing can be noisy and may accidentally stress a real environment.
@@ -178,6 +217,11 @@ Reports include:
178
217
  - `summary.counts_by_failure_type`
179
218
  - `summary.counts_by_status_code`
180
219
  - `summary.slow_tests` (based on SLA)
220
+ - `meta.endpoint_risk_score` + `meta.gate_status`
221
+ - `trend.*` (previous comparison if a prior report exists)
222
+
223
+ A static dashboard index is generated at:
224
+ - `./ai_swarm_reports/index.html` (latest JSON report per endpoint, sorted by regressions/risk)
181
225
 
182
226
  SLA threshold:
183
227
  - `AI_SWARM_SLA_MS` (default: `2000`)
@@ -19,6 +19,7 @@ src/ai_testing_swarm/agents/test_writer_agent.py
19
19
  src/ai_testing_swarm/agents/ui_agent.py
20
20
  src/ai_testing_swarm/core/__init__.py
21
21
  src/ai_testing_swarm/core/api_client.py
22
+ src/ai_testing_swarm/core/auth_matrix.py
22
23
  src/ai_testing_swarm/core/config.py
23
24
  src/ai_testing_swarm/core/curl_parser.py
24
25
  src/ai_testing_swarm/core/openai_client.py
@@ -29,6 +30,8 @@ src/ai_testing_swarm/core/safety.py
29
30
  src/ai_testing_swarm/reporting/__init__.py
30
31
  src/ai_testing_swarm/reporting/dashboard.py
31
32
  src/ai_testing_swarm/reporting/report_writer.py
33
+ src/ai_testing_swarm/reporting/trend.py
34
+ tests/test_batch2_trend_and_auth.py
32
35
  tests/test_openapi_loader.py
33
36
  tests/test_openapi_validator.py
34
37
  tests/test_policy_expected_negatives.py
@@ -1,5 +1,8 @@
1
1
  requests>=2.28
2
2
  PyYAML>=6.0
3
3
 
4
+ [dev]
5
+ pytest>=8.0
6
+
4
7
  [openapi]
5
8
  jsonschema>=4.0
@@ -0,0 +1,73 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import pytest
5
+
6
+ from ai_testing_swarm.core.auth_matrix import load_auth_matrix, merge_auth_headers, AuthCase
7
+ from ai_testing_swarm.core.risk import compute_test_risk_score
8
+ from ai_testing_swarm.reporting.trend import compute_trend
9
+
10
+
11
+ def test_strategy_aware_risk_security_risk_is_100():
12
+ r = {
13
+ "failure_type": "security_risk",
14
+ "status": "FAILED",
15
+ "mutation": {"strategy": "security"},
16
+ "response": {"status_code": 200, "elapsed_ms": 10, "openapi_validation": []},
17
+ }
18
+ assert compute_test_risk_score(r) == 100
19
+
20
+
21
+ def test_strategy_aware_risk_validation_bypass_is_high():
22
+ r = {
23
+ "failure_type": "missing_param_accepted",
24
+ "status": "RISK",
25
+ "mutation": {"strategy": "missing_param"},
26
+ "response": {"status_code": 200, "elapsed_ms": 10, "openapi_validation": []},
27
+ }
28
+ assert compute_test_risk_score(r) >= 80
29
+
30
+
31
+ def test_compute_trend_regressions_detected():
32
+ prev = {
33
+ "meta": {"endpoint_risk_score": 10},
34
+ "results": [
35
+ {"name": "happy_path", "status": "PASSED", "risk_score": 0},
36
+ {"name": "sec_probe", "status": "RISK", "risk_score": 35},
37
+ ],
38
+ }
39
+ cur = {
40
+ "meta": {"endpoint_risk_score": 90},
41
+ "results": [
42
+ {"name": "happy_path", "status": "FAILED", "risk_score": 70},
43
+ {"name": "sec_probe", "status": "RISK", "risk_score": 60},
44
+ ],
45
+ }
46
+ t = compute_trend(cur, prev)
47
+ assert t["has_previous"] is True
48
+ assert t["endpoint_risk_prev"] == 10
49
+ assert t["endpoint_risk_delta"] == 80
50
+ assert t["regression_count"] == 2
51
+
52
+
53
+ def test_auth_matrix_load_and_merge(tmp_path: Path):
54
+ p = tmp_path / "auth.json"
55
+ p.write_text(
56
+ json.dumps(
57
+ {
58
+ "cases": [
59
+ {"name": "user", "headers": {"Authorization": "Bearer U"}},
60
+ {"name": "admin", "headers": {"Authorization": "Bearer A"}},
61
+ ]
62
+ }
63
+ ),
64
+ encoding="utf-8",
65
+ )
66
+
67
+ cases = load_auth_matrix(p)
68
+ assert [c.name for c in cases] == ["user", "admin"]
69
+
70
+ req = {"method": "GET", "url": "https://example.com", "headers": {"X": "1"}}
71
+ req2 = merge_auth_headers(req, cases[0])
72
+ assert req2["headers"]["X"] == "1"
73
+ assert req2["headers"]["Authorization"] == "Bearer U"
@@ -1 +0,0 @@
1
- __version__ = "0.1.15"