automr 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- automr-0.1.0/PKG-INFO +263 -0
- automr-0.1.0/README.md +247 -0
- automr-0.1.0/automr/__init__.py +1 -0
- automr-0.1.0/automr/analysis.py +31 -0
- automr-0.1.0/automr/api.py +320 -0
- automr-0.1.0/automr/comparator.py +22 -0
- automr-0.1.0/automr/core/__init__.py +0 -0
- automr-0.1.0/automr/core/failure_analysis.py +64 -0
- automr-0.1.0/automr/core/model_wrapper.py +23 -0
- automr-0.1.0/automr/core/range_tester.py +88 -0
- automr-0.1.0/automr/core/tester.py +26 -0
- automr-0.1.0/automr/core/visualize.py +309 -0
- automr-0.1.0/automr/relations/__init__.py +0 -0
- automr-0.1.0/automr/relations/behavioral_relations.py +35 -0
- automr-0.1.0/automr/relations/brightness_relation.py +8 -0
- automr-0.1.0/automr/relations/crop_relation.py +6 -0
- automr-0.1.0/automr/relations/flip_relation.py +4 -0
- automr-0.1.0/automr/relations/image_relations.py +98 -0
- automr-0.1.0/automr/relations/inequality_relation.py +6 -0
- automr-0.1.0/automr/relations/noise_relation.py +8 -0
- automr-0.1.0/automr/relations/rotation_relation.py +6 -0
- automr-0.1.0/automr/relations/temporal_relation.py +6 -0
- automr-0.1.0/automr/relations/temporal_relations.py +26 -0
- automr-0.1.0/automr/relations/translation_relation.py +8 -0
- automr-0.1.0/automr/relations/weather_relations.py +40 -0
- automr-0.1.0/automr/transforms/__init__.py +0 -0
- automr-0.1.0/automr/transforms/behavioral_transforms.py +11 -0
- automr-0.1.0/automr/transforms/brightness.py +6 -0
- automr-0.1.0/automr/transforms/crop.py +7 -0
- automr-0.1.0/automr/transforms/geometric.py +5 -0
- automr-0.1.0/automr/transforms/image_transforms.py +44 -0
- automr-0.1.0/automr/transforms/noise.py +14 -0
- automr-0.1.0/automr/transforms/rotation.py +8 -0
- automr-0.1.0/automr/transforms/temporal_transforms.py +17 -0
- automr-0.1.0/automr/transforms/translation.py +8 -0
- automr-0.1.0/automr/transforms/weather_transforms.py +69 -0
- automr-0.1.0/automr.egg-info/PKG-INFO +263 -0
- automr-0.1.0/automr.egg-info/SOURCES.txt +41 -0
- automr-0.1.0/automr.egg-info/dependency_links.txt +1 -0
- automr-0.1.0/automr.egg-info/requires.txt +6 -0
- automr-0.1.0/automr.egg-info/top_level.txt +1 -0
- automr-0.1.0/pyproject.toml +34 -0
- automr-0.1.0/setup.cfg +4 -0
automr-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: automr
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Metamorphic Testing Framework for Regression-based Autonomous Driving ML/AI Models
|
|
5
|
+
Author: Charith Manujaya, Raveesha Peiris, Thurunu Pabasara, Tharika Akurana
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/CharithManaujayaMUTEC/AutoMR-Framework
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: tqdm
|
|
13
|
+
Requires-Dist: opencv-python
|
|
14
|
+
Requires-Dist: tensorflow
|
|
15
|
+
Requires-Dist: python-dotenv
|
|
16
|
+
|
|
17
|
+
<div align="center">
|
|
18
|
+
<img src="automrlogo.png" width="400"/>
|
|
19
|
+
<br/><br/>
|
|
20
|
+
<img src="https://img.shields.io/badge/Python-3.8+-3776AB?style=flat-square&logo=python&logoColor=white"/>
|
|
21
|
+
<img src="https://img.shields.io/badge/License-MIT-22c55e?style=flat-square"/>
|
|
22
|
+
<img src="https://img.shields.io/badge/Status-Active-22c55e?style=flat-square"/>
|
|
23
|
+
<img src="https://img.shields.io/badge/Domain-Autonomous%20Driving-7c6fcd?style=flat-square"/>
|
|
24
|
+
<br/><br/>
|
|
25
|
+
<p>Model-agnostic, input-agnostic, and output-agnostic metamorphic testing framework for regression-based autonomous driving AI/ML models.</p>
|
|
26
|
+
</div>
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Overview
|
|
31
|
+
|
|
32
|
+
AutoMR evaluates ML models by verifying **metamorphic relations (MRs)** — expected behavioral properties that must hold under controlled input transformations. Instead of checking exact outputs against ground truth, AutoMR checks whether the model behaves _consistently_ when inputs are perturbed in predictable ways.
|
|
33
|
+
|
|
34
|
+
| Problem | What AutoMR does |
|
|
35
|
+
| ------------------------ | -------------------------------------------------------- |
|
|
36
|
+
| No labeled data | Tests models without any ground-truth labels |
|
|
37
|
+
| Real-world perturbations | Measures robustness under realistic noise and conditions |
|
|
38
|
+
| Silent failures | Pinpoints when and how models begin to fail |
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Key Features
|
|
43
|
+
|
|
44
|
+
- **Model-agnostic** — works with TensorFlow, PyTorch, sklearn, or any custom model
|
|
45
|
+
- **Input-agnostic** — supports images, text, tabular, and sequential data
|
|
46
|
+
- **Output-agnostic** — handles regression and classification outputs
|
|
47
|
+
- **Built-in MR pipeline** — end-to-end execution with zero boilerplate
|
|
48
|
+
- **Parametric testing** — sweep transformation parameters across configurable ranges
|
|
49
|
+
- **Automated analysis** — failure rate, severity scores, and worst-case detection
|
|
50
|
+
- **Automatic CSV export** — all results persisted without manual intervention
|
|
51
|
+
- **Optional progress tracking** — live progress bars for long-running tests
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
git clone https://github.com/CharithManaujayaMUTEC/AutoMR-Framework.git
|
|
59
|
+
cd AutoMR-Framework
|
|
60
|
+
|
|
61
|
+
python -m venv venv
|
|
62
|
+
venv\Scripts\activate   # Windows; on macOS/Linux use: source venv/bin/activate
|
|
63
|
+
|
|
64
|
+
pip install -r requirements.txt
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from automr.api import AutoMR
|
|
73
|
+
|
|
74
|
+
automr = AutoMR(model)
|
|
75
|
+
|
|
76
|
+
df, results = automr.run_full_test(
|
|
77
|
+
dataset,
|
|
78
|
+
max_samples=2000,
|
|
79
|
+
samples_per_mr=5,
|
|
80
|
+
show_progress=True
|
|
81
|
+
)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Execution Flow
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
1. Load dataset → user-defined input source
|
|
90
|
+
2. Load model → any model exposing predict(x)
|
|
91
|
+
3. Apply transforms → brightness, rotation, noise, fog, ...
|
|
92
|
+
4. Generate outputs → original vs. transformed predictions
|
|
93
|
+
5. Validate MRs → check expected behavioral properties
|
|
94
|
+
6. Analyze results → failure rate, severity, worst cases
|
|
95
|
+
7. Export → CSV files written to /results
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Output Files
|
|
101
|
+
|
|
102
|
+
All results are saved automatically to the `/results` directory.
|
|
103
|
+
|
|
104
|
+
| File | Description |
|
|
105
|
+
| ---------------------- | ------------------------------------------ |
|
|
106
|
+
| `automr_results.csv` | Full per-sample test log |
|
|
107
|
+
| `failure_summary.csv` | Failure rate per metamorphic relation |
|
|
108
|
+
| `severity_summary.csv` | Average output deviation per MR |
|
|
109
|
+
| `worst_cases.csv` | Samples with highest deviation |
|
|
110
|
+
| `failure_regions.txt` | Parametric boundaries where failures occur |
|
|
111
|
+
|
|
112
|
+
### Output columns
|
|
113
|
+
|
|
114
|
+
| Column | Description |
|
|
115
|
+
| ------------------- | --------------------------------- |
|
|
116
|
+
| `mr` | Metamorphic relation identifier |
|
|
117
|
+
| `param` | Transformation parameter value |
|
|
118
|
+
| `original` | Original model prediction |
|
|
119
|
+
| `transformed` | Prediction after transformation |
|
|
120
|
+
| `difference` | Raw output difference |
|
|
121
|
+
| `percent_change` | Percentage change between outputs |
|
|
122
|
+
| `status` | `PASS` / `FAIL` |
|
|
123
|
+
| `expected_behavior` | Expected MR rule |
|
|
124
|
+
| `actual_behavior` | `Consistent` / `Violation` |
|
|
125
|
+
| `sample_id` | Input sample index |
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Metamorphic Relations
|
|
130
|
+
|
|
131
|
+
| Relation | Description | Type |
|
|
132
|
+
| --------------------- | ------------------------------------ | ---------- |
|
|
133
|
+
| `BrightnessRelation` | Output invariant to lighting changes | Invariance |
|
|
134
|
+
| `RotationRelation` | Stable under small rotations | Invariance |
|
|
135
|
+
| `TranslationRelation` | Stable under image shifts | Invariance |
|
|
136
|
+
| `NoiseRelation` | Robust to random noise | Robustness |
|
|
137
|
+
| `FogRelation` | Robust to visibility degradation | Robustness |
|
|
138
|
+
| `TemporalSmoothness` | Consistent outputs across frames | Monotonic |
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Transformations
|
|
143
|
+
|
|
144
|
+
| Transform | Description |
|
|
145
|
+
| ----------- | ------------------------- |
|
|
146
|
+
| Brightness | Adjust pixel intensity |
|
|
147
|
+
| Rotation | Rotate image by angle |
|
|
148
|
+
| Translation | Shift image spatially |
|
|
149
|
+
| Noise | Add random Gaussian noise |
|
|
150
|
+
| Fog / Rain | Simulate adverse weather |
|
|
151
|
+
| Blur | Apply smoothing filter |
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Design Principles
|
|
156
|
+
|
|
157
|
+
### Model-agnostic
|
|
158
|
+
|
|
159
|
+
Works with any model that implements a `predict(x)` interface:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
# TensorFlow, PyTorch, sklearn, or fully custom — all compatible
|
|
163
|
+
output = model.predict(input)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Input-agnostic
|
|
167
|
+
|
|
168
|
+
Accepts any input type — images, sequences, tabular data, or custom formats. Transformations are applied modularly and do not depend on input structure.
|
|
169
|
+
|
|
170
|
+
### Modular architecture
|
|
171
|
+
|
|
172
|
+
| Component | Role |
|
|
173
|
+
| ----------- | -------------------------------------- |
|
|
174
|
+
| `Model` | Generates predictions |
|
|
175
|
+
| `Transform` | Modifies input samples |
|
|
176
|
+
| `Relation` | Defines expected behavioral properties |
|
|
177
|
+
| `Analyzer` | Computes failure metrics and summaries |
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Project Structure
|
|
182
|
+
|
|
183
|
+
```
|
|
184
|
+
AutoMR-Framework/
|
|
185
|
+
│
|
|
186
|
+
├── automr/
|
|
187
|
+
│ ├── api.py
|
|
188
|
+
│ ├── comparator.py
|
|
189
|
+
│ │
|
|
190
|
+
│ ├── core/
|
|
191
|
+
│ │ ├── range_tester.py
|
|
192
|
+
│ │ └── failure_analysis.py
|
|
193
|
+
│ │
|
|
194
|
+
│ ├── relations/
|
|
195
|
+
│ ├── transforms/
|
|
196
|
+
│ └── analysis/
|
|
197
|
+
│
|
|
198
|
+
├── run_test_example.py
|
|
199
|
+
├── requirements.txt
|
|
200
|
+
├── .gitignore
|
|
201
|
+
└── automrlogo.png
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Example Run
|
|
207
|
+
|
|
208
|
+
```
|
|
209
|
+
Running AutoMR: ██████████████ 100%
|
|
210
|
+
|
|
211
|
+
=== AutoMR Results ===
|
|
212
|
+
Failure Summary:
|
|
213
|
+
BrightnessRelation → 12.4% failure rate | avg deviation: 0.031
|
|
214
|
+
RotationRelation → 8.7% failure rate | avg deviation: 0.019
|
|
215
|
+
NoiseRelation → 21.1% failure rate | avg deviation: 0.074
|
|
216
|
+
FogRelation → 34.2% failure rate | avg deviation: 0.112
|
|
217
|
+
|
|
218
|
+
DONE: Results saved in /results
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Built-in Analysis
|
|
224
|
+
|
|
225
|
+
AutoMR automatically computes the following after each test run:
|
|
226
|
+
|
|
227
|
+
- **Failure rate** per metamorphic relation
|
|
228
|
+
- **Severity** — average output deviation across failures
|
|
229
|
+
- **Worst-case failures** — samples with the largest deviations
|
|
230
|
+
- **Failure regions** — parameter ranges where the model is most unstable
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Limitations
|
|
235
|
+
|
|
236
|
+
- Current transformation suite is image-focused
|
|
237
|
+
- Comparator thresholds require manual tuning per task
|
|
238
|
+
- End-to-end performance depends on model inference speed
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Future Work
|
|
243
|
+
|
|
244
|
+
- NLP and tabular transformation extensions
|
|
245
|
+
- Classification-specific comparators
|
|
246
|
+
- Streamlit dashboard for interactive analysis
|
|
247
|
+
- Cross-model MR testing
|
|
248
|
+
- Automated result visualizations (plots and charts)
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Authors
|
|
253
|
+
|
|
254
|
+
**CharithManaujayaMUTEC** — [github.com/CharithManaujayaMUTEC](https://github.com/CharithManaujayaMUTEC)
|
|
255
|
+
**RaveeshaPeiris** — [github.com/RaveeshaPeiris](https://github.com/RaveeshaPeiris)
|
|
256
|
+
|
|
257
|
+
> Final Year Project — Metamorphic Testing Framework for Regression-Based Autonomous Driving AI/ML Models
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## License
|
|
262
|
+
|
|
263
|
+
Released under the [MIT License](LICENSE).
|
automr-0.1.0/README.md
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="automrlogo.png" width="400"/>
|
|
3
|
+
<br/><br/>
|
|
4
|
+
<img src="https://img.shields.io/badge/Python-3.8+-3776AB?style=flat-square&logo=python&logoColor=white"/>
|
|
5
|
+
<img src="https://img.shields.io/badge/License-MIT-22c55e?style=flat-square"/>
|
|
6
|
+
<img src="https://img.shields.io/badge/Status-Active-22c55e?style=flat-square"/>
|
|
7
|
+
<img src="https://img.shields.io/badge/Domain-Autonomous%20Driving-7c6fcd?style=flat-square"/>
|
|
8
|
+
<br/><br/>
|
|
9
|
+
<p>Model-agnostic, input-agnostic, and output-agnostic metamorphic testing framework for regression-based autonomous driving AI/ML models.</p>
|
|
10
|
+
</div>
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Overview
|
|
15
|
+
|
|
16
|
+
AutoMR evaluates ML models by verifying **metamorphic relations (MRs)** — expected behavioral properties that must hold under controlled input transformations. Instead of checking exact outputs against ground truth, AutoMR checks whether the model behaves _consistently_ when inputs are perturbed in predictable ways.
|
|
17
|
+
|
|
18
|
+
| Problem | What AutoMR does |
|
|
19
|
+
| ------------------------ | -------------------------------------------------------- |
|
|
20
|
+
| No labeled data | Tests models without any ground-truth labels |
|
|
21
|
+
| Real-world perturbations | Measures robustness under realistic noise and conditions |
|
|
22
|
+
| Silent failures | Pinpoints when and how models begin to fail |
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Key Features
|
|
27
|
+
|
|
28
|
+
- **Model-agnostic** — works with TensorFlow, PyTorch, sklearn, or any custom model
|
|
29
|
+
- **Input-agnostic** — supports images, text, tabular, and sequential data
|
|
30
|
+
- **Output-agnostic** — handles regression and classification outputs
|
|
31
|
+
- **Built-in MR pipeline** — end-to-end execution with zero boilerplate
|
|
32
|
+
- **Parametric testing** — sweep transformation parameters across configurable ranges
|
|
33
|
+
- **Automated analysis** — failure rate, severity scores, and worst-case detection
|
|
34
|
+
- **Automatic CSV export** — all results persisted without manual intervention
|
|
35
|
+
- **Optional progress tracking** — live progress bars for long-running tests
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
git clone https://github.com/CharithManaujayaMUTEC/AutoMR-Framework.git
|
|
43
|
+
cd AutoMR-Framework
|
|
44
|
+
|
|
45
|
+
python -m venv venv
|
|
46
|
+
venv\Scripts\activate   # Windows; on macOS/Linux use: source venv/bin/activate
|
|
47
|
+
|
|
48
|
+
pip install -r requirements.txt
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from automr.api import AutoMR
|
|
57
|
+
|
|
58
|
+
automr = AutoMR(model)
|
|
59
|
+
|
|
60
|
+
df, results = automr.run_full_test(
|
|
61
|
+
dataset,
|
|
62
|
+
max_samples=2000,
|
|
63
|
+
samples_per_mr=5,
|
|
64
|
+
show_progress=True
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Execution Flow
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
1. Load dataset → user-defined input source
|
|
74
|
+
2. Load model → any model exposing predict(x)
|
|
75
|
+
3. Apply transforms → brightness, rotation, noise, fog, ...
|
|
76
|
+
4. Generate outputs → original vs. transformed predictions
|
|
77
|
+
5. Validate MRs → check expected behavioral properties
|
|
78
|
+
6. Analyze results → failure rate, severity, worst cases
|
|
79
|
+
7. Export → CSV files written to /results
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Output Files
|
|
85
|
+
|
|
86
|
+
All results are saved automatically to the `/results` directory.
|
|
87
|
+
|
|
88
|
+
| File | Description |
|
|
89
|
+
| ---------------------- | ------------------------------------------ |
|
|
90
|
+
| `automr_results.csv` | Full per-sample test log |
|
|
91
|
+
| `failure_summary.csv` | Failure rate per metamorphic relation |
|
|
92
|
+
| `severity_summary.csv` | Average output deviation per MR |
|
|
93
|
+
| `worst_cases.csv` | Samples with highest deviation |
|
|
94
|
+
| `failure_regions.txt` | Parametric boundaries where failures occur |
|
|
95
|
+
|
|
96
|
+
### Output columns
|
|
97
|
+
|
|
98
|
+
| Column | Description |
|
|
99
|
+
| ------------------- | --------------------------------- |
|
|
100
|
+
| `mr` | Metamorphic relation identifier |
|
|
101
|
+
| `param` | Transformation parameter value |
|
|
102
|
+
| `original` | Original model prediction |
|
|
103
|
+
| `transformed` | Prediction after transformation |
|
|
104
|
+
| `difference` | Raw output difference |
|
|
105
|
+
| `percent_change` | Percentage change between outputs |
|
|
106
|
+
| `status` | `PASS` / `FAIL` |
|
|
107
|
+
| `expected_behavior` | Expected MR rule |
|
|
108
|
+
| `actual_behavior` | `Consistent` / `Violation` |
|
|
109
|
+
| `sample_id` | Input sample index |
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Metamorphic Relations
|
|
114
|
+
|
|
115
|
+
| Relation | Description | Type |
|
|
116
|
+
| --------------------- | ------------------------------------ | ---------- |
|
|
117
|
+
| `BrightnessRelation` | Output invariant to lighting changes | Invariance |
|
|
118
|
+
| `RotationRelation` | Stable under small rotations | Invariance |
|
|
119
|
+
| `TranslationRelation` | Stable under image shifts | Invariance |
|
|
120
|
+
| `NoiseRelation` | Robust to random noise | Robustness |
|
|
121
|
+
| `FogRelation` | Robust to visibility degradation | Robustness |
|
|
122
|
+
| `TemporalSmoothness` | Consistent outputs across frames | Monotonic |
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Transformations
|
|
127
|
+
|
|
128
|
+
| Transform | Description |
|
|
129
|
+
| ----------- | ------------------------- |
|
|
130
|
+
| Brightness | Adjust pixel intensity |
|
|
131
|
+
| Rotation | Rotate image by angle |
|
|
132
|
+
| Translation | Shift image spatially |
|
|
133
|
+
| Noise | Add random Gaussian noise |
|
|
134
|
+
| Fog / Rain | Simulate adverse weather |
|
|
135
|
+
| Blur | Apply smoothing filter |
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Design Principles
|
|
140
|
+
|
|
141
|
+
### Model-agnostic
|
|
142
|
+
|
|
143
|
+
Works with any model that implements a `predict(x)` interface:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
# TensorFlow, PyTorch, sklearn, or fully custom — all compatible
|
|
147
|
+
output = model.predict(input)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Input-agnostic
|
|
151
|
+
|
|
152
|
+
Accepts any input type — images, sequences, tabular data, or custom formats. Transformations are applied modularly and do not depend on input structure.
|
|
153
|
+
|
|
154
|
+
### Modular architecture
|
|
155
|
+
|
|
156
|
+
| Component | Role |
|
|
157
|
+
| ----------- | -------------------------------------- |
|
|
158
|
+
| `Model` | Generates predictions |
|
|
159
|
+
| `Transform` | Modifies input samples |
|
|
160
|
+
| `Relation` | Defines expected behavioral properties |
|
|
161
|
+
| `Analyzer` | Computes failure metrics and summaries |
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Project Structure
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
AutoMR-Framework/
|
|
169
|
+
│
|
|
170
|
+
├── automr/
|
|
171
|
+
│ ├── api.py
|
|
172
|
+
│ ├── comparator.py
|
|
173
|
+
│ │
|
|
174
|
+
│ ├── core/
|
|
175
|
+
│ │ ├── range_tester.py
|
|
176
|
+
│ │ └── failure_analysis.py
|
|
177
|
+
│ │
|
|
178
|
+
│ ├── relations/
|
|
179
|
+
│ ├── transforms/
|
|
180
|
+
│ └── analysis/
|
|
181
|
+
│
|
|
182
|
+
├── run_test_example.py
|
|
183
|
+
├── requirements.txt
|
|
184
|
+
├── .gitignore
|
|
185
|
+
└── automrlogo.png
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Example Run
|
|
191
|
+
|
|
192
|
+
```
|
|
193
|
+
Running AutoMR: ██████████████ 100%
|
|
194
|
+
|
|
195
|
+
=== AutoMR Results ===
|
|
196
|
+
Failure Summary:
|
|
197
|
+
BrightnessRelation → 12.4% failure rate | avg deviation: 0.031
|
|
198
|
+
RotationRelation → 8.7% failure rate | avg deviation: 0.019
|
|
199
|
+
NoiseRelation → 21.1% failure rate | avg deviation: 0.074
|
|
200
|
+
FogRelation → 34.2% failure rate | avg deviation: 0.112
|
|
201
|
+
|
|
202
|
+
DONE: Results saved in /results
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Built-in Analysis
|
|
208
|
+
|
|
209
|
+
AutoMR automatically computes the following after each test run:
|
|
210
|
+
|
|
211
|
+
- **Failure rate** per metamorphic relation
|
|
212
|
+
- **Severity** — average output deviation across failures
|
|
213
|
+
- **Worst-case failures** — samples with the largest deviations
|
|
214
|
+
- **Failure regions** — parameter ranges where the model is most unstable
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## Limitations
|
|
219
|
+
|
|
220
|
+
- Current transformation suite is image-focused
|
|
221
|
+
- Comparator thresholds require manual tuning per task
|
|
222
|
+
- End-to-end performance depends on model inference speed
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Future Work
|
|
227
|
+
|
|
228
|
+
- NLP and tabular transformation extensions
|
|
229
|
+
- Classification-specific comparators
|
|
230
|
+
- Streamlit dashboard for interactive analysis
|
|
231
|
+
- Cross-model MR testing
|
|
232
|
+
- Automated result visualizations (plots and charts)
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## Authors
|
|
237
|
+
|
|
238
|
+
**CharithManaujayaMUTEC** — [github.com/CharithManaujayaMUTEC](https://github.com/CharithManaujayaMUTEC)
|
|
239
|
+
**RaveeshaPeiris** — [github.com/RaveeshaPeiris](https://github.com/RaveeshaPeiris)
|
|
240
|
+
|
|
241
|
+
> Final Year Project — Metamorphic Testing Framework for Regression-Based Autonomous Driving AI/ML Models
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## License
|
|
246
|
+
|
|
247
|
+
Released under the [MIT License](LICENSE).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .api import AutoMR
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
class Analyzer:
    """Convert raw test records into a DataFrame and aggregate pass/fail counts."""

    def to_dataframe(self, results):
        """Build a DataFrame from ``results`` and attach a ``status`` column.

        A falsy ``results`` (e.g. an empty list) yields an empty DataFrame
        with no columns. Otherwise the records must provide a ``passed``
        entry; each row's ``status`` is ``"PASS"`` when that entry is truthy
        and ``"FAIL"`` when it is not.
        """
        if results:
            frame = pd.DataFrame(results)
            # Label every row from the truthiness of its "passed" value.
            frame["status"] = [
                "PASS" if flag else "FAIL" for flag in frame["passed"]
            ]
            return frame

        return pd.DataFrame()

    def summary(self, df):
        """Return total/passed/failed counts and the pass rate in percent.

        An empty ``df`` produces all-zero statistics. Otherwise the counts
        are derived from the ``passed`` column and returned as plain Python
        ``int``/``float`` values.
        """
        if df.empty:
            return {"total": 0, "passed": 0, "failed": 0, "pass_rate": 0.0}

        row_count = len(df)
        pass_count = df["passed"].sum()
        fail_count = row_count - pass_count

        stats = {"total": row_count}
        stats["passed"] = int(pass_count)
        stats["failed"] = int(fail_count)
        stats["pass_rate"] = float(pass_count / row_count * 100)
        return stats
|