pytest-flakemark 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytest_flakemark-1.0.0/LICENSE +21 -0
- pytest_flakemark-1.0.0/PKG-INFO +263 -0
- pytest_flakemark-1.0.0/README.md +228 -0
- pytest_flakemark-1.0.0/flakemark/__init__.py +11 -0
- pytest_flakemark-1.0.0/flakemark/differ/__init__.py +1 -0
- pytest_flakemark-1.0.0/flakemark/differ/divergence.py +476 -0
- pytest_flakemark-1.0.0/flakemark/engine.py +275 -0
- pytest_flakemark-1.0.0/flakemark/pytest_plugin.py +91 -0
- pytest_flakemark-1.0.0/flakemark/tracer/__init__.py +1 -0
- pytest_flakemark-1.0.0/flakemark/tracer/instrument.py +383 -0
- pytest_flakemark-1.0.0/pytest_flakemark.egg-info/PKG-INFO +263 -0
- pytest_flakemark-1.0.0/pytest_flakemark.egg-info/SOURCES.txt +17 -0
- pytest_flakemark-1.0.0/pytest_flakemark.egg-info/dependency_links.txt +1 -0
- pytest_flakemark-1.0.0/pytest_flakemark.egg-info/entry_points.txt +2 -0
- pytest_flakemark-1.0.0/pytest_flakemark.egg-info/requires.txt +1 -0
- pytest_flakemark-1.0.0/pytest_flakemark.egg-info/top_level.txt +1 -0
- pytest_flakemark-1.0.0/setup.cfg +4 -0
- pytest_flakemark-1.0.0/setup.py +53 -0
- pytest_flakemark-1.0.0/tests/test_accuracy.py +207 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Khushdeep Sharma, Chitkara University
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pytest-flakemark
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: FLAKEMARK — Differential execution tracer that finds the exact file, line, and root cause of any flaky test.
|
|
5
|
+
Home-page: https://github.com/khushsharma509/flakemark
|
|
6
|
+
Author: Khushdeep Sharma
|
|
7
|
+
Author-email: itskhushsharma@gmail.com
|
|
8
|
+
Project-URL: Bug Reports, https://github.com/khushsharma509/flakemark/issues
|
|
9
|
+
Project-URL: Source, https://github.com/khushsharma509/flakemark
|
|
10
|
+
Keywords: pytest,flaky-tests,flakemark,debugging,root-cause,differential-analysis,test-diagnosis,execution-trace
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Framework :: Pytest
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Topic :: Software Development :: Testing
|
|
15
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: pytest>=7.0
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: author-email
|
|
25
|
+
Dynamic: classifier
|
|
26
|
+
Dynamic: description
|
|
27
|
+
Dynamic: description-content-type
|
|
28
|
+
Dynamic: home-page
|
|
29
|
+
Dynamic: keywords
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
Dynamic: project-url
|
|
32
|
+
Dynamic: requires-dist
|
|
33
|
+
Dynamic: requires-python
|
|
34
|
+
Dynamic: summary
|
|
35
|
+
|
|
36
|
+
# FLAKEMARK
|
|
37
|
+
|
|
38
|
+
**pytest-flakemark — Find the exact line where your flaky test breaks.**
|
|
39
|
+
|
|
40
|
+
Not "your test is flaky." The actual file. The actual line. The actual fix.
|
|
41
|
+
|
|
42
|
+
Built by Khushdeep Sharma — Chitkara University, 2026.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## The Problem
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
FAILED tests/test_login.py::test_user_session
|
|
50
|
+
[Flaky — rerunning]
|
|
51
|
+
PASSED tests/test_login.py::test_user_session
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Every existing tool gives you this. It tells you nothing new.
|
|
55
|
+
|
|
56
|
+
## What FLAKEMARK Gives You
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
--------------------------------------------------------
|
|
60
|
+
FLAKEMARK - Flaky Test Root Cause Found
|
|
61
|
+
--------------------------------------------------------
|
|
62
|
+
File: tests/test_login.py
|
|
63
|
+
Line: 47
|
|
64
|
+
Function: test_user_session
|
|
65
|
+
Type: timing_delta
|
|
66
|
+
Cause: Race condition or timing dependency
|
|
67
|
+
|
|
68
|
+
Detail: Line 47: 1.2ms (pass) vs 148.3ms (fail) — 124x timing difference.
|
|
69
|
+
|
|
70
|
+
Fix: Replace time.sleep(N) with threading.Event().wait()
|
|
71
|
+
or asyncio.wait_for(). Never hardcode sleep durations in tests.
|
|
72
|
+
|
|
73
|
+
Confidence: 85% | Total divergences: 1
|
|
74
|
+
--------------------------------------------------------
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## How FLAKEMARK Works
|
|
80
|
+
|
|
81
|
+
FLAKEMARK instruments your test at the AST level, runs it twice simultaneously,
|
|
82
|
+
records every operation in both runs, then finds the exact line where the two
|
|
83
|
+
executions diverged. That divergence is your bug.
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
Your test
|
|
87
|
+
├── Run 1 (instrumented) → ExecutionTrace A [op, op, op ...]
|
|
88
|
+
└── Run 2 (instrumented) → ExecutionTrace B [op, op, op ...]
|
|
89
|
+
↓
|
|
90
|
+
DifferentialAnalyser
|
|
91
|
+
Two-pointer trace walk
|
|
92
|
+
↓
|
|
93
|
+
"Line 47: 124x timing difference"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Install
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pip install pytest-flakemark
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Or use directly (no install needed):
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
git clone https://github.com/khushsharma509/flakemark
|
|
108
|
+
cd flakemark
|
|
109
|
+
# Add to PYTHONPATH and use
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Usage — 4 Ways
|
|
115
|
+
|
|
116
|
+
### 1. Source string
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from flakemark import FlakeMark
|
|
120
|
+
|
|
121
|
+
source = """
|
|
122
|
+
import random
|
|
123
|
+
def test_flaky():
|
|
124
|
+
result = random.randint(0, 1)
|
|
125
|
+
assert result == 1
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
report = FlakeMark.diagnose_source(source, "test_flaky", runs=10)
|
|
129
|
+
print(report)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### 2. Real test file
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from flakemark import FlakeMark
|
|
136
|
+
|
|
137
|
+
report = FlakeMark.diagnose_file(
|
|
138
|
+
filepath = "tests/test_api.py",
|
|
139
|
+
test_func_name = "test_user_session",
|
|
140
|
+
runs = 6,
|
|
141
|
+
project_root = "/path/to/your/project",
|
|
142
|
+
)
|
|
143
|
+
print(report)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### 3. Batch scan entire folder
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from flakemark import FlakeMark
|
|
150
|
+
|
|
151
|
+
results = FlakeMark.diagnose_batch("tests/", runs=4)
|
|
152
|
+
|
|
153
|
+
flaky = {k: v for k, v in results.items() if v.is_found()}
|
|
154
|
+
print(f"FLAKEMARK found {len(flaky)} flaky tests:\n")
|
|
155
|
+
for name, report in flaky.items():
|
|
156
|
+
print(f" {name}")
|
|
157
|
+
print(f" Line {report.primary.line} — {report.primary.divergence_type.value}")
|
|
158
|
+
print(f" Fix: {report.primary.fix[:60]}")
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### 4. pytest CLI (after pip install)
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pytest --flakemark-diagnose tests/
|
|
165
|
+
pytest --flakemark-diagnose --flakemark-runs=8 tests/test_api.py
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## What FLAKEMARK Detects
|
|
171
|
+
|
|
172
|
+
| Type | What it means | Root cause |
|
|
173
|
+
|---|---|---|
|
|
174
|
+
| `value_mismatch` | Same line, different value | `random`, shared state |
|
|
175
|
+
| `timing_delta` | Same op, 3x+ slower | `time.sleep()`, race condition |
|
|
176
|
+
| `thread_race` | Same op, different thread | Missing `Lock()` |
|
|
177
|
+
| `sequence_break` | Different execution path | Test order dependency |
|
|
178
|
+
| `missing_event` | One run skipped an operation | Conditional on external state |
|
|
179
|
+
| `early_termination` | One run ended much sooner | Timeout, unhandled exception |
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Parameters
|
|
184
|
+
|
|
185
|
+
| Parameter | Default | Meaning |
|
|
186
|
+
|---|---|---|
|
|
187
|
+
| `runs` | `4` | Times to run. Use `10+` for low-frequency flakes |
|
|
188
|
+
| `timeout` | `30` | Seconds before a run is killed |
|
|
189
|
+
| `project_root` | `os.getcwd()` | Project root so imports work |
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Accuracy
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
python tests/test_accuracy.py
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
```
|
|
200
|
+
Score: 15/15 = 100%
|
|
201
|
+
✓ VALUE_MISMATCH detected (78% confidence)
|
|
202
|
+
✓ TIMING_DELTA detected (85% confidence)
|
|
203
|
+
✓ THREAD_RACE detected (93% confidence)
|
|
204
|
+
✓ EARLY_TERMINATION detected (55% confidence)
|
|
205
|
+
✓ No false positive on stable test (0%)
|
|
206
|
+
✓ Async test handled correctly
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Comparison to Other Tools
|
|
212
|
+
|
|
213
|
+
| | FLAKEMARK | FlakyGuard | pytest-randomly | CANNIER | Divergent |
|
|
214
|
+
|---|---|---|---|---|---|
|
|
215
|
+
| Finds exact root cause line | **Yes** | No | No | No | Yes (JS only) |
|
|
216
|
+
| Python / pytest | **Yes** | No (Java) | Yes | Yes | No (JS) |
|
|
217
|
+
| AST instrumentation | **Yes** | No | No | No | Partial |
|
|
218
|
+
| Subprocess isolation | **Yes** | No | No | No | No |
|
|
219
|
+
| Async test support | **Yes** | No | Yes | No | No |
|
|
220
|
+
| Zero dependencies beyond pytest | **Yes** | No | No | No | No |
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Project Structure
|
|
225
|
+
|
|
226
|
+
```
|
|
227
|
+
flakemark/
|
|
228
|
+
├── flakemark/
|
|
229
|
+
│ ├── __init__.py FlakeMark, DivergenceReport, DivergenceType
|
|
230
|
+
│ ├── engine.py FlakeMark class — diagnose_file/source/batch
|
|
231
|
+
│ ├── pytest_plugin.py pytest plugin — --flakemark-diagnose flag
|
|
232
|
+
│ ├── tracer/
|
|
233
|
+
│ │ └── instrument.py TraceInserter + run_instrumented() ← CORE IP
|
|
234
|
+
│ └── differ/
|
|
235
|
+
│ └── divergence.py DifferentialAnalyser + DivergenceType ← CORE IP
|
|
236
|
+
├── tests/
|
|
237
|
+
│ └── test_accuracy.py Accuracy test suite
|
|
238
|
+
├── setup.py PyPI config
|
|
239
|
+
└── README.md
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## What to Copyright
|
|
245
|
+
|
|
246
|
+
| File | What is original |
|
|
247
|
+
|---|---|
|
|
248
|
+
| `tracer/instrument.py` — `TraceInserter` | AST NodeTransformer for execution recording |
|
|
249
|
+
| `tracer/instrument.py` — `run_instrumented()` | Subprocess isolation runner with JSON protocol |
|
|
250
|
+
| `differ/divergence.py` — `DifferentialAnalyser._walk()` | Two-pointer differential trace walk |
|
|
251
|
+
| `differ/divergence.py` — `DivergenceType` | Six-category flakiness classification schema |
|
|
252
|
+
| `differ/divergence.py` — `_score()` | Confidence scoring formula |
|
|
253
|
+
| `engine.py` — `FlakeMark._collect_traces()` | Concurrent dual-run orchestration |
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## License
|
|
258
|
+
|
|
259
|
+
MIT — Copyright © 2026 Khushdeep Sharma, Chitkara University
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
*FLAKEMARK — Find the line. Fix the test.*
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# FLAKEMARK
|
|
2
|
+
|
|
3
|
+
**pytest-flakemark — Find the exact line where your flaky test breaks.**
|
|
4
|
+
|
|
5
|
+
Not "your test is flaky." The actual file. The actual line. The actual fix.
|
|
6
|
+
|
|
7
|
+
Built by Khushdeep Sharma — Chitkara University, 2026.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## The Problem
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
FAILED tests/test_login.py::test_user_session
|
|
15
|
+
[Flaky — rerunning]
|
|
16
|
+
PASSED tests/test_login.py::test_user_session
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Every existing tool gives you this. It tells you nothing new.
|
|
20
|
+
|
|
21
|
+
## What FLAKEMARK Gives You
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
--------------------------------------------------------
|
|
25
|
+
FLAKEMARK - Flaky Test Root Cause Found
|
|
26
|
+
--------------------------------------------------------
|
|
27
|
+
File: tests/test_login.py
|
|
28
|
+
Line: 47
|
|
29
|
+
Function: test_user_session
|
|
30
|
+
Type: timing_delta
|
|
31
|
+
Cause: Race condition or timing dependency
|
|
32
|
+
|
|
33
|
+
Detail: Line 47: 1.2ms (pass) vs 148.3ms (fail) — 124x timing difference.
|
|
34
|
+
|
|
35
|
+
Fix: Replace time.sleep(N) with threading.Event().wait()
|
|
36
|
+
or asyncio.wait_for(). Never hardcode sleep durations in tests.
|
|
37
|
+
|
|
38
|
+
Confidence: 85% | Total divergences: 1
|
|
39
|
+
--------------------------------------------------------
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## How FLAKEMARK Works
|
|
45
|
+
|
|
46
|
+
FLAKEMARK instruments your test at the AST level, runs it twice simultaneously,
|
|
47
|
+
records every operation in both runs, then finds the exact line where the two
|
|
48
|
+
executions diverged. That divergence is your bug.
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
Your test
|
|
52
|
+
├── Run 1 (instrumented) → ExecutionTrace A [op, op, op ...]
|
|
53
|
+
└── Run 2 (instrumented) → ExecutionTrace B [op, op, op ...]
|
|
54
|
+
↓
|
|
55
|
+
DifferentialAnalyser
|
|
56
|
+
Two-pointer trace walk
|
|
57
|
+
↓
|
|
58
|
+
"Line 47: 124x timing difference"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Install
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install pytest-flakemark
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Or use directly (no install needed):
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
git clone https://github.com/khushsharma509/flakemark
|
|
73
|
+
cd flakemark
|
|
74
|
+
# Add to PYTHONPATH and use
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Usage — 4 Ways
|
|
80
|
+
|
|
81
|
+
### 1. Source string
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from flakemark import FlakeMark
|
|
85
|
+
|
|
86
|
+
source = """
|
|
87
|
+
import random
|
|
88
|
+
def test_flaky():
|
|
89
|
+
result = random.randint(0, 1)
|
|
90
|
+
assert result == 1
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
report = FlakeMark.diagnose_source(source, "test_flaky", runs=10)
|
|
94
|
+
print(report)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### 2. Real test file
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from flakemark import FlakeMark
|
|
101
|
+
|
|
102
|
+
report = FlakeMark.diagnose_file(
|
|
103
|
+
filepath = "tests/test_api.py",
|
|
104
|
+
test_func_name = "test_user_session",
|
|
105
|
+
runs = 6,
|
|
106
|
+
project_root = "/path/to/your/project",
|
|
107
|
+
)
|
|
108
|
+
print(report)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### 3. Batch scan entire folder
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
from flakemark import FlakeMark
|
|
115
|
+
|
|
116
|
+
results = FlakeMark.diagnose_batch("tests/", runs=4)
|
|
117
|
+
|
|
118
|
+
flaky = {k: v for k, v in results.items() if v.is_found()}
|
|
119
|
+
print(f"FLAKEMARK found {len(flaky)} flaky tests:\n")
|
|
120
|
+
for name, report in flaky.items():
|
|
121
|
+
print(f" {name}")
|
|
122
|
+
print(f" Line {report.primary.line} — {report.primary.divergence_type.value}")
|
|
123
|
+
print(f" Fix: {report.primary.fix[:60]}")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### 4. pytest CLI (after pip install)
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
pytest --flakemark-diagnose tests/
|
|
130
|
+
pytest --flakemark-diagnose --flakemark-runs=8 tests/test_api.py
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## What FLAKEMARK Detects
|
|
136
|
+
|
|
137
|
+
| Type | What it means | Root cause |
|
|
138
|
+
|---|---|---|
|
|
139
|
+
| `value_mismatch` | Same line, different value | `random`, shared state |
|
|
140
|
+
| `timing_delta` | Same op, 3x+ slower | `time.sleep()`, race condition |
|
|
141
|
+
| `thread_race` | Same op, different thread | Missing `Lock()` |
|
|
142
|
+
| `sequence_break` | Different execution path | Test order dependency |
|
|
143
|
+
| `missing_event` | One run skipped an operation | Conditional on external state |
|
|
144
|
+
| `early_termination` | One run ended much sooner | Timeout, unhandled exception |
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Parameters
|
|
149
|
+
|
|
150
|
+
| Parameter | Default | Meaning |
|
|
151
|
+
|---|---|---|
|
|
152
|
+
| `runs` | `4` | Times to run. Use `10+` for low-frequency flakes |
|
|
153
|
+
| `timeout` | `30` | Seconds before a run is killed |
|
|
154
|
+
| `project_root` | `os.getcwd()` | Project root so imports work |
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Accuracy
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
python tests/test_accuracy.py
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
Score: 15/15 = 100%
|
|
166
|
+
✓ VALUE_MISMATCH detected (78% confidence)
|
|
167
|
+
✓ TIMING_DELTA detected (85% confidence)
|
|
168
|
+
✓ THREAD_RACE detected (93% confidence)
|
|
169
|
+
✓ EARLY_TERMINATION detected (55% confidence)
|
|
170
|
+
✓ No false positive on stable test (0%)
|
|
171
|
+
✓ Async test handled correctly
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Comparison to Other Tools
|
|
177
|
+
|
|
178
|
+
| | FLAKEMARK | FlakyGuard | pytest-randomly | CANNIER | Divergent |
|
|
179
|
+
|---|---|---|---|---|---|
|
|
180
|
+
| Finds exact root cause line | **Yes** | No | No | No | Yes (JS only) |
|
|
181
|
+
| Python / pytest | **Yes** | No (Java) | Yes | Yes | No (JS) |
|
|
182
|
+
| AST instrumentation | **Yes** | No | No | No | Partial |
|
|
183
|
+
| Subprocess isolation | **Yes** | No | No | No | No |
|
|
184
|
+
| Async test support | **Yes** | No | Yes | No | No |
|
|
185
|
+
| Zero dependencies beyond pytest | **Yes** | No | No | No | No |
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Project Structure
|
|
190
|
+
|
|
191
|
+
```
|
|
192
|
+
flakemark/
|
|
193
|
+
├── flakemark/
|
|
194
|
+
│ ├── __init__.py FlakeMark, DivergenceReport, DivergenceType
|
|
195
|
+
│ ├── engine.py FlakeMark class — diagnose_file/source/batch
|
|
196
|
+
│ ├── pytest_plugin.py pytest plugin — --flakemark-diagnose flag
|
|
197
|
+
│ ├── tracer/
|
|
198
|
+
│ │ └── instrument.py TraceInserter + run_instrumented() ← CORE IP
|
|
199
|
+
│ └── differ/
|
|
200
|
+
│ └── divergence.py DifferentialAnalyser + DivergenceType ← CORE IP
|
|
201
|
+
├── tests/
|
|
202
|
+
│ └── test_accuracy.py Accuracy test suite
|
|
203
|
+
├── setup.py PyPI config
|
|
204
|
+
└── README.md
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## What to Copyright
|
|
210
|
+
|
|
211
|
+
| File | What is original |
|
|
212
|
+
|---|---|
|
|
213
|
+
| `tracer/instrument.py` — `TraceInserter` | AST NodeTransformer for execution recording |
|
|
214
|
+
| `tracer/instrument.py` — `run_instrumented()` | Subprocess isolation runner with JSON protocol |
|
|
215
|
+
| `differ/divergence.py` — `DifferentialAnalyser._walk()` | Two-pointer differential trace walk |
|
|
216
|
+
| `differ/divergence.py` — `DivergenceType` | Six-category flakiness classification schema |
|
|
217
|
+
| `differ/divergence.py` — `_score()` | Confidence scoring formula |
|
|
218
|
+
| `engine.py` — `FlakeMark._collect_traces()` | Concurrent dual-run orchestration |
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
## License
|
|
223
|
+
|
|
224
|
+
MIT — Copyright © 2026 Khushdeep Sharma, Chitkara University
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
*FLAKEMARK — Find the line. Fix the test.*
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# flakemark/__init__.py
|
|
2
|
+
# FLAKEMARK — Differential Execution Tracer for Flaky Test Root Cause Diagnosis
|
|
3
|
+
# Copyright: Khushdeep Sharma, Chitkara University, 2026
|
|
4
|
+
|
|
5
|
+
from flakemark.engine import FlakeMark
|
|
6
|
+
from flakemark.differ.divergence import DivergenceReport, DivergenceType
|
|
7
|
+
|
|
8
|
+
__version__ = "1.0.0"
|
|
9
|
+
__author__ = "Khushdeep Sharma — Chitkara University 2026"
|
|
10
|
+
__email__ = "itskhushsharma@gmail.com"
|
|
11
|
+
__all__ = ["FlakeMark", "DivergenceReport", "DivergenceType"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# flakemark/differ/__init__.py
|