pytest-flakefighters 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytest_flakefighters/__init__.py +0 -0
- pytest_flakefighters/database_management.py +231 -0
- pytest_flakefighters/flakefighters/__init__.py +0 -0
- pytest_flakefighters/flakefighters/abstract_flakefighter.py +50 -0
- pytest_flakefighters/flakefighters/coverage_independence.py +99 -0
- pytest_flakefighters/flakefighters/deflaker.py +126 -0
- pytest_flakefighters/flakefighters/traceback_matching.py +171 -0
- pytest_flakefighters/function_coverage.py +71 -0
- pytest_flakefighters/main.py +162 -0
- pytest_flakefighters/plugin.py +225 -0
- pytest_flakefighters/rerun_strategies.py +90 -0
- pytest_flakefighters-0.0.0.dist-info/METADATA +116 -0
- pytest_flakefighters-0.0.0.dist-info/RECORD +17 -0
- pytest_flakefighters-0.0.0.dist-info/WHEEL +5 -0
- pytest_flakefighters-0.0.0.dist-info/entry_points.txt +8 -0
- pytest_flakefighters-0.0.0.dist-info/licenses/LICENSE +22 -0
- pytest_flakefighters-0.0.0.dist-info/top_level.txt +1 -0
pytest_flakefighters/__init__.py: File without changes

pytest_flakefighters/database_management.py
@@ -0,0 +1,231 @@
+"""
+This module manages all interaction with the test run database.
+"""
+
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Union
+
+from sqlalchemy import (
+    Boolean,
+    CheckConstraint,
+    Column,
+    DateTime,
+    ForeignKey,
+    Integer,
+    PickleType,
+    String,
+    Text,
+    create_engine,
+    desc,
+    func,
+    select,
+)
+from sqlalchemy.orm import (
+    DeclarativeBase,
+    Mapped,
+    Session,
+    declared_attr,
+    relationship,
+)
+
+
+@dataclass
+class Base(DeclarativeBase):
+    """
+    Declarative base class for data objects.
+    """
+
+    id: Mapped[int] = Column(Integer, primary_key=True)  # pylint: disable=C0103
+    # @pytest, these are not the tests you're looking for...
+    __test__ = False  # pylint: disable=C0103
+
+    @declared_attr
+    def __tablename__(self):
+        return self.__name__.lower()
+
+
+@dataclass
+class Run(Base):
+    """
+    Class to store attributes of a flakefighters run.
+    """
+
+    created_at = Column(DateTime, default=func.now())
+    root: Mapped[str] = Column(String)
+    tests = relationship("Test", backref="run", lazy="subquery", cascade="all, delete", passive_deletes=True)
+    active_flakefighters = relationship(
+        "ActiveFlakeFighter", backref="run", lazy="subquery", cascade="all, delete", passive_deletes=True
+    )
+
+
+@dataclass
+class ActiveFlakeFighter(Base):
+    """
+    Store relevant information about the active flakefighters.
+    """
+
+    run_id: Mapped[int] = Column(Integer, ForeignKey("run.id"), nullable=False)
+    name: Mapped[str] = Column(String)
+    params: Mapped[dict] = Column(PickleType)
+
+
+@dataclass
+class Test(Base):
+    """
+    Class to store attributes of a test case.
+    """
+
+    run_id: Mapped[int] = Column(Integer, ForeignKey("run.id"), nullable=False)
+    name: Mapped[str] = Column(String)
+    skipped: Mapped[bool] = Column(Boolean, default=False)
+    executions = relationship(
+        "TestExecution", backref="test", lazy="subquery", cascade="all, delete", passive_deletes=True
+    )
+    flakefighter_results = relationship(
+        "FlakefighterResult", backref="test", lazy="subquery", cascade="all, delete", passive_deletes=True
+    )
+
+    @property
+    def flaky(self) -> bool:
+        """
+        Return whether a test (or any of its executions) has been marked as flaky by any flakefighter.
+        """
+        if not self.executions and not self.flakefighter_results:
+            return None
+        return any(result.flaky for result in self.flakefighter_results) or any(
+            execution.flaky for execution in self.executions
+        )
+
+
+@dataclass
+class TestExecution(Base):  # pylint: disable=R0902
+    """
+    Class to store attributes of a test outcome.
+    """
+
+    __tablename__ = "test_execution"
+
+    test_id: Mapped[int] = Column(Integer, ForeignKey("test.id"), nullable=False)
+    outcome: Mapped[str] = Column(String)
+    stdout: Mapped[str] = Column(Text)
+    stderr: Mapped[str] = Column(Text)
+    report: Mapped[str] = Column(Text)
+    start_time: Mapped[datetime] = Column(DateTime(timezone=True))
+    end_time: Mapped[datetime] = Column(DateTime(timezone=True))
+    coverage: Mapped[dict] = Column(PickleType)
+    flakefighter_results = relationship(
+        "FlakefighterResult", backref="test_execution", lazy="subquery", cascade="all, delete", passive_deletes=True
+    )
+    exception = relationship(
+        "TestException",
+        uselist=False,
+        backref="test_execution",
+        lazy="subquery",
+        cascade="all, delete",
+        passive_deletes=True,
+    )
+
+    @property
+    def flaky(self) -> bool:
+        """
+        Return whether a test (or any of its executions) has been marked as flaky by any flakefighter.
+        """
+        return any(result.flaky for result in self.flakefighter_results)
+
+
+@dataclass
+class TestException(Base):  # pylint: disable=R0902
+    """
+    Class to store information about the exceptions that cause tests to fail.
+    """
+
+    __tablename__ = "test_exception"
+
+    execution_id: Mapped[int] = Column(Integer, ForeignKey("test_execution.id"), nullable=False)
+    name: Mapped[str] = Column(String)
+    traceback = relationship(
+        "TracebackEntry", backref="exception", lazy="subquery", cascade="all, delete", passive_deletes=True
+    )
+
+
+@dataclass
+class TracebackEntry(Base):  # pylint: disable=R0902
+    """
+    Class to store attributes of entries in the stack trace.
+    """
+
+    exception_id: Mapped[int] = Column(Integer, ForeignKey("test_exception.id"), nullable=False)
+    path: Mapped[str] = Column(String)
+    lineno: Mapped[int] = Column(Integer)
+    colno: Mapped[int] = Column(Integer)
+    statement: Mapped[str] = Column(String)
+    source: Mapped[str] = Column(Text)
+
+
+@dataclass
+class FlakefighterResult(Base):  # pylint: disable=R0902
+    """
+    Class to store flakefighter results.
+    """
+
+    __tablename__ = "flakefighter_result"
+
+    test_execution_id: Mapped[int] = Column(Integer, ForeignKey("test_execution.id"), nullable=True)
+    test_id: Mapped[int] = Column(Integer, ForeignKey("test.id"), nullable=True)
+    name: Mapped[str] = Column(String)
+    flaky: Mapped[bool] = Column(Boolean)
+
+    __table_args__ = (
+        CheckConstraint("(test_execution_id IS NOT NULL) + (test_id IS NOT NULL) = 1", name="check_test_id_not_null"),
+    )
+
+
+class Database:
+    """
+    Class to handle database setup and interaction.
+    """
+
+    def __init__(
+        self,
+        url: str,
+        load_max_runs: int = None,
+        store_max_runs: int = None,
+        time_immemorial: Union[timedelta, str] = None,
+    ):
+        if isinstance(time_immemorial, str) and time_immemorial:
+            days, hours, minutes = [int(x) for x in time_immemorial.split(":")]
+            time_immemorial = timedelta(days=days, hours=hours, minutes=minutes)
+
+        self.engine = create_engine(url)
+        Base.metadata.create_all(self.engine)
+
+        self.store_max_runs = store_max_runs
+        self.time_immemorial = time_immemorial
+        self.previous_runs = self.load_runs(load_max_runs)
+
+    def save(self, run: Run):
+        """
+        Save the given run into the database.
+        """
+        with Session(self.engine) as session:
+            session.add(run)
+            if self.time_immemorial is not None:
+                expiry_date = datetime.now() - self.time_immemorial
+                for r in session.query(Run).filter(Run.created_at < (expiry_date - self.time_immemorial)):
+                    session.delete(r)
+
+            if self.store_max_runs is not None:
+                for r in self.load_runs()[self.store_max_runs - 1 :]:
+                    session.delete(r)
+            session.commit()
+            session.flush()
+
+    def load_runs(self, limit: int = None):
+        """
+        Load runs from the database.
+
+        :param limit: The maximum number of runs to return (these will be most recent runs).
+        """
+        with Session(self.engine) as session:
+            return session.scalars(select(Run).order_by(desc(Run.id)).limit(limit)).all()
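
Note: the `Database` class above wraps engine creation, schema creation, pruning of old runs on save, and loading of
previous runs. A rough usage sketch with hypothetical values (an illustrative assumption, not taken from the package
documentation); the "days:hours:minutes" string form of `time_immemorial` follows the parsing in `Database.__init__`:

    # Hypothetical configuration, for illustration only.
    from pytest_flakefighters.database_management import Database

    db = Database(
        "sqlite:///flakefighters.db",  # any SQLAlchemy URL
        load_max_runs=10,              # previous_runs holds at most the 10 newest runs
        store_max_runs=50,             # on save(), runs beyond the 50 newest are deleted
        time_immemorial="30:0:0",      # on save(), runs older than roughly 30 days are deleted
    )
    print(len(db.previous_runs))       # runs loaded from earlier pytest sessions, newest first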

pytest_flakefighters/flakefighters/__init__.py: File without changes

pytest_flakefighters/flakefighters/abstract_flakefighter.py
@@ -0,0 +1,50 @@
+"""
+This module implements the FlakeFighter abstract class to be extended by concrete flakefighter classes.
+Each of these is a microservice which takes a failed test and classifies that failure as either genuine or flaky.
+This classification can be configured to either run "live" after each test, or as a postprocessing step on the entire
+test suite after it completes.
+If running live, detectors are run at the end of pytest_runtest_makereport.
+If running as a postprocessing step, detectors are run at the start of pytest_sessionfinish.
+"""
+
+from abc import ABC, abstractmethod
+
+from pytest_flakefighters.database_management import Run, TestExecution
+
+
+class FlakeFighter(ABC):  # pylint: disable=R0903
+    """
+    Abstract base class for a FlakeFighter
+    :ivar run_live: Run detection "live" after each test. Otherwise run as a postprocessing step after the test suite.
+    """
+
+    def __init__(self, run_live: bool):
+        self.run_live = run_live
+
+    @classmethod
+    @abstractmethod
+    def from_config(cls, config: dict):
+        """
+        Factory method to create a new instance from a pytest configuration.
+        """
+
+    @abstractmethod
+    def flaky_test_live(self, execution: TestExecution):
+        """
+        Detect whether a given test execution is flaky and append the result to its `flakefighter_results` attribute.
+        :param execution: The test execution to classify.
+        """
+
+    @abstractmethod
+    def flaky_tests_post(self, run: Run):
+        """
+        Go through each test in the test suite and append the result to its `flakefighter_results` attribute.
+        :param run: Run object representing the pytest run, with tests accessible through run.tests.
+        """
+
+    @abstractmethod
+    def params(self) -> dict:
+        """
+        Convert the key parameters into a dictionary so that the object can be replicated.
+        :return A dictionary of the parameters used to create the object.
+        """
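
Note: the concrete flakefighters later in this diff all follow the same pattern when extending this base class. A
minimal sketch of a custom subclass (the `NeverFlaky` name is hypothetical and not part of the package):

    # Hypothetical example subclass; not part of pytest-flakefighters.
    from pytest_flakefighters.database_management import FlakefighterResult, Run, TestExecution
    from pytest_flakefighters.flakefighters.abstract_flakefighter import FlakeFighter


    class NeverFlaky(FlakeFighter):
        """Marks every failure as genuine (flaky=False); useful only as a template."""

        @classmethod
        def from_config(cls, config: dict):
            return NeverFlaky(run_live=config.get("run_live", True))

        def params(self) -> dict:
            return {}

        def flaky_test_live(self, execution: TestExecution):
            # Live mode: called once per test execution.
            execution.flakefighter_results.append(
                FlakefighterResult(name=self.__class__.__name__, flaky=False)
            )

        def flaky_tests_post(self, run: Run):
            # Postprocessing mode: called once with the whole run.
            for test in run.tests:
                for execution in test.executions:
                    self.flaky_test_live(execution)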

pytest_flakefighters/flakefighters/coverage_independence.py
@@ -0,0 +1,99 @@
+"""
+This module implements the CoverageIndependence FlakeFighter.
+"""
+
+import pandas as pd
+from scipy.cluster.hierarchy import fcluster, linkage
+from scipy.spatial.distance import pdist
+
+from pytest_flakefighters.database_management import (
+    FlakefighterResult,
+    Run,
+    TestExecution,
+)
+from pytest_flakefighters.flakefighters.abstract_flakefighter import FlakeFighter
+
+
+class CoverageIndependence(FlakeFighter):
+    """
+    Classify tests as flaky if they fail independently of passing test cases that exercise overlapping code.
+
+    :ivar threshold: The minimum distance to consider as "similar", expressed as a proportion 0 <= threshold < 1 where 0
+    represents no difference and 1 represents complete difference.
+    :ivar metric: From `scipy.spatial.distance`: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice',
+    'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+    'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'].
+    :ivar linkage_method: From `scipy.cluster.hierarchy.linkage`: ['single', 'complete', 'average', 'weighted',
+    'centroid', 'median', 'ward']
+    """
+
+    def __init__(self, threshold: float = 0, metric: str = "jaccard", linkage_method="single"):
+        super().__init__(False)
+        self.threshold = threshold
+        self.metric = metric
+        self.linkage_method = linkage_method
+
+    @classmethod
+    def from_config(cls, config: dict):
+        """
+        Factory method to create a new instance from a pytest configuration.
+        """
+        return CoverageIndependence(
+            threshold=config.get("threshold", 0),
+            metric=config.get("metric", "jaccard"),
+            linkage_method=config.get("linkage_method", "single"),
+        )
+
+    def params(self):
+        """
+        Convert the key parameters into a dictionary so that the object can be replicated.
+        :return A dictionary of the parameters used to create the object.
+        """
+        return {"threshold": self.threshold, "metric": self.metric, "linkage_method": self.linkage_method}
+
+    def flaky_test_live(self, execution: TestExecution):
+        """
+        NOT SUPPORTED.
+        Detect whether a given test execution is flaky and append the result to its `flakefighter_results` attribute.
+        :param execution: The test execution to classify.
+        """
+        raise NotImplementedError("Coverage independence cannot be measured live")
+
+    def flaky_tests_post(self, run: Run):
+        """
+        Go through each test in the test suite and append the result to its `flakefighter_results` attribute.
+        :param run: Run object representing the pytest run, with tests accessible through run.tests.
+        """
+        coverage = []
+        # Enumerating tests and executions since they won't have IDs if they are not yet in the database
+        for test in run.tests:
+            for execution in test.executions:
+                coverage.append(
+                    {"test": test, "execution": execution}
+                    | {f"{file}:{line}": True for file in execution.coverage for line in execution.coverage[file]}
+                )
+
+        # Can't compute the pairwise distance of a single execution
+        if len(coverage) < 2:
+            return
+
+        coverage = pd.DataFrame(coverage)
+        coverage[coverage.columns.drop(["test", "execution"])] = (
+            coverage[coverage.columns.drop(["test", "execution"])].astype(pd.BooleanDtype()).fillna(False).astype(bool)
+        )
+        # Calculate the distance between each pair of test executions
+        raw_coverage = coverage.drop(["test", "execution"], axis=1).to_numpy()
+        distances = pdist(raw_coverage, metric=self.metric)
+        # Assign each test execution to a cluster
+        coverage["cluster"] = fcluster(
+            linkage(distances, method=self.linkage_method), t=self.threshold, criterion="distance"
+        )
+
+        for _, group in coverage.groupby("cluster"):
+            for test in group["test"]:
+                result = FlakefighterResult(
+                    name=self.__class__.__name__,
+                    flaky=len(set(map(lambda x: x.outcome, group["execution"]))) > 1,
+                )
+                if result not in test.flakefighter_results:
+                    test.flakefighter_results.append(result)
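
Note: the clustering in `flaky_tests_post` above reduces to a Jaccard distance matrix over boolean coverage vectors,
hierarchical linkage, and a flat cut at `threshold`; a cluster whose members disagree on outcome is treated as
flakiness, because the failure is independent of the covered code. A standalone sketch of that step with toy data
(hypothetical coverage values, not package code):

    import numpy as np
    from scipy.cluster.hierarchy import fcluster, linkage
    from scipy.spatial.distance import pdist

    # Toy data: rows are test executions, columns are "file:line" coverage flags.
    coverage = np.array(
        [
            [True, True, False],   # test_a: passed
            [True, True, False],   # test_b: failed, same coverage as test_a
            [False, False, True],  # test_c: failed, disjoint coverage
        ]
    )
    outcomes = ["passed", "failed", "failed"]

    distances = pdist(coverage, metric="jaccard")  # pairwise Jaccard distances
    clusters = fcluster(linkage(distances, method="single"), t=0, criterion="distance")

    for cluster_id in sorted(set(clusters)):
        members = [i for i, c in enumerate(clusters) if c == cluster_id]
        mixed_outcomes = len({outcomes[i] for i in members}) > 1
        # test_a and test_b share a cluster with mixed outcomes, so their failure looks flaky.
        print(cluster_id, members, mixed_outcomes)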

pytest_flakefighters/flakefighters/deflaker.py
@@ -0,0 +1,126 @@
+"""
+This module implements the DeFlaker FlakeFighter.
+"""
+
+import os
+
+import git
+from unidiff import PatchSet
+
+from pytest_flakefighters.database_management import (
+    FlakefighterResult,
+    Run,
+    TestExecution,
+)
+from pytest_flakefighters.flakefighters.abstract_flakefighter import FlakeFighter
+
+
+class DeFlaker(FlakeFighter):
+    """
+    A python equivalent of the DeFlaker algorithm from Bell et al. 2019 [10.1145/3180155.3180164].
+    Given the subtle differences between JUnit and pytest, this is not intended to be an exact port, but it follows
+    the same general methodology of checking whether covered code has been changed between commits.
+
+    :ivar root: The root directory of the Git repository.
+    :ivar source_commit: The source (older) commit hash. Defaults to HEAD^ (the previous commit to target).
+    :ivar target_commit: The target (newer) commit hash. Defaults to HEAD (the most recent commit).
+    """
+
+    def __init__(self, run_live: bool, root: str = ".", source_commit: str = None, target_commit: str = None):
+        super().__init__(run_live)
+
+        self.repo_root = git.Repo(root)
+        if target_commit is None and not self.repo_root.is_dirty():
+            # No uncommitted changes, so use most recent commit
+            self.target_commit = self.repo_root.commit().hexsha
+        else:
+            self.target_commit = target_commit
+        if source_commit is None:
+            if self.target_commit is None:
+                # If uncommitted changes, use most recent commit as source
+                self.source_commit = self.repo_root.commit().hexsha
+            else:
+                # If no uncommitted changes, use previous commit as source
+                parents = [
+                    commit.hexsha
+                    for commit in self.repo_root.commit(source_commit).iter_parents()
+                    if commit.hexsha != self.target_commit
+                ]
+                self.source_commit = parents[0]
+        else:
+            self.source_commit = source_commit
+
+        patches = PatchSet(self.repo_root.git.diff(self.source_commit, self.target_commit, "-U0", "--no-prefix"))
+        self.lines_changed = {}
+        for patch in patches:
+            if patch.target_file == patch.source_file:
+                abspath = os.path.join(self.repo_root.working_dir, patch.source_file)
+                self.lines_changed[abspath] = []
+                for hunk in patch:
+                    # Add each line in the hunk to lines_changed
+                    self.lines_changed[abspath] += list(
+                        range(hunk.target_start, hunk.target_start + hunk.target_length)
+                    )
+
+    @classmethod
+    def from_config(cls, config: dict):
+        """
+        Factory method to create a new instance from a pytest configuration.
+        """
+        return DeFlaker(
+            run_live=config.get("run_live", True),
+            root=config.get("root", "."),
+            source_commit=config.get("source_commit"),
+            target_commit=config.get("target_commit"),
+        )
+
+    def params(self):
+        """
+        Convert the key parameters into a dictionary so that the object can be replicated.
+        :return A dictionary of the parameters used to create the object.
+        """
+        return {"root": self.repo_root, "source_commit": self.source_commit, "target_commit": self.target_commit}
+
+    def line_modified_by_target_commit(self, file_path: str, line_number: int) -> bool:
+        """
+        Returns true if the given line in the file has been modified by the present commit.
+
+        :param file_path: The file to check.
+        :param line_number: The line number to check.
+        """
+        return line_number in self.lines_changed.get(file_path, [])
+
+    def _flaky_execution(self, execution):
+        """
+        Classify an execution as flaky or not.
+        :return: Boolean True if the test is classed as flaky and False otherwise.
+        """
+        return not any(
+            execution.outcome == "failed" and self.line_modified_by_target_commit(file_path, line_number)
+            for file_path in execution.coverage
+            for line_number in execution.coverage[file_path]
+            if file_path in self.lines_changed
+        )
+
+    def flaky_test_live(self, execution: TestExecution):
+        """
+        Classify a failing test as flaky if it does not cover any code which has been changed between the source and
+        target commits.
+        :param execution: The test execution to classify.
+        """
+        execution.flakefighter_results.append(
+            FlakefighterResult(name=self.__class__.__name__, flaky=self._flaky_execution(execution))
+        )
+
+    def flaky_tests_post(self, run: Run) -> list[bool | None]:
+        """
+        Classify failing tests as flaky if any of their executions are flaky.
+        :param run: Run object representing the pytest run, with tests accessible through run.tests.
+        """
+        for test in run.tests:
+            test.flakefighter_results.append(
+                FlakefighterResult(
+                    name=self.__class__.__name__,
+                    flaky=any(self._flaky_execution(execution) for execution in test.executions),
+                )
+            )
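
Note: the changed-lines map built in `DeFlaker.__init__` above comes from a zero-context (`-U0`) unified diff parsed
with `unidiff`, keeping only the target line numbers of each hunk. A standalone sketch with a hypothetical diff (not
package code):

    from unidiff import PatchSet

    # A minimal zero-context diff for one file (hypothetical content).
    diff_text = "\n".join(
        [
            "--- mypkg/calc.py",
            "+++ mypkg/calc.py",
            "@@ -10,0 +11,2 @@",
            "+    if denominator == 0:",
            "+        raise ZeroDivisionError",
            "",
        ]
    )

    lines_changed = {}
    for patch in PatchSet(diff_text):
        if patch.target_file == patch.source_file:
            lines_changed[patch.source_file] = [
                line
                for hunk in patch
                for line in range(hunk.target_start, hunk.target_start + hunk.target_length)
            ]

    print(lines_changed)  # {'mypkg/calc.py': [11, 12]}
    # Decision rule: a failed execution that covers none of these lines is classed as flaky,
    # because the failure cannot be explained by the change under test.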

pytest_flakefighters/flakefighters/traceback_matching.py
@@ -0,0 +1,171 @@
+"""
+This module implements three FlakeFighters based on failure de-duplication from Alshammari et al.
+[https://arxiv.org/pdf/2401.15788].
+"""
+
+import os
+import re
+
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+from pytest_flakefighters.database_management import (
+    FlakefighterResult,
+    Run,
+    TestExecution,
+)
+from pytest_flakefighters.flakefighters.abstract_flakefighter import FlakeFighter
+
+
+class TracebackMatching(FlakeFighter):
+    """
+    Simple text-based matching classifier from Section II.A of [Alshammari et al.].
+    We implement text-based matching on the failure logs for each test. Each failure log is represented by its failure
+    exception and stacktrace.
+    """
+
+    def __init__(self, run_live: bool, previous_runs: list[Run], root: str = "."):
+        super().__init__(run_live)
+        self.root = os.path.abspath(root)
+        self.previous_runs = previous_runs
+        print("TracebackMatching")
+
+    @classmethod
+    def from_config(cls, config: dict):
+        """
+        Factory method to create a new instance from a pytest configuration.
+        """
+        return TracebackMatching(
+            run_live=config.get("run_live", True),
+            previous_runs=config["database"].previous_runs,
+            root=config.get("root", "."),
+        )
+
+    def params(self):
+        """
+        Convert the key parameters into a dictionary so that the object can be replicated.
+        :return A dictionary of the parameters used to create the object.
+        """
+        return {"root": self.root}
+
+    def _flaky_execution(self, execution, previous_executions) -> bool:
+        """
+        Classify an execution as flaky if any of its failing executions has a traceback that matches a test previously
+        classed as flaky.
+        :return: Boolean True if the test is classed as flaky and False otherwise.
+        """
+        if not execution.exception:
+            return False
+        current_traceback = [
+            (os.path.relpath(e.path, self.root), e.lineno, e.colno, e.statement)
+            for e in execution.exception.traceback
+            if os.path.commonpath([self.root, e.path]) == self.root
+        ]
+        return any(e == current_traceback for e in previous_executions)
+
+    def previous_flaky_executions(self, runs: list[Run] = None) -> list:
+        """
+        Extract the relevant information from previous flaky executions and collapse into a single list.
+        :param runs: The runs to consider. Defaults to self.previous_runs.
+        :return: List containing the relative path, line number, column number, and code statement of all previous
+        test executions.
+        """
+        if runs is None:
+            runs = self.previous_runs
+        return [
+            [
+                (os.path.relpath(elem.path, run.root), elem.lineno, elem.colno, elem.statement)
+                for elem in execution.exception.traceback
+            ]
+            for run in runs
+            for test in run.tests
+            if test.flaky
+            for execution in test.executions
+            if execution.exception
+        ]
+
+    def flaky_test_live(self, execution: TestExecution):
+        """
+        Classify executions as flaky if they have the same failure logs as a flaky execution.
+        :param execution: Test execution to consider.
+        """
+        execution.flakefighter_results.append(
+            FlakefighterResult(
+                name=self.__class__.__name__,
+                flaky=self._flaky_execution(
+                    execution,
+                    self.previous_flaky_executions(),
+                ),
+            )
+        )
+
+    def flaky_tests_post(self, run: Run) -> list[bool | None]:
+        """
+        Classify failing executions as flaky if any of their executions are flaky.
+        :param run: Run object representing the pytest run, with tests accessible through run.tests.
+        """
+        for test in run.tests:
+            for execution in test.executions:
+                execution.flakefighter_results.append(
+                    FlakefighterResult(
+                        name=self.__class__.__name__,
+                        flaky=self._flaky_execution(
+                            execution, self.previous_flaky_executions(self.previous_runs + [run])
+                        ),
+                    )
+                )
+
+
+class CosineSimilarity(TracebackMatching):
+    """
+    TF-IDF cosine similarity matching classifier from Section II.C of [Alshammari et al.].
+    Test executions are classified as flaky if the stack trace is sufficiently similar to a previous flaky execution.
+    """
+
+    def __init__(self, run_live: bool, previous_runs: list[Run], root: str = ".", threshold: float = 1):
+        super().__init__(run_live, previous_runs, root)
+        self.root = os.path.abspath(root)
+        self.previous_runs = previous_runs
+        self.threshold = threshold
+
+    @classmethod
+    def from_config(cls, config: dict):
+        """
+        Factory method to create a new instance from a pytest configuration.
+        """
+        return CosineSimilarity(
+            run_live=config.get("run_live", True),
+            previous_runs=config["database"].previous_runs,
+            root=config.get("root", "."),
+        )
+
+    def _tf_idf_matrix(self, executions):
+        corpus = [
+            re.sub(r"[^\w\s]", " ", "\n".join([" ".join(map(str, tuple)) for tuple in execution]))
+            for execution in executions
+        ]
+        vectorizer = TfidfVectorizer()
+        tfidf_matrix = vectorizer.fit_transform(corpus)
+
+        feature_names = vectorizer.get_feature_names_out()
+        return pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)
+
+    def _flaky_execution(self, execution, previous_executions) -> bool:
+        """
+        Classify an execution as flaky if the test execution is sufficiently cosine-similar to any of the previous
+        executions.
+        :return: Boolean True if the test is classed as flaky and False otherwise.
+        """
+        if not execution.exception or not previous_executions:
+            return False
+
+        execution = [
+            (os.path.relpath(elem.path, self.root), elem.lineno, elem.colno, elem.statement)
+            for elem in execution.exception.traceback
+        ]
+
+        tf_idf_matrix = self._tf_idf_matrix([execution] + previous_executions)
+
+        similarity = cosine_similarity(tf_idf_matrix.iloc[0].values.reshape(1, -1), tf_idf_matrix.iloc[1:].values)
+        return (similarity >= self.threshold).any()
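
Note: `CosineSimilarity` above reduces each traceback to a punctuation-stripped string, vectorizes the corpus with
TF-IDF, and flags the new failure as flaky when its similarity to any known flaky traceback reaches `threshold`
(which defaults to 1, i.e. an exact match in TF-IDF space). A standalone sketch with hypothetical tracebacks and a
looser threshold (not package code):

    import re

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # Hypothetical "path lineno colno statement" traceback summaries.
    known_flaky = "tests/test_net.py 42 7 assert response.status_code == 200"
    new_failure = "tests/test_net.py 42 7 assert response.status_code == 200"
    corpus = [re.sub(r"[^\w\s]", " ", text) for text in (new_failure, known_flaky)]

    tfidf = TfidfVectorizer().fit_transform(corpus)
    similarity = cosine_similarity(tfidf[0], tfidf[1:])  # new failure vs. all known flaky tracebacks
    print((similarity >= 0.9).any())  # True here: the tracebacks match, so treat the failure as flaky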