pytest-flakefighters 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
+ """
+ This module manages all interaction with the test run database.
+ """
+
+ from dataclasses import dataclass
+ from datetime import datetime, timedelta
+ from typing import Union
+
+ from sqlalchemy import (
+     Boolean,
+     CheckConstraint,
+     Column,
+     DateTime,
+     ForeignKey,
+     Integer,
+     PickleType,
+     String,
+     Text,
+     create_engine,
+     desc,
+     func,
+     select,
+ )
+ from sqlalchemy.orm import (
+     DeclarativeBase,
+     Mapped,
+     Session,
+     declared_attr,
+     relationship,
+ )
+
+
+ @dataclass
+ class Base(DeclarativeBase):
+     """
+     Declarative base class for data objects.
+     """
+
+     id: Mapped[int] = Column(Integer, primary_key=True)  # pylint: disable=C0103
+     # @pytest, these are not the tests you're looking for...
+     __test__ = False  # pylint: disable=C0103
+
+     @declared_attr
+     def __tablename__(self):
+         return self.__name__.lower()
+
+
+ @dataclass
+ class Run(Base):
+     """
+     Class to store attributes of a flakefighters run.
+     """
+
+     created_at = Column(DateTime, default=func.now())
+     root: Mapped[str] = Column(String)
+     tests = relationship("Test", backref="run", lazy="subquery", cascade="all, delete", passive_deletes=True)
+     active_flakefighters = relationship(
+         "ActiveFlakeFighter", backref="run", lazy="subquery", cascade="all, delete", passive_deletes=True
+     )
+
+
+ @dataclass
+ class ActiveFlakeFighter(Base):
+     """
+     Store relevant information about the active flakefighters.
+     """
+
+     run_id: Mapped[int] = Column(Integer, ForeignKey("run.id"), nullable=False)
+     name: Mapped[str] = Column(String)
+     params: Mapped[dict] = Column(PickleType)
+
+
+ @dataclass
+ class Test(Base):
+     """
+     Class to store attributes of a test case.
+     """
+
+     run_id: Mapped[int] = Column(Integer, ForeignKey("run.id"), nullable=False)
+     name: Mapped[str] = Column(String)
+     skipped: Mapped[bool] = Column(Boolean, default=False)
+     executions = relationship(
+         "TestExecution", backref="test", lazy="subquery", cascade="all, delete", passive_deletes=True
+     )
+     flakefighter_results = relationship(
+         "FlakefighterResult", backref="test", lazy="subquery", cascade="all, delete", passive_deletes=True
+     )
+
+     @property
+     def flaky(self) -> bool | None:
+         """
+         Return whether a test (or any of its executions) has been marked as flaky by any flakefighter.
+         """
+         if not self.executions and not self.flakefighter_results:
+             return None
+         return any(result.flaky for result in self.flakefighter_results) or any(
+             execution.flaky for execution in self.executions
+         )
+
+
+ @dataclass
+ class TestExecution(Base):  # pylint: disable=R0902
+     """
+     Class to store attributes of a test outcome.
+     """
+
+     __tablename__ = "test_execution"
+
+     test_id: Mapped[int] = Column(Integer, ForeignKey("test.id"), nullable=False)
+     outcome: Mapped[str] = Column(String)
+     stdout: Mapped[str] = Column(Text)
+     stderr: Mapped[str] = Column(Text)
+     report: Mapped[str] = Column(Text)
+     start_time: Mapped[datetime] = Column(DateTime(timezone=True))
+     end_time: Mapped[datetime] = Column(DateTime(timezone=True))
+     coverage: Mapped[dict] = Column(PickleType)
+     flakefighter_results = relationship(
+         "FlakefighterResult", backref="test_execution", lazy="subquery", cascade="all, delete", passive_deletes=True
+     )
+     exception = relationship(
+         "TestException",
+         uselist=False,
+         backref="test_execution",
+         lazy="subquery",
+         cascade="all, delete",
+         passive_deletes=True,
+     )
+
+     @property
+     def flaky(self) -> bool:
+         """
+         Return whether this execution has been marked as flaky by any flakefighter.
+         """
+         return any(result.flaky for result in self.flakefighter_results)
+
+
+ @dataclass
+ class TestException(Base):  # pylint: disable=R0902
+     """
+     Class to store information about the exceptions that cause tests to fail.
+     """
+
+     __tablename__ = "test_exception"
+
+     execution_id: Mapped[int] = Column(Integer, ForeignKey("test_execution.id"), nullable=False)
+     name: Mapped[str] = Column(String)
+     traceback = relationship(
+         "TracebackEntry", backref="exception", lazy="subquery", cascade="all, delete", passive_deletes=True
+     )
+
+
+ @dataclass
+ class TracebackEntry(Base):  # pylint: disable=R0902
+     """
+     Class to store attributes of entries in the stack trace.
+     """
+
+     exception_id: Mapped[int] = Column(Integer, ForeignKey("test_exception.id"), nullable=False)
+     path: Mapped[str] = Column(String)
+     lineno: Mapped[int] = Column(Integer)
+     colno: Mapped[int] = Column(Integer)
+     statement: Mapped[str] = Column(String)
+     source: Mapped[str] = Column(Text)
+
+
+ @dataclass
+ class FlakefighterResult(Base):  # pylint: disable=R0902
+     """
+     Class to store flakefighter results.
+     """
+
+     __tablename__ = "flakefighter_result"
+
+     test_execution_id: Mapped[int] = Column(Integer, ForeignKey("test_execution.id"), nullable=True)
+     test_id: Mapped[int] = Column(Integer, ForeignKey("test.id"), nullable=True)
+     name: Mapped[str] = Column(String)
+     flaky: Mapped[bool] = Column(Boolean)
+
+     __table_args__ = (
+         CheckConstraint("(test_execution_id IS NOT NULL) + (test_id IS NOT NULL) = 1", name="check_test_id_not_null"),
+     )
+
+
+ class Database:
+     """
+     Class to handle database setup and interaction.
+     """
+
+     def __init__(
+         self,
+         url: str,
+         load_max_runs: int = None,
+         store_max_runs: int = None,
+         time_immemorial: Union[timedelta, str] = None,
+     ):
+         if isinstance(time_immemorial, str) and time_immemorial:
+             days, hours, minutes = [int(x) for x in time_immemorial.split(":")]
+             time_immemorial = timedelta(days=days, hours=hours, minutes=minutes)
+
+         self.engine = create_engine(url)
+         Base.metadata.create_all(self.engine)
+
+         self.store_max_runs = store_max_runs
+         self.time_immemorial = time_immemorial
+         self.previous_runs = self.load_runs(load_max_runs)
+
+     def save(self, run: Run):
+         """
+         Save the given run into the database.
+         """
+         with Session(self.engine) as session:
+             session.add(run)
+             if self.time_immemorial is not None:
+                 expiry_date = datetime.now() - self.time_immemorial
+                 for r in session.query(Run).filter(Run.created_at < expiry_date):
+                     session.delete(r)
+
+             if self.store_max_runs is not None:
+                 for r in self.load_runs()[self.store_max_runs - 1 :]:
+                     session.delete(r)
+             session.commit()
+             session.flush()
+
+     def load_runs(self, limit: int = None):
+         """
+         Load runs from the database.
+
+         :param limit: The maximum number of runs to return (these will be the most recent runs).
+         """
+         with Session(self.engine) as session:
+             return session.scalars(select(Run).order_by(desc(Run.id)).limit(limit)).all()
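
For orientation, a hedged usage sketch of the Database class above. The URL, retention values, and project path are illustrative rather than taken from the package; the retention string follows the "days:hours:minutes" format parsed in __init__, and the Run construction assumes the declarative models accept their mapped columns as keyword arguments, as in standard SQLAlchemy usage.

from pytest_flakefighters.database_management import Database, Run

db = Database(
    "sqlite:///flakefighters.db",  # illustrative URL
    load_max_runs=10,              # how many past runs to preload into db.previous_runs
    store_max_runs=50,             # on save(), prune the oldest runs beyond this count
    time_immemorial="30:0:0",      # on save(), drop runs older than 30 days
)

run = Run(root="/path/to/project")  # normally assembled by the pytest plugin
db.save(run)                        # commits the run and prunes expired/excess runs
recent = db.load_runs(limit=5)      # newest runs first, at most five of them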
@@ -0,0 +1,50 @@
+ """
+ This module implements the FlakeFighter abstract class to be extended by concrete flakefighter classes.
+ Each of these is a microservice which takes a failed test and classifies that failure as either genuine or flaky.
+ This classification can be configured to either run "live" after each test, or as a postprocessing step on the entire
+ test suite after it completes.
+ If running live, detectors are run at the end of pytest_runtest_makereport.
+ If running as a postprocessing step, detectors are run at the start of pytest_sessionfinish.
+ """
+
+ from abc import ABC, abstractmethod
+
+ from pytest_flakefighters.database_management import Run, TestExecution
+
+
+ class FlakeFighter(ABC):  # pylint: disable=R0903
+     """
+     Abstract base class for a FlakeFighter.
+     :ivar run_live: Run detection "live" after each test. Otherwise run as a postprocessing step after the test suite.
+     """
+
+     def __init__(self, run_live: bool):
+         self.run_live = run_live
+
+     @classmethod
+     @abstractmethod
+     def from_config(cls, config: dict):
+         """
+         Factory method to create a new instance from a pytest configuration.
+         """
+
+     @abstractmethod
+     def flaky_test_live(self, execution: TestExecution):
+         """
+         Detect whether a given test execution is flaky and append the result to its `flakefighter_results` attribute.
+         :param execution: The test execution to classify.
+         """
+
+     @abstractmethod
+     def flaky_tests_post(self, run: Run):
+         """
+         Go through each test in the test suite and append the result to its `flakefighter_results` attribute.
+         :param run: Run object representing the pytest run, with tests accessible through run.tests.
+         """
+
+     @abstractmethod
+     def params(self) -> dict:
+         """
+         Convert the key parameters into a dictionary so that the object can be replicated.
+         :return: A dictionary of the parameters used to create the object.
+         """
@@ -0,0 +1,99 @@
+ """
+ This module implements the CoverageIndependence FlakeFighter.
+ """
+
+ import pandas as pd
+ from scipy.cluster.hierarchy import fcluster, linkage
+ from scipy.spatial.distance import pdist
+
+ from pytest_flakefighters.database_management import (
+     FlakefighterResult,
+     Run,
+     TestExecution,
+ )
+ from pytest_flakefighters.flakefighters.abstract_flakefighter import FlakeFighter
+
+
+ class CoverageIndependence(FlakeFighter):
+     """
+     Classify tests as flaky if they fail independently of passing test cases that exercise overlapping code.
+
+     :ivar threshold: The maximum distance at which two executions are still clustered together as "similar", expressed
+         as a proportion 0 <= threshold < 1 where 0 represents no difference and 1 represents complete difference.
+     :ivar metric: From `scipy.spatial.distance`: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice',
+         'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+         'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'].
+     :ivar linkage_method: From `scipy.cluster.hierarchy.linkage`: ['single', 'complete', 'average', 'weighted',
+         'centroid', 'median', 'ward'].
+     """
+
+     def __init__(self, threshold: float = 0, metric: str = "jaccard", linkage_method: str = "single"):
+         super().__init__(False)
+         self.threshold = threshold
+         self.metric = metric
+         self.linkage_method = linkage_method
+
+     @classmethod
+     def from_config(cls, config: dict):
+         """
+         Factory method to create a new instance from a pytest configuration.
+         """
+         return CoverageIndependence(
+             threshold=config.get("threshold", 0),
+             metric=config.get("metric", "jaccard"),
+             linkage_method=config.get("linkage_method", "single"),
+         )
+
+     def params(self):
+         """
+         Convert the key parameters into a dictionary so that the object can be replicated.
+         :return: A dictionary of the parameters used to create the object.
+         """
+         return {"threshold": self.threshold, "metric": self.metric, "linkage_method": self.linkage_method}
+
+     def flaky_test_live(self, execution: TestExecution):
+         """
+         NOT SUPPORTED.
+         Detect whether a given test execution is flaky and append the result to its `flakefighter_results` attribute.
+         :param execution: The test execution to classify.
+         """
+         raise NotImplementedError("Coverage independence cannot be measured live")
+
+     def flaky_tests_post(self, run: Run):
+         """
+         Go through each test in the test suite and append the result to its `flakefighter_results` attribute.
+         :param run: Run object representing the pytest run, with tests accessible through run.tests.
+         """
+         coverage = []
+         # Enumerating tests and executions since they won't have IDs if they are not yet in the database
+         for test in run.tests:
+             for execution in test.executions:
+                 coverage.append(
+                     {"test": test, "execution": execution}
+                     | {f"{file}:{line}": True for file in execution.coverage for line in execution.coverage[file]}
+                 )
+
+         # Can't compute the pairwise distance of a single execution
+         if len(coverage) < 2:
+             return
+
+         coverage = pd.DataFrame(coverage)
+         coverage[coverage.columns.drop(["test", "execution"])] = (
+             coverage[coverage.columns.drop(["test", "execution"])].astype(pd.BooleanDtype()).fillna(False).astype(bool)
+         )
+         # Calculate the distance between each pair of test executions
+         raw_coverage = coverage.drop(["test", "execution"], axis=1).to_numpy()
+         distances = pdist(raw_coverage, metric=self.metric)
+         # Assign each test execution to a cluster
+         coverage["cluster"] = fcluster(
+             linkage(distances, method=self.linkage_method), t=self.threshold, criterion="distance"
+         )
+
+         for _, group in coverage.groupby("cluster"):
+             for test in group["test"]:
+                 result = FlakefighterResult(
+                     name=self.__class__.__name__,
+                     flaky=len(set(map(lambda x: x.outcome, group["execution"]))) > 1,
+                 )
+                 if result not in test.flakefighter_results:
+                     test.flakefighter_results.append(result)
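
The clustering step above can be seen in isolation on a toy coverage matrix. This is an illustrative sketch only; the matrix values and the [1 1 2] labelling are examples, not output captured from the package.

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from scipy.spatial.distance import pdist

# Rows are test executions, columns are "file:line" coverage flags.
coverage = np.array(
    [
        [1, 1, 0, 0],  # passing execution
        [1, 1, 0, 0],  # failing execution with identical coverage
        [0, 0, 1, 1],  # unrelated execution
    ],
    dtype=bool,
)
distances = pdist(coverage, metric="jaccard")  # pairwise Jaccard distances
clusters = fcluster(linkage(distances, method="single"), t=0, criterion="distance")
# clusters comes out as e.g. [1 1 2]: the first two executions share a cluster, so a
# failure there disagrees with a pass over the same covered code and is flagged flaky.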
@@ -0,0 +1,126 @@
+ """
+ This module implements the DeFlaker FlakeFighter.
+ """
+
+ import os
+
+ import git
+ from unidiff import PatchSet
+
+ from pytest_flakefighters.database_management import (
+     FlakefighterResult,
+     Run,
+     TestExecution,
+ )
+ from pytest_flakefighters.flakefighters.abstract_flakefighter import FlakeFighter
+
+
+ class DeFlaker(FlakeFighter):
+     """
+     A Python equivalent of the DeFlaker algorithm from Bell et al. 2018 [10.1145/3180155.3180164].
+     Given the subtle differences between JUnit and pytest, this is not intended to be an exact port, but it follows
+     the same general methodology of checking whether covered code has been changed between commits.
+
+     :ivar root: The root directory of the Git repository.
+     :ivar source_commit: The source (older) commit hash. Defaults to HEAD^ (the parent of the target commit).
+     :ivar target_commit: The target (newer) commit hash. Defaults to HEAD (the most recent commit).
+     """
+
+     def __init__(self, run_live: bool, root: str = ".", source_commit: str = None, target_commit: str = None):
+         super().__init__(run_live)
+
+         self.repo_root = git.Repo(root)
+         if target_commit is None and not self.repo_root.is_dirty():
+             # No uncommitted changes, so use the most recent commit
+             self.target_commit = self.repo_root.commit().hexsha
+         else:
+             self.target_commit = target_commit
+         if source_commit is None:
+             if self.target_commit is None:
+                 # Uncommitted changes, so use the most recent commit as source
+                 self.source_commit = self.repo_root.commit().hexsha
+             else:
+                 # No uncommitted changes, so use the previous commit as source
+                 parents = [
+                     commit.hexsha
+                     for commit in self.repo_root.commit(self.target_commit).iter_parents()
+                     if commit.hexsha != self.target_commit
+                 ]
+                 self.source_commit = parents[0]
+         else:
+             self.source_commit = source_commit
+
+         patches = PatchSet(self.repo_root.git.diff(self.source_commit, self.target_commit, "-U0", "--no-prefix"))
+         self.lines_changed = {}
+         for patch in patches:
+             if patch.target_file == patch.source_file:
+                 abspath = os.path.join(self.repo_root.working_dir, patch.source_file)
+                 self.lines_changed[abspath] = []
+                 for hunk in patch:
+                     # Add each line in the hunk to lines_changed
+                     self.lines_changed[abspath] += list(
+                         range(hunk.target_start, hunk.target_start + hunk.target_length)
+                     )
+
+     @classmethod
+     def from_config(cls, config: dict):
+         """
+         Factory method to create a new instance from a pytest configuration.
+         """
+         return DeFlaker(
+             run_live=config.get("run_live", True),
+             root=config.get("root", "."),
+             source_commit=config.get("source_commit"),
+             target_commit=config.get("target_commit"),
+         )
+
+     def params(self):
+         """
+         Convert the key parameters into a dictionary so that the object can be replicated.
+         :return: A dictionary of the parameters used to create the object.
+         """
+         return {"root": self.repo_root.working_dir, "source_commit": self.source_commit, "target_commit": self.target_commit}
+
+     def line_modified_by_target_commit(self, file_path: str, line_number: int) -> bool:
+         """
+         Return True if the given line in the file has been modified between the source and target commits.
+
+         :param file_path: The file to check.
+         :param line_number: The line number to check.
+         """
+         return line_number in self.lines_changed.get(file_path, [])
+
+     def _flaky_execution(self, execution):
+         """
+         Classify an execution as flaky or not.
+         :return: Boolean True if the execution is classed as flaky and False otherwise.
+         """
+         return not any(
+             execution.outcome == "failed" and self.line_modified_by_target_commit(file_path, line_number)
+             for file_path in execution.coverage
+             for line_number in execution.coverage[file_path]
+             if file_path in self.lines_changed
+         )
+
+     def flaky_test_live(self, execution: TestExecution):
+         """
+         Classify a failing test as flaky if it does not cover any code which has been changed between the source and
+         target commits.
+         :param execution: The test execution to classify.
+         """
+         execution.flakefighter_results.append(
+             FlakefighterResult(name=self.__class__.__name__, flaky=self._flaky_execution(execution))
+         )
+
+     def flaky_tests_post(self, run: Run):
+         """
+         Classify failing tests as flaky if any of their executions are flaky.
+         :param run: Run object representing the pytest run, with tests accessible through run.tests.
+         """
+         for test in run.tests:
+             test.flakefighter_results.append(
+                 FlakefighterResult(
+                     name=self.__class__.__name__,
+                     flaky=any(self._flaky_execution(execution) for execution in test.executions),
+                 )
+             )
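
A hedged sketch of the changed-line bookkeeping the constructor performs, applied to a hand-written unified diff. The file name and hunk are invented; with -U0 every hunk contains only changed lines, so target_start and target_length enumerate exactly the modified line numbers that line_modified_by_target_commit later checks against.

from unidiff import PatchSet

diff_text = "\n".join(
    [
        "--- example/calc.py",
        "+++ example/calc.py",
        "@@ -12,0 +13,2 @@",
        "+    if a is None:",
        '+        raise ValueError("a must not be None")',
        "",
    ]
)

lines_changed = {}
for patch in PatchSet(diff_text):
    if patch.target_file == patch.source_file:
        lines_changed[patch.source_file] = [
            line
            for hunk in patch
            for line in range(hunk.target_start, hunk.target_start + hunk.target_length)
        ]

print(lines_changed)  # {'example/calc.py': [13, 14]}
# A failed execution covering example/calc.py line 13 or 14 would not be flagged as flaky;
# a failure that touches none of the changed lines would be.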
@@ -0,0 +1,171 @@
+ """
+ This module implements two FlakeFighters based on failure de-duplication from Alshammari et al.
+ [https://arxiv.org/pdf/2401.15788].
+ """
+
+ import os
+ import re
+
+ import pandas as pd
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ from pytest_flakefighters.database_management import (
+     FlakefighterResult,
+     Run,
+     TestExecution,
+ )
+ from pytest_flakefighters.flakefighters.abstract_flakefighter import FlakeFighter
+
+
+ class TracebackMatching(FlakeFighter):
+     """
+     Simple text-based matching classifier from Section II.A of [Alshammari et al.].
+     We implement text-based matching on the failure logs for each test. Each failure log is represented by its failure
+     exception and stacktrace.
+     """
+
+     def __init__(self, run_live: bool, previous_runs: list[Run], root: str = "."):
+         super().__init__(run_live)
+         self.root = os.path.abspath(root)
+         self.previous_runs = previous_runs
+
+     @classmethod
+     def from_config(cls, config: dict):
+         """
+         Factory method to create a new instance from a pytest configuration.
+         """
+         return TracebackMatching(
+             run_live=config.get("run_live", True),
+             previous_runs=config["database"].previous_runs,
+             root=config.get("root", "."),
+         )
+
+     def params(self):
+         """
+         Convert the key parameters into a dictionary so that the object can be replicated.
+         :return: A dictionary of the parameters used to create the object.
+         """
+         return {"root": self.root}
+
+     def _flaky_execution(self, execution, previous_executions) -> bool:
+         """
+         Classify an execution as flaky if its traceback matches the traceback of an execution from a test previously
+         classed as flaky.
+         :return: Boolean True if the test is classed as flaky and False otherwise.
+         """
+         if not execution.exception:
+             return False
+         current_traceback = [
+             (os.path.relpath(e.path, self.root), e.lineno, e.colno, e.statement)
+             for e in execution.exception.traceback
+             if os.path.commonpath([self.root, e.path]) == self.root
+         ]
+         return any(e == current_traceback for e in previous_executions)
+
+     def previous_flaky_executions(self, runs: list[Run] = None) -> list:
+         """
+         Extract the relevant information from previous flaky executions and collapse it into a single list.
+         :param runs: The runs to consider. Defaults to self.previous_runs.
+         :return: List containing the relative path, line number, column number, and code statement of all previous
+             flaky test executions.
+         """
+         if runs is None:
+             runs = self.previous_runs
+         return [
+             [
+                 (os.path.relpath(elem.path, run.root), elem.lineno, elem.colno, elem.statement)
+                 for elem in execution.exception.traceback
+             ]
+             for run in runs
+             for test in run.tests
+             if test.flaky
+             for execution in test.executions
+             if execution.exception
+         ]
+
+     def flaky_test_live(self, execution: TestExecution):
+         """
+         Classify the execution as flaky if it has the same failure log as a previously flaky execution.
+         :param execution: Test execution to consider.
+         """
+         execution.flakefighter_results.append(
+             FlakefighterResult(
+                 name=self.__class__.__name__,
+                 flaky=self._flaky_execution(
+                     execution,
+                     self.previous_flaky_executions(),
+                 ),
+             )
+         )
+
+     def flaky_tests_post(self, run: Run):
+         """
+         Classify each failing execution as flaky if its traceback matches that of a previously flaky execution.
+         :param run: Run object representing the pytest run, with tests accessible through run.tests.
+         """
+         for test in run.tests:
+             for execution in test.executions:
+                 execution.flakefighter_results.append(
+                     FlakefighterResult(
+                         name=self.__class__.__name__,
+                         flaky=self._flaky_execution(
+                             execution, self.previous_flaky_executions(self.previous_runs + [run])
+                         ),
+                     )
+                 )
+
+
+ class CosineSimilarity(TracebackMatching):
+     """
+     TF-IDF cosine similarity matching classifier from Section II.C of [Alshammari et al.].
+     Test executions are classified as flaky if the stack trace is sufficiently similar to a previous flaky execution.
+     """
+
+     def __init__(self, run_live: bool, previous_runs: list[Run], root: str = ".", threshold: float = 1):
+         super().__init__(run_live, previous_runs, root)
+         self.root = os.path.abspath(root)
+         self.previous_runs = previous_runs
+         self.threshold = threshold
+
+     @classmethod
+     def from_config(cls, config: dict):
+         """
+         Factory method to create a new instance from a pytest configuration.
+         """
+         return CosineSimilarity(
+             run_live=config.get("run_live", True),
+             previous_runs=config["database"].previous_runs,
+             root=config.get("root", "."),
+             threshold=config.get("threshold", 1),
+         )
+
+     def _tf_idf_matrix(self, executions):
+         corpus = [
+             re.sub(r"[^\w\s]", " ", "\n".join([" ".join(map(str, entry)) for entry in execution]))
+             for execution in executions
+         ]
+         vectorizer = TfidfVectorizer()
+         tfidf_matrix = vectorizer.fit_transform(corpus)
+
+         feature_names = vectorizer.get_feature_names_out()
+         return pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)
+
+     def _flaky_execution(self, execution, previous_executions) -> bool:
+         """
+         Classify an execution as flaky if the test execution is sufficiently cosine-similar to any of the previous
+         executions.
+         :return: Boolean True if the test is classed as flaky and False otherwise.
+         """
+         if not execution.exception or not previous_executions:
+             return False
+
+         execution = [
+             (os.path.relpath(elem.path, self.root), elem.lineno, elem.colno, elem.statement)
+             for elem in execution.exception.traceback
+         ]
+
+         tf_idf_matrix = self._tf_idf_matrix([execution] + previous_executions)
+
+         similarity = cosine_similarity(tf_idf_matrix.iloc[0].values.reshape(1, -1), tf_idf_matrix.iloc[1:].values)
+         return (similarity >= self.threshold).any()
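
As a closing illustration of the similarity check above, a hedged sketch with invented traceback strings (already normalised to "path line column statement" text, roughly as _tf_idf_matrix would produce); the numbers in the comment are indicative, not captured output.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

current = "tests test_api py 42 4 assert response status_code 200"
previous_flaky = "tests test_api py 42 4 assert response status_code 200"
unrelated = "tests test_db py 7 0 session commit"

matrix = TfidfVectorizer().fit_transform([current, previous_flaky, unrelated])
scores = cosine_similarity(matrix[0], matrix[1:])
# scores[0, 0] is 1.0 (up to floating point) for the identical traceback and scores[0, 1]
# is much lower, so with the default threshold of 1 only an exact token-level match is
# classed as flaky; lowering the threshold admits near-matches as well.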