pytest-split 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytest-split
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: Pytest plugin which splits the test suite to equally sized sub suites based on test execution time.
5
5
  Home-page: https://jerry-git.github.io/pytest-split
6
6
  License: MIT
@@ -18,6 +18,7 @@ Classifier: Programming Language :: Python :: 3.9
18
18
  Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
21
22
  Classifier: Programming Language :: Python :: 3.8
22
23
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
24
  Classifier: Typing :: Typed
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pytest-split"
3
- version = "0.9.0"
3
+ version = "0.10.0"
4
4
  description = "Pytest plugin which splits the test suite to equally sized sub suites based on test execution time."
5
5
  authors = [
6
6
  "Jerry Pussinen <jerry.pussinen@gmail.com>",
@@ -23,6 +23,7 @@ classifiers = [
23
23
  "Programming Language :: Python :: 3.10",
24
24
  "Programming Language :: Python :: 3.11",
25
25
  "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
26
27
  "Topic :: Software Development :: Libraries :: Python Modules",
27
28
  "Typing :: Typed",
28
29
  ]
@@ -0,0 +1,188 @@
1
+ import enum
2
+ import heapq
3
+ from abc import ABC, abstractmethod
4
+ from operator import itemgetter
5
+ from typing import TYPE_CHECKING, NamedTuple
6
+
7
+ if TYPE_CHECKING:
8
+ from typing import Dict, List, Tuple
9
+
10
+ from _pytest import nodes
11
+
12
+
13
class TestGroup(NamedTuple):
    """One split of the test suite as produced by a splitting algorithm."""

    # Items assigned to this group.
    selected: "List[nodes.Item]"
    # Items assigned to any of the other groups.
    deselected: "List[nodes.Item]"
    # Sum of the (known or estimated) durations of the selected items.
    duration: float
17
+
18
+
19
class AlgorithmBase(ABC):
    """Abstract base class for the algorithm implementations.

    Instances compare equal, and hash identically, when their classes share
    the same name; no per-instance state takes part in the comparison.
    """

    @abstractmethod
    def __call__(
        self, splits: int, items: "List[nodes.Item]", durations: "Dict[str, float]"
    ) -> "List[TestGroup]":
        """Split ``items`` into ``splits`` groups using the given ``durations``."""

    def __hash__(self) -> int:
        # Must stay consistent with __eq__, which compares class names only.
        return hash(type(self).__name__)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, AlgorithmBase):
            # Let Python try the reflected comparison / identity fallback.
            return NotImplemented
        return type(self).__name__ == type(other).__name__
35
+
36
+
37
class LeastDurationAlgorithm(AlgorithmBase):
    """
    Split tests into groups by runtime.

    It walks the test items, starting with the test with largest duration.
    It assigns the test with the largest runtime to the group with the smallest duration sum.

    Items are first ordered by ``str(item)`` and then stably sorted by descending duration, so tests with
    equal durations are processed in the order of their string form. Within each returned group the selected
    items keep their original relative order.

    :param splits: How many groups we're splitting in.
    :param items: Test items passed down by Pytest.
    :param durations: Our cached test runtimes, keyed by test node id.
    :return:
        List of groups
    """

    def __call__(
        self, splits: int, items: "List[nodes.Item]", durations: "Dict[str, float]"
    ) -> "List[TestGroup]":
        items_with_durations = _get_items_with_durations(items, durations)

        # Remember each item's position so the original order can be restored per group.
        items_with_durations_indexed = [
            (*tup, i) for i, tup in enumerate(items_with_durations)
        ]

        # Sort by name to ensure it's always the same order
        items_with_durations_indexed = sorted(
            items_with_durations_indexed, key=lambda tup: str(tup[0])
        )

        # Sort in descending order of duration; the sort is stable, so the
        # name ordering above decides between items of equal duration.
        sorted_items_with_durations = sorted(
            items_with_durations_indexed, key=itemgetter(1), reverse=True
        )

        selected: "List[List[Tuple[nodes.Item, int]]]" = [[] for _ in range(splits)]
        deselected: "List[List[nodes.Item]]" = [[] for _ in range(splits)]
        duration: "List[float]" = [0 for _ in range(splits)]

        # create a heap of the form (summed_durations, group_index)
        heap: "List[Tuple[float, int]]" = [(0, i) for i in range(splits)]
        heapq.heapify(heap)
        for item, item_duration, original_index in sorted_items_with_durations:
            # get group with smallest sum
            summed_durations, group_idx = heapq.heappop(heap)
            new_group_durations = summed_durations + item_duration

            # store assignment
            selected[group_idx].append((item, original_index))
            duration[group_idx] = new_group_durations
            for i in range(splits):
                if i != group_idx:
                    deselected[i].append(item)

            # store new duration - in case of ties it sorts by the group_idx
            heapq.heappush(heap, (new_group_durations, group_idx))

        groups = []
        for i in range(splits):
            # sort the items by their original index to maintain relative ordering
            # we don't care about the order of deselected items
            s = [item for item, _ in sorted(selected[i], key=itemgetter(1))]
            groups.append(
                TestGroup(selected=s, deselected=deselected[i], duration=duration[i])
            )
        return groups
109
+
110
+
111
class DurationBasedChunksAlgorithm(AlgorithmBase):
    """
    Split tests into groups by runtime.

    Ensures tests are split into non-overlapping groups.
    The original list of test items is split into groups by finding boundary indices i_0, i_1, i_2
    and creating group_1 = items[0:i_0], group_2 = items[i_0:i_1], group_3 = items[i_1:i_2], ...

    Once the last group has been reached, every remaining item is placed in it, so trailing
    zero-duration items or floating-point rounding cannot push the index past the final group.

    :param splits: How many groups we're splitting in.
    :param items: Test items passed down by Pytest.
    :param durations: Our cached test runtimes, keyed by test node id.
    :return: List of TestGroup
    """

    def __call__(
        self, splits: int, items: "List[nodes.Item]", durations: "Dict[str, float]"
    ) -> "List[TestGroup]":
        items_with_durations = _get_items_with_durations(items, durations)
        # Target duration of a single group.
        time_per_group = sum(map(itemgetter(1), items_with_durations)) / splits

        selected: "List[List[nodes.Item]]" = [[] for _ in range(splits)]
        deselected: "List[List[nodes.Item]]" = [[] for _ in range(splits)]
        duration: "List[float]" = [0 for _ in range(splits)]

        last_group_idx = splits - 1
        group_idx = 0
        for item, item_duration in items_with_durations:
            # Move on once the current group reached its target, but never
            # advance past the last group (that would raise IndexError below).
            if duration[group_idx] >= time_per_group and group_idx < last_group_idx:
                group_idx += 1

            selected[group_idx].append(item)
            for i in range(splits):
                if i != group_idx:
                    deselected[i].append(item)
            duration[group_idx] += item_duration

        return [
            TestGroup(
                selected=selected[i], deselected=deselected[i], duration=duration[i]
            )
            for i in range(splits)
        ]
151
+
152
+
153
def _get_items_with_durations(
    items: "List[nodes.Item]", durations: "Dict[str, float]"
) -> "List[Tuple[nodes.Item, float]]":
    """Pair every item with its duration, in the order of ``items``.

    Items whose ``nodeid`` has no recorded duration get the average duration
    of the items that do have one.
    """
    # Only durations of the given items may influence the average.
    durations = _remove_irrelevant_durations(items, durations)
    avg_duration_per_test = _get_avg_duration_per_test(durations)
    return [
        (item, durations.get(item.nodeid, avg_duration_per_test)) for item in items
    ]
162
+
163
+
164
+ def _get_avg_duration_per_test(durations: "Dict[str, float]") -> float:
165
+ if durations:
166
+ avg_duration_per_test = sum(durations.values()) / len(durations)
167
+ else:
168
+ # If there are no durations, give every test the same arbitrary value
169
+ avg_duration_per_test = 1
170
+ return avg_duration_per_test
171
+
172
+
173
+ def _remove_irrelevant_durations(
174
+ items: "List[nodes.Item]", durations: "Dict[str, float]"
175
+ ) -> "Dict[str, float]":
176
+ # Filtering down durations to relevant ones ensures the avg isn't skewed by irrelevant data
177
+ test_ids = [item.nodeid for item in items]
178
+ durations = {name: durations[name] for name in test_ids if name in durations}
179
+ return durations
180
+
181
+
182
class Algorithms(enum.Enum):
    """Registry of the available splitting algorithms; each value is a callable instance."""

    duration_based_chunks = DurationBasedChunksAlgorithm()
    least_duration = LeastDurationAlgorithm()

    @staticmethod
    def names() -> "List[str]":
        """Return the member names in definition order."""
        return [x.name for x in Algorithms]
@@ -1,162 +0,0 @@
1
- import enum
2
- import functools
3
- import heapq
4
- from operator import itemgetter
5
- from typing import TYPE_CHECKING, NamedTuple
6
-
7
- if TYPE_CHECKING:
8
- from typing import Dict, List, Tuple
9
-
10
- from _pytest import nodes
11
-
12
-
13
class TestGroup(NamedTuple):
    """One split of the test suite as produced by a splitting algorithm."""

    # Items assigned to this group.
    selected: "List[nodes.Item]"
    # Items assigned to any of the other groups.
    deselected: "List[nodes.Item]"
    # Sum of the (known or estimated) durations of the selected items.
    duration: float
17
-
18
-
19
def least_duration(
    splits: int, items: "List[nodes.Item]", durations: "Dict[str, float]"
) -> "List[TestGroup]":
    """
    Split tests into groups by runtime.

    It walks the test items, starting with the test with largest duration.
    It assigns the test with the largest runtime to the group with the smallest duration sum.

    Items are first ordered by ``str(item)`` and then stably sorted by descending duration, so tests with
    equal durations are processed in the order of their string form. Within each returned group the selected
    items keep their original relative order.

    :param splits: How many groups we're splitting in.
    :param items: Test items passed down by Pytest.
    :param durations: Our cached test runtimes, keyed by test node id.
    :return:
        List of groups
    """
    items_with_durations = _get_items_with_durations(items, durations)

    # Remember each item's position so the original order can be restored per group.
    items_with_durations_indexed = [
        (*tup, i) for i, tup in enumerate(items_with_durations)
    ]

    # Sort by name to ensure it's always the same order
    items_with_durations_indexed = sorted(
        items_with_durations_indexed, key=lambda tup: str(tup[0])
    )

    # Sort in descending order of duration; the sort is stable, so the
    # name ordering above decides between items of equal duration.
    sorted_items_with_durations = sorted(
        items_with_durations_indexed, key=itemgetter(1), reverse=True
    )

    selected: "List[List[Tuple[nodes.Item, int]]]" = [[] for _ in range(splits)]
    deselected: "List[List[nodes.Item]]" = [[] for _ in range(splits)]
    duration: "List[float]" = [0 for _ in range(splits)]

    # create a heap of the form (summed_durations, group_index)
    heap: "List[Tuple[float, int]]" = [(0, i) for i in range(splits)]
    heapq.heapify(heap)
    for item, item_duration, original_index in sorted_items_with_durations:
        # get group with smallest sum
        summed_durations, group_idx = heapq.heappop(heap)
        new_group_durations = summed_durations + item_duration

        # store assignment
        selected[group_idx].append((item, original_index))
        duration[group_idx] = new_group_durations
        for i in range(splits):
            if i != group_idx:
                deselected[i].append(item)

        # store new duration - in case of ties it sorts by the group_idx
        heapq.heappush(heap, (new_group_durations, group_idx))

    groups = []
    for i in range(splits):
        # sort the items by their original index to maintain relative ordering
        # we don't care about the order of deselected items
        s = [item for item, _ in sorted(selected[i], key=itemgetter(1))]
        groups.append(
            TestGroup(selected=s, deselected=deselected[i], duration=duration[i])
        )
    return groups
86
-
87
-
88
def duration_based_chunks(
    splits: int, items: "List[nodes.Item]", durations: "Dict[str, float]"
) -> "List[TestGroup]":
    """
    Split tests into groups by runtime.

    Ensures tests are split into non-overlapping groups.
    The original list of test items is split into groups by finding boundary indices i_0, i_1, i_2
    and creating group_1 = items[0:i_0], group_2 = items[i_0:i_1], group_3 = items[i_1:i_2], ...

    Once the last group has been reached, every remaining item is placed in it, so trailing
    zero-duration items or floating-point rounding cannot push the index past the final group.

    :param splits: How many groups we're splitting in.
    :param items: Test items passed down by Pytest.
    :param durations: Our cached test runtimes, keyed by test node id.
    :return: List of TestGroup
    """
    items_with_durations = _get_items_with_durations(items, durations)
    # Target duration of a single group.
    time_per_group = sum(map(itemgetter(1), items_with_durations)) / splits

    selected: "List[List[nodes.Item]]" = [[] for _ in range(splits)]
    deselected: "List[List[nodes.Item]]" = [[] for _ in range(splits)]
    duration: "List[float]" = [0 for _ in range(splits)]

    last_group_idx = splits - 1
    group_idx = 0
    for item, item_duration in items_with_durations:
        # Move on once the current group reached its target, but never
        # advance past the last group (that would raise IndexError below).
        if duration[group_idx] >= time_per_group and group_idx < last_group_idx:
            group_idx += 1

        selected[group_idx].append(item)
        for i in range(splits):
            if i != group_idx:
                deselected[i].append(item)
        duration[group_idx] += item_duration

    return [
        TestGroup(selected=selected[i], deselected=deselected[i], duration=duration[i])
        for i in range(splits)
    ]
124
-
125
-
126
def _get_items_with_durations(
    items: "List[nodes.Item]", durations: "Dict[str, float]"
) -> "List[Tuple[nodes.Item, float]]":
    """Pair every item with its duration, in the order of ``items``.

    Items whose ``nodeid`` has no recorded duration get the average duration
    of the items that do have one.
    """
    # Only durations of the given items may influence the average.
    durations = _remove_irrelevant_durations(items, durations)
    avg_duration_per_test = _get_avg_duration_per_test(durations)
    return [
        (item, durations.get(item.nodeid, avg_duration_per_test)) for item in items
    ]
135
-
136
-
137
- def _get_avg_duration_per_test(durations: "Dict[str, float]") -> float:
138
- if durations:
139
- avg_duration_per_test = sum(durations.values()) / len(durations)
140
- else:
141
- # If there are no durations, give every test the same arbitrary value
142
- avg_duration_per_test = 1
143
- return avg_duration_per_test
144
-
145
-
146
- def _remove_irrelevant_durations(
147
- items: "List[nodes.Item]", durations: "Dict[str, float]"
148
- ) -> "Dict[str, float]":
149
- # Filtering down durations to relevant ones ensures the avg isn't skewed by irrelevant data
150
- test_ids = [item.nodeid for item in items]
151
- durations = {name: durations[name] for name in test_ids if name in durations}
152
- return durations
153
-
154
-
155
class Algorithms(enum.Enum):
    """Registry of the available splitting algorithms; each value wraps a callable."""

    # Values have to be wrapped inside functools.partial to avoid them being
    # considered method definitions (plain functions do not become enum members).
    # NOTE(review): newer Python versions appear to warn about / change how
    # functools.partial behaves as an enum value - confirm against the
    # enum and functools documentation before relying on this on 3.13+.
    duration_based_chunks = functools.partial(duration_based_chunks)
    least_duration = functools.partial(least_duration)

    @staticmethod
    def names() -> "List[str]":
        """Return the member names in definition order."""
        return [x.name for x in Algorithms]
File without changes
File without changes