cddiagram 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Alberto Azzari
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: cddiagram
3
+ Version: 0.0.1
4
+ Summary: Critical Difference diagram generator in pure Python
5
+ Author-email: Alberto Azzari <alberto.azzari@univr.it>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Alberto Azzari
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/albertoazzari/cd-diagram
29
+ Project-URL: Issues, https://github.com/albertoazzari/cd-diagram/issues
30
+ Project-URL: Source, https://github.com/albertoazzari/cd-diagram
31
+ Classifier: Programming Language :: Python :: 3
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Operating System :: OS Independent
34
+ Requires-Python: >=3.12
35
+ Description-Content-Type: text/markdown
36
+ License-File: LICENSE
37
+ Requires-Dist: numpy
38
+ Requires-Dist: scipy
39
+ Provides-Extra: dev
40
+ Requires-Dist: build; extra == "dev"
41
+ Requires-Dist: pytest; extra == "dev"
42
+ Requires-Dist: twine; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # cddiagram
46
+
47
+ A pure Python library for generating Critical Difference (CD) diagrams as SVG.
48
+
49
+ CD diagrams visualize the statistical comparison of multiple classifiers (or models) over multiple datasets, as introduced by Demsar (2006). They show the average rank of each model and connect groups of models whose performance differences are **not** statistically significant.
50
+
51
+ > J. Demsar, "Statistical Comparisons of Classifiers over Multiple Data Sets",
52
+ > *Journal of Machine Learning Research*, vol. 7, pp. 1-30, 2006.
53
+ > https://jmlr.org/papers/v7/demsar06a.html
54
+
55
+ ## How it works
56
+
57
+ 1. A **Friedman test** checks whether at least one model differs significantly from the others (at alpha = 0.05).
58
+ 2. If significant, the **Nemenyi post-hoc test** computes a critical distance (CD) threshold.
59
+ 3. Models whose average rank difference is less than CD are grouped together — they are not statistically distinguishable.
60
+ 4. The result is rendered as an SVG diagram showing ranked models and significance groups.
61
+
62
+ ## Install
63
+
64
+ ```bash
65
+ pip install cddiagram
66
+ ```
67
+
68
+ Requires Python 3.12+ and depends on `numpy` and `scipy`.
69
+
70
+ ## Usage
71
+
72
+ ### Write to file
73
+
74
+ ```python
75
+ import numpy as np
76
+ from cddiagram import draw_cd_diagram
77
+
78
+ rng = np.random.default_rng(1)
79
+
80
+ models = {
81
+ "model1": rng.normal(loc=0.2, scale=0.1, size=30),
82
+ "model2": rng.normal(loc=0.2, scale=0.1, size=30),
83
+ "model3": rng.normal(loc=0.4, scale=0.1, size=30),
84
+ "model4": rng.normal(loc=0.5, scale=0.1, size=30),
85
+ "model5": rng.normal(loc=0.7, scale=0.1, size=30),
86
+ "model6": rng.normal(loc=0.7, scale=0.1, size=30),
87
+ "model7": rng.normal(loc=0.8, scale=0.1, size=30),
88
+ "model8": rng.normal(loc=0.9, scale=0.1, size=30),
89
+ }
90
+
91
+ samples = np.column_stack(list(models.values()))
92
+ draw_cd_diagram(samples, labels=list(models.keys()), out_file="out.svg", title="Model comparison")
93
+ ```
94
+
95
+ <img src="./out.svg">
96
+
97
+ ### Non-significant results
98
+
99
+ If the Friedman test is not significant, the function issues a warning and returns `None` — no diagram is produced because the data does not support ranking the models.
100
+
101
+ ## API
102
+
103
+ ```python
104
+ draw_cd_diagram(
105
+ samples, # 2D array-like (rows=datasets, columns=models)
106
+ labels, # Sequence of model names (one per column)
107
+ title=None, # Optional diagram title
108
+ out_file=None, # Optional path to write SVG file
109
+ fig_size=None, # Optional (width, height) tuple in pixels
110
+ ) -> Element | None
111
+ ```
112
+
113
+ **Input formats**: NumPy arrays, pandas DataFrames, or any object with a `.to_numpy()` / `.values` attribute.
114
+
115
+ ## License
116
+
117
+ MIT
@@ -0,0 +1,73 @@
1
+ # cddiagram
2
+
3
+ A pure Python library for generating Critical Difference (CD) diagrams as SVG.
4
+
5
+ CD diagrams visualize the statistical comparison of multiple classifiers (or models) over multiple datasets, as introduced by Demšar (2006). They show the average rank of each model and connect groups of models whose performance differences are **not** statistically significant.
6
+
7
+ > J. Demšar, "Statistical Comparisons of Classifiers over Multiple Data Sets",
8
+ > *Journal of Machine Learning Research*, vol. 7, pp. 1-30, 2006.
9
+ > https://jmlr.org/papers/v7/demsar06a.html
10
+
11
+ ## How it works
12
+
13
+ 1. A **Friedman test** checks whether at least one model differs significantly from the others (at alpha = 0.05).
14
+ 2. If significant, the **Nemenyi post-hoc test** computes a critical difference (CD) threshold.
15
+ 3. Models whose average rank difference is less than CD are grouped together — they are not statistically distinguishable.
16
+ 4. The result is rendered as an SVG diagram showing ranked models and significance groups.
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pip install cddiagram
22
+ ```
23
+
24
+ Requires Python 3.12+ and depends on `numpy` and `scipy`.
25
+
26
+ ## Usage
27
+
28
+ ### Write to file
29
+
30
+ ```python
31
+ import numpy as np
32
+ from cddiagram import draw_cd_diagram
33
+
34
+ rng = np.random.default_rng(1)
35
+
36
+ models = {
37
+ "model1": rng.normal(loc=0.2, scale=0.1, size=30),
38
+ "model2": rng.normal(loc=0.2, scale=0.1, size=30),
39
+ "model3": rng.normal(loc=0.4, scale=0.1, size=30),
40
+ "model4": rng.normal(loc=0.5, scale=0.1, size=30),
41
+ "model5": rng.normal(loc=0.7, scale=0.1, size=30),
42
+ "model6": rng.normal(loc=0.7, scale=0.1, size=30),
43
+ "model7": rng.normal(loc=0.8, scale=0.1, size=30),
44
+ "model8": rng.normal(loc=0.9, scale=0.1, size=30),
45
+ }
46
+
47
+ samples = np.column_stack(list(models.values()))
48
+ draw_cd_diagram(samples, labels=list(models.keys()), out_file="out.svg", title="Model comparison")
49
+ ```
50
+
51
+ <img src="./out.svg">
52
+
53
+ ### Non-significant results
54
+
55
+ If the Friedman test is not significant, the function issues a warning and returns `None` — no diagram is produced because the data does not support ranking the models.
56
+
57
+ ## API
58
+
59
+ ```python
60
+ draw_cd_diagram(
61
+ samples, # 2D array-like (rows=datasets, columns=models)
62
+ labels, # Sequence of model names (one per column)
63
+ title=None, # Optional diagram title
64
+ out_file=None, # Optional path to write SVG file
65
+ fig_size=None, # Optional (width, height) tuple in pixels
66
+ ) -> Element | None
67
+ ```
68
+
69
+ **Input formats**: NumPy arrays, pandas DataFrames (or any object exposing `.to_numpy()` or `.values`), and anything else `numpy.asarray` can convert to a 2D float array, such as nested lists.
70
+
71
+ ## License
72
+
73
+ MIT
@@ -0,0 +1,45 @@
1
+ [project]
2
+ name = "cddiagram"
3
+ version = "0.0.1"
4
+ authors = [
5
+ { name="Alberto Azzari", email="alberto.azzari@univr.it" },
6
+ ]
7
+ description = "Critical Difference diagram generator in pure Python"
8
+ readme = "README.md"
9
+ license = {file = "LICENSE"}
10
+ requires-python = ">=3.12"
11
+
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Operating System :: OS Independent",
16
+ ]
17
+
18
+ dependencies = [
19
+ "numpy",
20
+ "scipy",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ dev = [
25
+ "build",
26
+ "pytest",
27
+ "twine",
28
+ ]
29
+
30
+ [build-system]
31
+ requires = ["setuptools>=68", "wheel"]
32
+ build-backend = "setuptools.build_meta"
33
+
34
+ [tool.setuptools]
35
+ package-dir = {"" = "src"}
36
+ py-modules = ["cddiagram"]
37
+
38
+ [tool.pytest.ini_options]
39
+ testpaths = ["tests"]
40
+
41
+ [project.urls]
42
+ Homepage = "https://github.com/albertoazzari/cd-diagram"
43
+ Issues = "https://github.com/albertoazzari/cd-diagram/issues"
44
+ Source = "https://github.com/albertoazzari/cd-diagram"
45
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: cddiagram
3
+ Version: 0.0.1
4
+ Summary: Critical Difference diagram generator in pure Python
5
+ Author-email: Alberto Azzari <alberto.azzari@univr.it>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Alberto Azzari
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/albertoazzari/cd-diagram
29
+ Project-URL: Issues, https://github.com/albertoazzari/cd-diagram/issues
30
+ Project-URL: Source, https://github.com/albertoazzari/cd-diagram
31
+ Classifier: Programming Language :: Python :: 3
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Operating System :: OS Independent
34
+ Requires-Python: >=3.12
35
+ Description-Content-Type: text/markdown
36
+ License-File: LICENSE
37
+ Requires-Dist: numpy
38
+ Requires-Dist: scipy
39
+ Provides-Extra: dev
40
+ Requires-Dist: build; extra == "dev"
41
+ Requires-Dist: pytest; extra == "dev"
42
+ Requires-Dist: twine; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # cddiagram
46
+
47
+ A pure Python library for generating Critical Difference (CD) diagrams as SVG.
48
+
49
+ CD diagrams visualize the statistical comparison of multiple classifiers (or models) over multiple datasets, as introduced by Demsar (2006). They show the average rank of each model and connect groups of models whose performance differences are **not** statistically significant.
50
+
51
+ > J. Demsar, "Statistical Comparisons of Classifiers over Multiple Data Sets",
52
+ > *Journal of Machine Learning Research*, vol. 7, pp. 1-30, 2006.
53
+ > https://jmlr.org/papers/v7/demsar06a.html
54
+
55
+ ## How it works
56
+
57
+ 1. A **Friedman test** checks whether at least one model differs significantly from the others (at alpha = 0.05).
58
+ 2. If significant, the **Nemenyi post-hoc test** computes a critical distance (CD) threshold.
59
+ 3. Models whose average rank difference is less than CD are grouped together — they are not statistically distinguishable.
60
+ 4. The result is rendered as an SVG diagram showing ranked models and significance groups.
61
+
62
+ ## Install
63
+
64
+ ```bash
65
+ pip install cddiagram
66
+ ```
67
+
68
+ Requires Python 3.12+ and depends on `numpy` and `scipy`.
69
+
70
+ ## Usage
71
+
72
+ ### Write to file
73
+
74
+ ```python
75
+ import numpy as np
76
+ from cddiagram import draw_cd_diagram
77
+
78
+ rng = np.random.default_rng(1)
79
+
80
+ models = {
81
+ "model1": rng.normal(loc=0.2, scale=0.1, size=30),
82
+ "model2": rng.normal(loc=0.2, scale=0.1, size=30),
83
+ "model3": rng.normal(loc=0.4, scale=0.1, size=30),
84
+ "model4": rng.normal(loc=0.5, scale=0.1, size=30),
85
+ "model5": rng.normal(loc=0.7, scale=0.1, size=30),
86
+ "model6": rng.normal(loc=0.7, scale=0.1, size=30),
87
+ "model7": rng.normal(loc=0.8, scale=0.1, size=30),
88
+ "model8": rng.normal(loc=0.9, scale=0.1, size=30),
89
+ }
90
+
91
+ samples = np.column_stack(list(models.values()))
92
+ draw_cd_diagram(samples, labels=list(models.keys()), out_file="out.svg", title="Model comparison")
93
+ ```
94
+
95
+ <img src="./out.svg">
96
+
97
+ ### Non-significant results
98
+
99
+ If the Friedman test is not significant, the function issues a warning and returns `None` — no diagram is produced because the data does not support ranking the models.
100
+
101
+ ## API
102
+
103
+ ```python
104
+ draw_cd_diagram(
105
+ samples, # 2D array-like (rows=datasets, columns=models)
106
+ labels, # Sequence of model names (one per column)
107
+ title=None, # Optional diagram title
108
+ out_file=None, # Optional path to write SVG file
109
+ fig_size=None, # Optional (width, height) tuple in pixels
110
+ ) -> Element | None
111
+ ```
112
+
113
+ **Input formats**: NumPy arrays, pandas DataFrames, or any object with a `.to_numpy()` / `.values` attribute.
114
+
115
+ ## License
116
+
117
+ MIT
@@ -0,0 +1,10 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/cddiagram.py
5
+ src/cddiagram.egg-info/PKG-INFO
6
+ src/cddiagram.egg-info/SOURCES.txt
7
+ src/cddiagram.egg-info/dependency_links.txt
8
+ src/cddiagram.egg-info/requires.txt
9
+ src/cddiagram.egg-info/top_level.txt
10
+ tests/test_cd_diagram.py
@@ -0,0 +1,7 @@
1
+ numpy
2
+ scipy
3
+
4
+ [dev]
5
+ build
6
+ pytest
7
+ twine
@@ -0,0 +1 @@
1
+ cddiagram
@@ -0,0 +1,300 @@
1
+ from __future__ import annotations
2
+
3
+ import warnings
4
+ from pathlib import Path
5
+ from typing import Sequence
6
+ from xml.etree import ElementTree as ET
7
+
8
+ import numpy as np
9
+
10
+
11
+ STROKE_WIDTH = 3.0  # default stroke width of drawn lines; also the side length of the square caps on clique bars
12
+ FONT_SIZE = 10  # font-size attribute written on every SVG <text> element
13
+ START_Y_PERC = 0.4  # vertical position of the ruler baseline, as a fraction of the figure height
14
+
15
+ __all__ = ["draw_cd_diagram"]  # the only name exported by a star import
16
+
17
+
18
def _get_relative_x(value: float, n_items: int, interval_len: float) -> float:
    """Map an average rank onto a horizontal offset along the ruler.

    The ruler is drawn with evenly spaced major ticks labelled from
    ``n_items`` at its left end (offset 0) down to 1 at its right end
    (offset ``interval_len``).  The mapping therefore has to be linear
    between those two anchor points.  The earlier formula,
    ``(n_items - value + 1) / n_items``, put rank ``n_items`` at
    ``interval_len / n_items`` instead of 0, so every marker was shifted to
    the right of the tick it belongs to (only rank 1 lined up).

    Args:
        value: Average rank to place, normally within ``[1, n_items]``.
        n_items: Number of ranked models (the worst possible rank).
        interval_len: Length of the ruler in pixels.

    Returns:
        Offset from the left end of the ruler, in pixels.

    Raises:
        ValueError: If ``n_items`` is smaller than 1.
    """
    if n_items < 1:
        raise ValueError("n_items must be at least 1")
    if n_items == 1:
        # A single model can only have rank 1, which sits at the right end.
        return interval_len
    return ((n_items - value) / (n_items - 1)) * interval_len
20
+
21
+
22
def _svg_line(parent: ET.Element, x1: float, y1: float, x2: float, y2: float, *, color: str = "black", width: float = STROKE_WIDTH) -> None:
    """Append a ``<line>`` element running from (x1, y1) to (x2, y2) to *parent*.

    Coordinates and the stroke width are written with three decimals; the
    line is stroked with *color* and has no fill.
    """
    endpoints = (("x1", x1), ("y1", y1), ("x2", x2), ("y2", y2))
    attrib = {name: f"{coord:.3f}" for name, coord in endpoints}
    attrib["stroke"] = color
    attrib["stroke-width"] = f"{width:.3f}"
    attrib["fill"] = "none"
    ET.SubElement(parent, "line", attrib)
36
+
37
+
38
def _svg_rect(parent: ET.Element, x: float, y: float, width: float, height: float, *, fill: str = "red") -> None:
    """Append a filled ``<rect>`` element to *parent*.

    ``x``/``y`` give the top-left corner; all four numbers are written with
    three decimals.
    """
    rect = ET.SubElement(parent, "rect")
    geometry = (("x", x), ("y", y), ("width", width), ("height", height))
    for name, number in geometry:
        rect.set(name, f"{number:.3f}")
    rect.set("fill", fill)
50
+
51
+
52
def _svg_text(
    parent: ET.Element,
    text: str,
    x: float,
    y: float,
    *,
    anchor: str = "middle",
    color: str = "black",
    dominant_baseline: str | None = None,
) -> None:
    """Append a ``<text>`` element containing *text* to *parent*.

    The text is placed at (x, y), aligned according to *anchor*, and both
    filled and stroked with *color*.  ``dominant-baseline`` is only written
    when *dominant_baseline* is given.
    """
    node = ET.SubElement(parent, "text")
    base_attributes = (
        ("x", f"{x:.3f}"),
        ("y", f"{y:.3f}"),
        ("font-size", str(FONT_SIZE)),
        ("text-anchor", anchor),
        ("fill", color),
        ("stroke", color),
        ("stroke-width", "1"),
    )
    for key, val in base_attributes:
        node.set(key, val)
    if dominant_baseline is not None:
        node.set("dominant-baseline", dominant_baseline)
    node.text = text
76
+
77
+
78
def _draw_ruler(parent: ET.Element, n_items: int, width: int, height: int) -> None:
    """Draw the horizontal rank axis with its tick marks and numbers.

    The axis spans 20%-80% of *width* at ``START_Y_PERC`` of *height* and is
    divided into ``2 * n_items`` equal steps.  Even steps get a long tick and
    a number, running from ``n_items + 1`` at the left end down to 1 at the
    right end; odd steps get a short, unlabelled tick.
    """
    baseline_y = START_Y_PERC * height
    left, right = 0.2 * width, 0.8 * width

    _svg_line(parent, left, baseline_y, right, baseline_y)

    tick_count = n_items * 2
    spacing = (right - left) / tick_count
    # Ticks start half a stroke below the axis so they overlap it cleanly.
    tick_bottom = baseline_y + STROKE_WIDTH / 2.0
    for idx in range(tick_count + 1):
        tick_x = left + idx * spacing
        whole_steps, is_minor = divmod(idx, 2)
        tick_len = (0.025 if is_minor else 0.05) * height
        _svg_line(parent, tick_x, tick_bottom, tick_x, baseline_y - tick_len)
        if not is_minor:
            rank_label = n_items - whole_steps + 1
            _svg_text(parent, str(rank_label), tick_x, baseline_y - tick_len - FONT_SIZE)
97
+
98
+
99
def _draw_models(parent: ET.Element, labels: list[str], avg_ranks: list[float], lowest_clique: float, width: int, height: int) -> None:
    """Draw one marker, leader line and label per model.

    Each model gets a vertical line dropping from the axis at its rank
    position, a horizontal line to the side of the diagram and its name as
    text.  The first ``len(labels) // 2`` entries are labelled on the left,
    the remaining ones on the right; colours alternate gray/black by index.
    Label rows are spread between *lowest_clique* and the bottom of the
    figure.
    """
    axis_y = START_Y_PERC * height
    left = 0.2 * width
    right = 0.8 * width

    n_models = len(labels)
    n_left = n_models // 2
    thin = STROKE_WIDTH / 2.0

    for idx, (name, rank) in enumerate(zip(labels, avg_ranks)):
        marker_x = left + _get_relative_x(rank, n_models, right - left)
        shade = "black" if idx % 2 else "gray"

        on_left = idx < n_left
        # Row index counted from the top on whichever side the label lands.
        row = idx if on_left else n_models - idx - 1
        label_y = (
            lowest_clique
            + (row * (height - lowest_clique)) / (n_left + 1)
            + FONT_SIZE / 2.0
            + STROKE_WIDTH
        )

        if on_left:
            elbow_x = left - 0.01 * width
            text_x = left - 0.015 * width
            align = "end"
        else:
            elbow_x = right + 0.01 * width
            text_x = right + 0.015 * width
            align = "start"

        _svg_line(parent, marker_x, axis_y, marker_x, label_y, color=shade, width=thin)
        _svg_line(parent, marker_x, label_y, elbow_x, label_y, color=shade, width=thin)
        _svg_text(
            parent,
            name,
            text_x,
            label_y,
            anchor=align,
            color=shade,
            dominant_baseline="middle",
        )
145
+
146
+
147
def _compute_cliques(
    cd: float, avg_ranks: list[float], width: int, height: int,
) -> tuple[list[tuple[float, float, float]], float]:
    """Return (cliques, lowest_clique_y). Each clique is (x, y, length).

    The first entry is always the CD reference bar, centred horizontally on
    the left end of the ruler and placed above it.  The remaining entries are
    the bars joining runs of neighbouring models whose average ranks differ
    by less than *cd*, stacked below the ruler one row per bar.

    Args:
        cd: Critical difference, in rank units.
        avg_ranks: Average ranks; the grouping walks neighbours by index, so
            the list is expected to be sorted (``draw_cd_diagram`` passes it
            in descending order).
        width: Figure width in pixels.
        height: Figure height in pixels.

    Returns:
        The list of ``(x, y, length)`` bars and the largest ``y`` among them.
    """
    n_models = len(avg_ranks)
    start_y = (START_Y_PERC - 0.15) * height
    start_x = 0.2 * width
    end_x = 0.8 * width
    interval_len = end_x - start_x

    # Measure the CD bar with the very same rank -> x mapping that positions
    # the model markers, instead of a separately hard-coded scale, so the bar
    # can never drift out of step with the markers it is compared against.
    cd_len = abs(
        _get_relative_x(1.0, n_models, interval_len)
        - _get_relative_x(1.0 + cd, n_models, interval_len)
    )

    # CD reference bar
    ref_x = start_x - cd_len / 2.0
    ref_y = start_y + (0.01 * height)
    cliques: list[tuple[float, float, float]] = [(ref_x, ref_y, cd_len)]
    lowest_clique = ref_y

    height_stride_perc = 1.0 / (n_models * 3)
    cliques_start_y = (START_Y_PERC + 0.02) * height
    h = 0  # number of clique rows emitted so far
    last_x2 = None  # far end of the most recently emitted clique
    for i in range(n_models - 1, -1, -1):
        # Count how many consecutive predecessors are within CD of model i.
        count = 0
        for j in range(i - 1, -1, -1):
            if abs(avg_ranks[i] - avg_ranks[j]) < cd:
                count += 1
            else:
                break
        if count > 0:
            x1 = start_x + _get_relative_x(avg_ranks[i], n_models, interval_len)
            x2 = start_x + _get_relative_x(avg_ranks[i - count], n_models, interval_len)
            # A run ending at the same far end as the previous bar is
            # contained in that bar, so it is not drawn again.
            if last_x2 is None or abs(last_x2 - x2) > 1e-9:
                last_x2 = x2
                y = cliques_start_y + height_stride_perc * (h * height)
                cliques.append((x2, y, abs(x1 - x2)))
                lowest_clique = max(lowest_clique, y)
                h += 1

    return cliques, lowest_clique
184
+
185
+
186
def _render_cliques(parent: ET.Element, cliques: list[tuple[float, float, float]]) -> None:
    """Draw every ``(x, y, length)`` bar as a red line with square end caps."""
    cap = STROKE_WIDTH
    for left_x, bar_y, bar_len in cliques:
        cap_top = bar_y - cap / 2.0
        right_x = left_x + bar_len
        # Left cap, the bar itself, then the right cap.
        _svg_rect(parent, left_x - cap, cap_top, cap, cap)
        _svg_line(parent, left_x, bar_y, right_x, bar_y, color="red", width=cap / 2.0)
        _svg_rect(parent, right_x, cap_top, cap, cap)
191
+
192
+
193
def _render_cd_diagram(
    cd: float,
    avg_ranks: list[float],
    labels: list[str],
    title: str | None = None,
    fig_size: tuple[int, int] | None = None,
) -> ET.Element:
    """Build the complete ``<svg>`` element for a CD diagram.

    Without *fig_size* the figure is 512 px wide and at least 256 px tall,
    growing by 8 px per label (plus 32 px).  In either case the width is
    widened when needed so that the ruler numbers have room.  The title (or
    an empty string), the ruler, the ``CD=`` label, the model markers and the
    clique bars are added in that order.
    """
    n_labels = len(labels)
    if fig_size is not None:
        width, height = fig_size
    else:
        width = 512
        height = max(256, n_labels * 8 + 32)

    # Reserve 6 px per digit of the largest rank number for every label; the
    # ruler only occupies 60% of the figure width.
    digits = len(str(n_labels))
    min_ruler_width = digits * 6 * n_labels
    width = max(width, int(min_ruler_width / 0.6))

    svg = ET.Element("svg")
    svg.set("xmlns", "http://www.w3.org/2000/svg")
    svg.set("width", str(width))
    svg.set("height", str(height))
    svg.set("style", "background-color:white")

    _svg_text(svg, title or "", width / 2.0, 0.1 * height, color="black")
    _draw_ruler(svg, len(avg_ranks) - 1, width, height)

    cliques, lowest_clique = _compute_cliques(cd, avg_ranks, width, height)
    _svg_text(svg, f"CD={cd:.2f}", 0.2 * width, (START_Y_PERC - 0.15) * height)

    _draw_models(svg, labels, avg_ranks, lowest_clique, width, height)
    _render_cliques(svg, cliques)

    return svg
233
+
234
+
235
def _to_numpy_2d(samples: object) -> np.ndarray:
    """Convert *samples* to a two-dimensional float array.

    NumPy arrays are cast with ``astype`` (no copy when already float).
    Other objects are converted through ``to_numpy()`` if they have it, else
    through their ``values`` attribute if present, else directly.  This
    supports DataFrame-like objects without importing pandas.

    Raises:
        ValueError: If the converted array is not two-dimensional.
    """
    if isinstance(samples, np.ndarray):
        matrix = samples.astype(float, copy=False)
    else:
        if hasattr(samples, "to_numpy"):
            raw = samples.to_numpy()
        else:
            raw = getattr(samples, "values", samples)
        matrix = np.asarray(raw, dtype=float)

    if matrix.ndim == 2:
        return matrix
    raise ValueError("samples must be a 2D array-like object")
249
+
250
+
251
def draw_cd_diagram(
    samples: object,
    labels: Sequence[str],
    title: str | None = None,
    out_file: str | Path | None = None,
    fig_size: tuple[int, int] | None = None,
) -> ET.Element | None:
    """Run the Friedman/Nemenyi analysis and render a CD diagram as SVG.

    Args:
        samples: 2D array-like with one row per dataset and one column per
            model.  Within each row the largest value receives rank 1.
        labels: Model names, one per column of *samples*.
        title: Optional text drawn at the top of the diagram.
        out_file: Optional path; when given, the SVG is also written there.
        fig_size: Optional ``(width, height)`` of the figure in pixels.

    Returns:
        The root ``<svg>`` element, or ``None`` (after a warning) when the
        Friedman test does not reject the null hypothesis at alpha = 0.05.

    Raises:
        ValueError: If *samples* is not 2D, if the number of labels differs
            from the number of columns, or if the number of models has no
            entry in the critical-value table.
    """
    alpha = 0.05

    samples_ = _to_numpy_2d(samples)
    labels_ = list(labels)

    # Validate the inputs before running any statistics, so malformed calls
    # fail the same way regardless of what the data happen to look like
    # (previously a mismatch could be masked by a non-significant result).
    n_datasets, n_models = samples_.shape
    if len(labels_) != n_models:
        raise ValueError("labels length must match number of model columns")
    if n_models >= len(_QSTU_0_05) or np.isnan(_QSTU_0_05[n_models]):
        raise ValueError(f"unsupported number of models for lookup table: {n_models}")

    # Imported lazily so that importing this module does not pull in scipy.
    from scipy.stats import friedmanchisquare, rankdata

    _, pvalue = friedmanchisquare(*samples_.T)
    # "not <" rather than ">=" so that a NaN p-value (an undefined test
    # statistic) is treated as non-significant instead of slipping through.
    if not pvalue < alpha:
        warnings.warn(
            "The null hypothesis of the Friedman test cannot be rejected.",
            stacklevel=2,
        )
        return None

    q_alpha = _QSTU_0_05[n_models]
    cd = q_alpha * np.sqrt((n_models * (n_models + 1)) / (6 * n_datasets))

    # Negate so that the largest value in each row gets rank 1, then order
    # the models from the largest (worst) average rank to the smallest.
    avg_ranks = rankdata(-samples_, axis=1, method="average").mean(axis=0)
    sorted_indices = np.argsort(-avg_ranks)

    svg = _render_cd_diagram(
        cd,
        avg_ranks[sorted_indices].tolist(),
        [labels_[i] for i in sorted_indices],
        title,
        fig_size,
    )

    if out_file is not None:
        tree = ET.ElementTree(svg)
        tree.write(Path(out_file), encoding="utf-8", xml_declaration=True)

    return svg
298
+
299
+
300
+ _QSTU_0_05 = (np.nan, np.nan, 1.959964233, 2.343700476, 2.569032073, 2.727774717, 2.849705382, 2.948319908, 3.030878867, 3.10173026, 3.16368342, 3.218653901, 3.268003591, 3.312738701, 3.353617959, 3.391230382, 3.426041249, 3.458424619, 3.488684546, 3.517072762, 3.543799277, 3.569040161, 3.592946027, 3.615646276, 3.637252631, 3.657860551, 3.677556303, 3.696413427, 3.71449839, 3.731869175, 3.748578108, 3.764671858, 3.780192852, 3.795178566, 3.809663649, 3.823679212, 3.837254248, 3.850413505, 3.863181025, 3.875578729, 3.887627121, 3.899344587, 3.910747391, 3.921852503, 3.932673359, 3.943224099, 3.953518159, 3.963566147, 3.973379375, 3.98296845, 3.992343271, 4.001512325, 4.010484803, 4.019267776, 4.02786973, 4.036297029, 4.044556036, 4.05265453, 4.060596753, 4.068389777, 4.076037844, 4.083547318, 4.090921028, 4.098166044, 4.105284488, 4.112282016, 4.119161458, 4.125927056, 4.132582345, 4.139131568, 4.145576139, 4.151921008, 4.158168297, 4.164320833, 4.170380738, 4.176352255, 4.182236797, 4.188036487, 4.19375486, 4.199392622, 4.204952603, 4.21043763, 4.215848411, 4.221187067, 4.22645572, 4.23165649, 4.236790793, 4.241859334, 4.246864943, 4.251809034, 4.256692313, 4.261516196, 4.266282802, 4.270992841, 4.275648432, 4.280249575, 4.284798393, 4.289294885, 4.29374188, 4.298139377, 4.302488791)  # critical values used as q_alpha in the CD formula at alpha = 0.05, indexed by the number of models k (indices 0 and 1 are NaN placeholders that draw_cd_diagram rejects; the last index is k = 100)
@@ -0,0 +1,67 @@
1
+ import warnings
2
+ from xml.etree import ElementTree as ET
3
+
4
+ import numpy as np
5
+ import pytest
6
+ from cddiagram import draw_cd_diagram
7
+
8
+
9
def _make_significant_samples() -> tuple[np.ndarray, list[str]]:
    """Return a seeded 30x8 score matrix with clearly separated models, plus labels.

    Columns are drawn one after another from normal distributions with the
    listed means and a standard deviation of 0.1.
    """
    rng = np.random.default_rng(1)
    means = (0.2, 0.2, 0.4, 0.5, 0.7, 0.7, 0.8, 0.9)
    columns = [rng.normal(loc=mean, scale=0.1, size=30) for mean in means]
    labels = [f"model{number}" for number in range(1, 9)]
    return np.column_stack(columns), labels
25
+
26
+
27
def test_draw_cd_diagram_array(tmp_path):
    """Writing to a file produces an SVG with the CD label and returns the element."""
    data, names = _make_significant_samples()
    svg_path = tmp_path / "df.svg"

    element = draw_cd_diagram(data, labels=names, out_file=str(svg_path), title="TEST")

    assert svg_path.exists()
    written = svg_path.read_text(encoding="utf-8")
    for marker in ("<svg", "CD="):
        assert marker in written
    assert isinstance(element, ET.Element)
38
+
39
+
40
def test_draw_cd_diagram_in_memory():
    """Without ``out_file`` the diagram is still returned as an element."""
    data, names = _make_significant_samples()

    element = draw_cd_diagram(data, labels=names)

    assert isinstance(element, ET.Element)
    serialized = ET.tostring(element, encoding="unicode")
    for marker in ("<svg", "CD="):
        assert marker in serialized
49
+
50
+
51
def test_draw_cd_diagram_non_significant(tmp_path):
    """Indistinguishable models trigger a warning, return None and write no file."""
    rng = np.random.default_rng(7)
    # Three columns drawn one after another from the same distribution.
    columns = [rng.normal(loc=0.5, scale=0.1, size=30) for _ in range(3)]
    data = np.column_stack(columns)
    svg_path = tmp_path / "none.svg"
    names = ["model1", "model2", "model3"]

    with pytest.warns(UserWarning, match="cannot be rejected"):
        outcome = draw_cd_diagram(data, labels=names, out_file=str(svg_path))

    assert outcome is None
    assert not svg_path.exists()