cddiagram 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cddiagram-0.0.1/LICENSE +21 -0
- cddiagram-0.0.1/PKG-INFO +117 -0
- cddiagram-0.0.1/README.md +73 -0
- cddiagram-0.0.1/pyproject.toml +45 -0
- cddiagram-0.0.1/setup.cfg +4 -0
- cddiagram-0.0.1/src/cddiagram.egg-info/PKG-INFO +117 -0
- cddiagram-0.0.1/src/cddiagram.egg-info/SOURCES.txt +10 -0
- cddiagram-0.0.1/src/cddiagram.egg-info/dependency_links.txt +1 -0
- cddiagram-0.0.1/src/cddiagram.egg-info/requires.txt +7 -0
- cddiagram-0.0.1/src/cddiagram.egg-info/top_level.txt +1 -0
- cddiagram-0.0.1/src/cddiagram.py +300 -0
- cddiagram-0.0.1/tests/test_cd_diagram.py +67 -0
cddiagram-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alberto Azzari
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cddiagram-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cddiagram
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Critical Difference diagram generator in pure Python
|
|
5
|
+
Author-email: Alberto Azzari <alberto.azzari@univr.it>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Alberto Azzari
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/albertoazzari/cd-diagram
|
|
29
|
+
Project-URL: Issues, https://github.com/albertoazzari/cd-diagram/issues
|
|
30
|
+
Project-URL: Source, https://github.com/albertoazzari/cd-diagram
|
|
31
|
+
Classifier: Programming Language :: Python :: 3
|
|
32
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
33
|
+
Classifier: Operating System :: OS Independent
|
|
34
|
+
Requires-Python: >=3.12
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
License-File: LICENSE
|
|
37
|
+
Requires-Dist: numpy
|
|
38
|
+
Requires-Dist: scipy
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: build; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest; extra == "dev"
|
|
42
|
+
Requires-Dist: twine; extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# cddiagram
|
|
46
|
+
|
|
47
|
+
A pure Python library for generating Critical Difference (CD) diagrams as SVG.
|
|
48
|
+
|
|
49
|
+
CD diagrams visualize the statistical comparison of multiple classifiers (or models) over multiple datasets, as introduced by Demsar (2006). They show the average rank of each model and connect groups of models whose performance differences are **not** statistically significant.
|
|
50
|
+
|
|
51
|
+
> J. Demsar, "Statistical Comparisons of Classifiers over Multiple Data Sets",
|
|
52
|
+
> *Journal of Machine Learning Research*, vol. 7, pp. 1-30, 2006.
|
|
53
|
+
> https://jmlr.org/papers/v7/demsar06a.html
|
|
54
|
+
|
|
55
|
+
## How it works
|
|
56
|
+
|
|
57
|
+
1. A **Friedman test** checks whether at least one model differs significantly from the others (at alpha = 0.05).
|
|
58
|
+
2. If significant, the **Nemenyi post-hoc test** computes a critical distance (CD) threshold.
|
|
59
|
+
3. Models whose average rank difference is less than CD are grouped together — they are not statistically distinguishable.
|
|
60
|
+
4. The result is rendered as an SVG diagram showing ranked models and significance groups.
|
|
61
|
+
|
|
62
|
+
## Install
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install cddiagram
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Requires Python 3.12+ and depends on `numpy` and `scipy`.
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
### Write to file
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
import numpy as np
|
|
76
|
+
from cddiagram import draw_cd_diagram
|
|
77
|
+
|
|
78
|
+
rng = np.random.default_rng(1)
|
|
79
|
+
|
|
80
|
+
models = {
|
|
81
|
+
"model1": rng.normal(loc=0.2, scale=0.1, size=30),
|
|
82
|
+
"model2": rng.normal(loc=0.2, scale=0.1, size=30),
|
|
83
|
+
"model3": rng.normal(loc=0.4, scale=0.1, size=30),
|
|
84
|
+
"model4": rng.normal(loc=0.5, scale=0.1, size=30),
|
|
85
|
+
"model5": rng.normal(loc=0.7, scale=0.1, size=30),
|
|
86
|
+
"model6": rng.normal(loc=0.7, scale=0.1, size=30),
|
|
87
|
+
"model7": rng.normal(loc=0.8, scale=0.1, size=30),
|
|
88
|
+
"model8": rng.normal(loc=0.9, scale=0.1, size=30),
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
samples = np.column_stack(list(models.values()))
|
|
92
|
+
draw_cd_diagram(samples, labels=list(models.keys()), out_file="out.svg", title="Model comparison")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
<img src="./out.svg">
|
|
96
|
+
|
|
97
|
+
### Non-significant results
|
|
98
|
+
|
|
99
|
+
If the Friedman test is not significant, the function issues a warning and returns `None` — no diagram is produced because the data does not support ranking the models.
|
|
100
|
+
|
|
101
|
+
## API
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
draw_cd_diagram(
|
|
105
|
+
samples, # 2D array-like (rows=datasets, columns=models)
|
|
106
|
+
labels, # Sequence of model names (one per column)
|
|
107
|
+
title=None, # Optional diagram title
|
|
108
|
+
out_file=None, # Optional path to write SVG file
|
|
109
|
+
fig_size=None, # Optional (width, height) tuple in pixels
|
|
110
|
+
) -> Element | None
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Input formats**: NumPy arrays, pandas DataFrames, or any object with a `.to_numpy()` / `.values` attribute.
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
|
|
117
|
+
MIT
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# cddiagram
|
|
2
|
+
|
|
3
|
+
A pure Python library for generating Critical Difference (CD) diagrams as SVG.
|
|
4
|
+
|
|
5
|
+
CD diagrams visualize the statistical comparison of multiple classifiers (or models) over multiple datasets, as introduced by Demsar (2006). They show the average rank of each model and connect groups of models whose performance differences are **not** statistically significant.
|
|
6
|
+
|
|
7
|
+
> J. Demsar, "Statistical Comparisons of Classifiers over Multiple Data Sets",
|
|
8
|
+
> *Journal of Machine Learning Research*, vol. 7, pp. 1-30, 2006.
|
|
9
|
+
> https://jmlr.org/papers/v7/demsar06a.html
|
|
10
|
+
|
|
11
|
+
## How it works
|
|
12
|
+
|
|
13
|
+
1. A **Friedman test** checks whether at least one model differs significantly from the others (at alpha = 0.05).
|
|
14
|
+
2. If significant, the **Nemenyi post-hoc test** computes a critical difference (CD) threshold.
|
|
15
|
+
3. Models whose average rank difference is less than CD are grouped together — they are not statistically distinguishable.
|
|
16
|
+
4. The result is rendered as an SVG diagram showing ranked models and significance groups.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install cddiagram
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Requires Python 3.12+ and depends on `numpy` and `scipy`.
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
### Write to file
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import numpy as np
|
|
32
|
+
from cddiagram import draw_cd_diagram
|
|
33
|
+
|
|
34
|
+
rng = np.random.default_rng(1)
|
|
35
|
+
|
|
36
|
+
models = {
|
|
37
|
+
"model1": rng.normal(loc=0.2, scale=0.1, size=30),
|
|
38
|
+
"model2": rng.normal(loc=0.2, scale=0.1, size=30),
|
|
39
|
+
"model3": rng.normal(loc=0.4, scale=0.1, size=30),
|
|
40
|
+
"model4": rng.normal(loc=0.5, scale=0.1, size=30),
|
|
41
|
+
"model5": rng.normal(loc=0.7, scale=0.1, size=30),
|
|
42
|
+
"model6": rng.normal(loc=0.7, scale=0.1, size=30),
|
|
43
|
+
"model7": rng.normal(loc=0.8, scale=0.1, size=30),
|
|
44
|
+
"model8": rng.normal(loc=0.9, scale=0.1, size=30),
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
samples = np.column_stack(list(models.values()))
|
|
48
|
+
draw_cd_diagram(samples, labels=list(models.keys()), out_file="out.svg", title="Model comparison")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
<img src="./out.svg">
|
|
52
|
+
|
|
53
|
+
### Non-significant results
|
|
54
|
+
|
|
55
|
+
If the Friedman test is not significant, the function issues a warning and returns `None` — no diagram is produced because the data does not support ranking the models.
|
|
56
|
+
|
|
57
|
+
## API
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
draw_cd_diagram(
|
|
61
|
+
samples, # 2D array-like (rows=datasets, columns=models)
|
|
62
|
+
labels, # Sequence of model names (one per column)
|
|
63
|
+
title=None, # Optional diagram title
|
|
64
|
+
out_file=None, # Optional path to write SVG file
|
|
65
|
+
fig_size=None, # Optional (width, height) tuple in pixels
|
|
66
|
+
) -> Element | None
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Input formats**: NumPy arrays, pandas DataFrames (or any object exposing a `.to_numpy()` method or a `.values` attribute), and other 2D array-likes such as nested lists.
|
|
70
|
+
|
|
71
|
+
## License
|
|
72
|
+
|
|
73
|
+
MIT
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "cddiagram"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
authors = [
|
|
5
|
+
{ name="Alberto Azzari", email="alberto.azzari@univr.it" },
|
|
6
|
+
]
|
|
7
|
+
description = "Critical Difference diagram generator in pure Python"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
license = {file = "LICENSE"}
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
dependencies = [
|
|
19
|
+
"numpy",
|
|
20
|
+
"scipy",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
dev = [
|
|
25
|
+
"build",
|
|
26
|
+
"pytest",
|
|
27
|
+
"twine",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["setuptools>=68", "wheel"]
|
|
32
|
+
build-backend = "setuptools.build_meta"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools]
|
|
35
|
+
package-dir = {"" = "src"}
|
|
36
|
+
py-modules = ["cddiagram"]
|
|
37
|
+
|
|
38
|
+
[tool.pytest.ini_options]
|
|
39
|
+
testpaths = ["tests"]
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://github.com/albertoazzari/cd-diagram"
|
|
43
|
+
Issues = "https://github.com/albertoazzari/cd-diagram/issues"
|
|
44
|
+
Source = "https://github.com/albertoazzari/cd-diagram"
|
|
45
|
+
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cddiagram
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Critical Difference diagram generator in pure Python
|
|
5
|
+
Author-email: Alberto Azzari <alberto.azzari@univr.it>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Alberto Azzari
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/albertoazzari/cd-diagram
|
|
29
|
+
Project-URL: Issues, https://github.com/albertoazzari/cd-diagram/issues
|
|
30
|
+
Project-URL: Source, https://github.com/albertoazzari/cd-diagram
|
|
31
|
+
Classifier: Programming Language :: Python :: 3
|
|
32
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
33
|
+
Classifier: Operating System :: OS Independent
|
|
34
|
+
Requires-Python: >=3.12
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
License-File: LICENSE
|
|
37
|
+
Requires-Dist: numpy
|
|
38
|
+
Requires-Dist: scipy
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: build; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest; extra == "dev"
|
|
42
|
+
Requires-Dist: twine; extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# cddiagram
|
|
46
|
+
|
|
47
|
+
A pure Python library for generating Critical Difference (CD) diagrams as SVG.
|
|
48
|
+
|
|
49
|
+
CD diagrams visualize the statistical comparison of multiple classifiers (or models) over multiple datasets, as introduced by Demsar (2006). They show the average rank of each model and connect groups of models whose performance differences are **not** statistically significant.
|
|
50
|
+
|
|
51
|
+
> J. Demsar, "Statistical Comparisons of Classifiers over Multiple Data Sets",
|
|
52
|
+
> *Journal of Machine Learning Research*, vol. 7, pp. 1-30, 2006.
|
|
53
|
+
> https://jmlr.org/papers/v7/demsar06a.html
|
|
54
|
+
|
|
55
|
+
## How it works
|
|
56
|
+
|
|
57
|
+
1. A **Friedman test** checks whether at least one model differs significantly from the others (at alpha = 0.05).
|
|
58
|
+
2. If significant, the **Nemenyi post-hoc test** computes a critical distance (CD) threshold.
|
|
59
|
+
3. Models whose average rank difference is less than CD are grouped together — they are not statistically distinguishable.
|
|
60
|
+
4. The result is rendered as an SVG diagram showing ranked models and significance groups.
|
|
61
|
+
|
|
62
|
+
## Install
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install cddiagram
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Requires Python 3.12+ and depends on `numpy` and `scipy`.
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
### Write to file
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
import numpy as np
|
|
76
|
+
from cddiagram import draw_cd_diagram
|
|
77
|
+
|
|
78
|
+
rng = np.random.default_rng(1)
|
|
79
|
+
|
|
80
|
+
models = {
|
|
81
|
+
"model1": rng.normal(loc=0.2, scale=0.1, size=30),
|
|
82
|
+
"model2": rng.normal(loc=0.2, scale=0.1, size=30),
|
|
83
|
+
"model3": rng.normal(loc=0.4, scale=0.1, size=30),
|
|
84
|
+
"model4": rng.normal(loc=0.5, scale=0.1, size=30),
|
|
85
|
+
"model5": rng.normal(loc=0.7, scale=0.1, size=30),
|
|
86
|
+
"model6": rng.normal(loc=0.7, scale=0.1, size=30),
|
|
87
|
+
"model7": rng.normal(loc=0.8, scale=0.1, size=30),
|
|
88
|
+
"model8": rng.normal(loc=0.9, scale=0.1, size=30),
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
samples = np.column_stack(list(models.values()))
|
|
92
|
+
draw_cd_diagram(samples, labels=list(models.keys()), out_file="out.svg", title="Model comparison")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
<img src="./out.svg">
|
|
96
|
+
|
|
97
|
+
### Non-significant results
|
|
98
|
+
|
|
99
|
+
If the Friedman test is not significant, the function issues a warning and returns `None` — no diagram is produced because the data does not support ranking the models.
|
|
100
|
+
|
|
101
|
+
## API
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
draw_cd_diagram(
|
|
105
|
+
samples, # 2D array-like (rows=datasets, columns=models)
|
|
106
|
+
labels, # Sequence of model names (one per column)
|
|
107
|
+
title=None, # Optional diagram title
|
|
108
|
+
out_file=None, # Optional path to write SVG file
|
|
109
|
+
fig_size=None, # Optional (width, height) tuple in pixels
|
|
110
|
+
) -> Element | None
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Input formats**: NumPy arrays, pandas DataFrames, or any object with a `.to_numpy()` / `.values` attribute.
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
|
|
117
|
+
MIT
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/cddiagram.py
|
|
5
|
+
src/cddiagram.egg-info/PKG-INFO
|
|
6
|
+
src/cddiagram.egg-info/SOURCES.txt
|
|
7
|
+
src/cddiagram.egg-info/dependency_links.txt
|
|
8
|
+
src/cddiagram.egg-info/requires.txt
|
|
9
|
+
src/cddiagram.egg-info/top_level.txt
|
|
10
|
+
tests/test_cd_diagram.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cddiagram
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Sequence
|
|
6
|
+
from xml.etree import ElementTree as ET
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Default stroke width for SVG lines; also the side length of the square end caps on clique bars.
STROKE_WIDTH = 3.0
|
|
12
|
+
# Value written to the "font-size" attribute of every text node; also used as a vertical spacing offset.
FONT_SIZE = 10
|
|
13
|
+
# Vertical position of the ruler baseline, expressed as a fraction of the figure height.
START_Y_PERC = 0.4
|
|
14
|
+
|
|
15
|
+
__all__ = ["draw_cd_diagram"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _get_relative_x(value: float, n_items: int, interval_len: float) -> float:
    """Map a rank onto an offset along an axis of length ``interval_len``.

    Rank 1 maps to the full interval length and rank ``n_items`` maps to
    ``interval_len / n_items``, so lower (better) ranks land further right.
    """
    fraction = (n_items - value + 1.0) / n_items
    return fraction * interval_len
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _svg_line(parent: ET.Element, x1: float, y1: float, x2: float, y2: float, *, color: str = "black", width: float = STROKE_WIDTH) -> None:
    """Append a ``<line>`` from (x1, y1) to (x2, y2) to ``parent``.

    Coordinates and stroke width are written with three decimals.
    """
    endpoints = zip(("x1", "y1", "x2", "y2"), (x1, y1, x2, y2))
    attrib = {key: f"{coord:.3f}" for key, coord in endpoints}
    attrib["stroke"] = color
    attrib["stroke-width"] = f"{width:.3f}"
    attrib["fill"] = "none"
    ET.SubElement(parent, "line", attrib)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _svg_rect(parent: ET.Element, x: float, y: float, width: float, height: float, *, fill: str = "red") -> None:
    """Append a filled ``<rect>`` with its top-left corner at (x, y) to ``parent``.

    Position and size are written with three decimals.
    """
    geometry = (("x", x), ("y", y), ("width", width), ("height", height))
    attrib = {key: f"{amount:.3f}" for key, amount in geometry}
    attrib["fill"] = fill
    ET.SubElement(parent, "rect", attrib)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _svg_text(
    parent: ET.Element,
    text: str,
    x: float,
    y: float,
    *,
    anchor: str = "middle",
    color: str = "black",
    dominant_baseline: str | None = None,
) -> None:
    """Append a ``<text>`` node containing ``text`` at (x, y) to ``parent``.

    ``anchor`` is written as ``text-anchor``; ``color`` is used for both fill
    and stroke. ``dominant-baseline`` is only emitted when a value is given.
    """
    pairs = [
        ("x", f"{x:.3f}"),
        ("y", f"{y:.3f}"),
        ("font-size", str(FONT_SIZE)),
        ("text-anchor", anchor),
        ("fill", color),
        ("stroke", color),
        ("stroke-width", "1"),
    ]
    if dominant_baseline is not None:
        pairs.append(("dominant-baseline", dominant_baseline))

    ET.SubElement(parent, "text", dict(pairs)).text = text
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _draw_ruler(parent: ET.Element, n_items: int, width: int, height: int) -> None:
    """Draw the horizontal rank axis with major and minor ticks.

    The axis spans 20%..80% of ``width`` at ``START_Y_PERC * height`` and is
    divided into ``n_items`` units. Every unit boundary gets a long tick
    labelled from ``n_items + 1`` (left) down to 1 (right); every half unit
    gets a short unlabelled tick.
    """
    baseline_y = START_Y_PERC * height
    left = 0.2 * width
    right = 0.8 * width

    _svg_line(parent, left, baseline_y, right, baseline_y)

    n_ticks = n_items * 2
    spacing = (right - left) / n_ticks
    for tick in range(n_ticks + 1):
        tick_x = left + tick * spacing
        is_major = tick % 2 == 0
        tick_len = (0.05 if is_major else 0.025) * height
        # Start half a stroke below the baseline so the tick joins it cleanly.
        _svg_line(parent, tick_x, baseline_y + STROKE_WIDTH / 2.0, tick_x, baseline_y - tick_len)
        if is_major:
            rank_label = n_items - (tick // 2) + 1
            _svg_text(parent, str(rank_label), tick_x, baseline_y - tick_len - FONT_SIZE)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _draw_models(parent: ET.Element, labels: list[str], avg_ranks: list[float], lowest_clique: float, width: int, height: int) -> None:
    """Draw one elbow connector and label per model.

    Each model gets a vertical line dropping from the ruler baseline at its
    rank position, a horizontal line running out to the side, and its label.
    The first ``len(labels) // 2`` entries are labelled on the left, the rest
    on the right. Label rows start below ``lowest_clique`` so they do not
    overlap the clique bars. Colours alternate gray/black by index.
    """
    baseline_y = START_Y_PERC * height
    left = 0.2 * width
    right = 0.8 * width

    n_models = len(labels)
    half_count = n_models // 2
    thin = STROKE_WIDTH / 2.0

    for idx, (label, rank) in enumerate(zip(labels, avg_ranks)):
        x = left + _get_relative_x(rank, n_models, right - left)
        color = "black" if idx % 2 else "gray"

        if idx < half_count:
            # Left-hand side: rows descend in iteration order.
            row = idx
            elbow_x = left - 0.01 * width
            text_x = left - 0.015 * width
            anchor = "end"
        else:
            # Right-hand side: rows are counted from the end of the list.
            row = n_models - idx - 1
            elbow_x = right + 0.01 * width
            text_x = right + 0.015 * width
            anchor = "start"

        row_y = (
            lowest_clique
            + (row * (height - lowest_clique)) / (half_count + 1)
            + FONT_SIZE / 2.0
            + STROKE_WIDTH
        )
        _svg_line(parent, x, baseline_y, x, row_y, color=color, width=thin)
        _svg_line(parent, x, row_y, elbow_x, row_y, color=color, width=thin)
        _svg_text(
            parent,
            label,
            text_x,
            row_y,
            anchor=anchor,
            color=color,
            dominant_baseline="middle",
        )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _compute_cliques(
    cd: float, avg_ranks: list[float], width: int, height: int,
) -> tuple[list[tuple[float, float, float]], float]:
    """Return (cliques, lowest_clique_y). Each clique is (x, y, length).

    The first entry is the CD reference bar, centred on the left end of the
    axis and placed above the ruler. The remaining entries join runs of
    neighbouring models whose average ranks differ by less than ``cd``.
    ``avg_ranks`` is scanned from its last element backwards; a bar is skipped
    when it ends at the same x position as the previously emitted one.
    """
    n_models = len(avg_ranks)
    left = 0.2 * width
    right = 0.8 * width
    span = right - left
    cd_len = span * cd / n_models

    # CD reference bar
    reference_y = (START_Y_PERC - 0.15) * height + (0.01 * height)
    bars: list[tuple[float, float, float]] = [(left - cd_len / 2.0, reference_y, cd_len)]
    lowest = reference_y

    row_stride = 1.0 / (n_models * 3)
    first_row_y = (START_Y_PERC + 0.02) * height
    row = 0
    previous_x2 = None
    for i in reversed(range(n_models)):
        # Count how many consecutive predecessors sit within cd of entry i.
        reach = 0
        for j in reversed(range(i)):
            if not abs(avg_ranks[i] - avg_ranks[j]) < cd:
                break
            reach += 1
        if reach == 0:
            continue

        x1 = left + _get_relative_x(avg_ranks[i], n_models, span)
        x2 = left + _get_relative_x(avg_ranks[i - reach], n_models, span)
        if previous_x2 is not None and not abs(previous_x2 - x2) > 1e-9:
            # Same far end as the previous bar: this group is already covered.
            continue

        previous_x2 = x2
        y = first_row_y + row_stride * (row * height)
        bars.append((x2, y, abs(x1 - x2)))
        lowest = max(lowest, y)
        row += 1

    return bars, lowest
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _render_cliques(parent: ET.Element, cliques: list[tuple[float, float, float]]) -> None:
    """Draw every (x, y, length) clique as a red bar with a square cap at each end."""
    half_stroke = STROKE_WIDTH / 2.0
    for left_x, y, length in cliques:
        cap_top = y - half_stroke
        right_x = left_x + length
        _svg_rect(parent, left_x - STROKE_WIDTH, cap_top, STROKE_WIDTH, STROKE_WIDTH)
        _svg_line(parent, left_x, y, right_x, y, color="red", width=half_stroke)
        _svg_rect(parent, right_x, cap_top, STROKE_WIDTH, STROKE_WIDTH)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _render_cd_diagram(
    cd: float,
    avg_ranks: list[float],
    labels: list[str],
    title: str | None = None,
    fig_size: tuple[int, int] | None = None,
) -> ET.Element:
    """Build the SVG element tree for a critical difference diagram.

    Args:
        cd: Critical difference, in rank units.
        avg_ranks: Average rank per model, in the same order as ``labels``.
        labels: Model names.
        title: Optional text drawn centred near the top of the figure.
        fig_size: Optional ``(width, height)``. Defaults to 512 wide and at
            least 256 high, growing with the number of labels. The width may
            be increased so the ruler tick labels have room.

    Returns:
        The root ``<svg>`` element.
    """
    delta = 8
    offset_height = 32
    if fig_size is None:
        width, height = 512, max(256, len(labels) * delta + offset_height)
    else:
        width, height = fig_size

    # The ruler occupies 60% of the width (0.2..0.8); widen the canvas if the
    # tick labels would otherwise need more room than that.
    ruler_step = 6
    number = len(str(len(labels))) * ruler_step
    min_ruler_width = number * len(labels)
    width = max(width, int(min_ruler_width / 0.6))

    svg = ET.Element(
        "svg",
        {
            "xmlns": "http://www.w3.org/2000/svg",
            "width": str(width),
            "height": str(height),
            "style": "background-color:white",
        },
    )

    _svg_text(svg, title or "", width / 2.0, 0.1 * height, color="black")

    # The ruler must use the same scale as the model markers and the CD bar,
    # which both map one rank unit to (axis length / number of models).
    # Drawing it with len(avg_ranks) - 1 units stretched the ticks relative to
    # the markers, so models did not line up with their rank. With
    # len(avg_ranks) units the axis runs from k + 1 (left) down to 1 (right)
    # and every marker sits exactly on its rank.
    _draw_ruler(svg, len(avg_ranks), width, height)

    cliques, lowest_clique = _compute_cliques(cd, avg_ranks, width, height)
    cd_label_y = (START_Y_PERC - 0.15) * height
    _svg_text(svg, f"CD={cd:.2f}", 0.2 * width, cd_label_y)

    _draw_models(svg, labels, avg_ranks, lowest_clique, width, height)
    _render_cliques(svg, cliques)

    return svg
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _to_numpy_2d(samples: object) -> np.ndarray:
    """Convert ``samples`` to a 2D float array.

    NumPy arrays are cast with ``astype(float, copy=False)``. Objects with a
    ``to_numpy`` method or a ``values`` attribute (DataFrame-like, no pandas
    import needed) are unwrapped first. Anything else goes straight to
    ``np.asarray``.

    Raises:
        ValueError: If the converted array is not two-dimensional.
    """
    if isinstance(samples, np.ndarray):
        matrix = samples.astype(float, copy=False)
    else:
        if hasattr(samples, "to_numpy"):
            raw = samples.to_numpy()
        elif hasattr(samples, "values"):
            raw = samples.values
        else:
            raw = samples
        matrix = np.asarray(raw, dtype=float)

    if matrix.ndim == 2:
        return matrix
    raise ValueError("samples must be a 2D array-like object")
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def draw_cd_diagram(
    samples: object,
    labels: Sequence[str],
    title: str | None = None,
    out_file: str | None = None,
    fig_size: tuple[int, int] | None = None,
) -> ET.Element | None:
    """Render a critical difference diagram for ``samples`` as SVG.

    A Friedman test (alpha = 0.05) is run across the model columns. If it is
    not significant, a warning is issued and nothing is drawn. Otherwise the
    critical difference is computed from the lookup table and the diagram is
    rendered, with higher sample values receiving better (lower) ranks.

    Args:
        samples: 2D array-like; rows are datasets, columns are models.
        labels: One name per model column.
        title: Optional diagram title.
        out_file: Optional path; when given, the SVG is written there.
        fig_size: Optional ``(width, height)`` of the figure.

    Returns:
        The root SVG element, or ``None`` when the Friedman test does not
        reject the null hypothesis.

    Raises:
        ValueError: If ``samples`` is not 2D, if ``labels`` does not match the
            number of columns, or if the number of models is not covered by
            the critical-value table.
    """
    alpha = 0.05

    samples_ = _to_numpy_2d(samples)
    labels_ = list(labels)

    # Validate the inputs before running any statistics, so that bad
    # arguments are always reported instead of being masked by an early
    # "not significant" return.
    n_datasets, k = samples_.shape
    if len(labels_) != k:
        raise ValueError("labels length must match number of model columns")
    if k >= len(_QSTU_0_05) or np.isnan(_QSTU_0_05[k]):
        raise ValueError(f"unsupported number of models for lookup table: {k}")

    from scipy.stats import friedmanchisquare, rankdata

    _, pvalue = friedmanchisquare(*samples_.T)
    # "not pvalue < alpha" also catches a NaN p-value (e.g. constant data),
    # which cannot count as evidence against the null hypothesis.
    if not pvalue < alpha:
        warnings.warn(
            "The null hypothesis of the Friedman test cannot be rejected.",
            stacklevel=2,
        )
        return None

    q_alpha = _QSTU_0_05[k]
    cd = q_alpha * np.sqrt((k * (k + 1)) / (6 * n_datasets))

    # Negate so that the largest value in each row receives rank 1.
    avg_ranks = rankdata(-samples_, axis=1, method="average").mean(axis=0)
    # Order models from worst (highest average rank) to best.
    sorted_indices = np.argsort(-avg_ranks)

    svg = _render_cd_diagram(
        cd,
        avg_ranks[sorted_indices].tolist(),
        [labels_[i] for i in sorted_indices],
        title,
        fig_size,
    )

    if out_file is not None:
        tree = ET.ElementTree(svg)
        tree.write(Path(out_file), encoding="utf-8", xml_declaration=True)

    return svg
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# Critical values indexed by the number of models k; draw_cd_diagram uses
# entry k as q_alpha in CD = q_alpha * sqrt(k * (k + 1) / (6 * N)).
# Indices 0 and 1 are NaN placeholders and are rejected as unsupported.
# NOTE(review): values look like the Nemenyi table at alpha = 0.05
# (studentized range statistic divided by sqrt(2)) - confirm the source.
_QSTU_0_05 = (np.nan, np.nan, 1.959964233, 2.343700476, 2.569032073, 2.727774717, 2.849705382, 2.948319908, 3.030878867, 3.10173026, 3.16368342, 3.218653901, 3.268003591, 3.312738701, 3.353617959, 3.391230382, 3.426041249, 3.458424619, 3.488684546, 3.517072762, 3.543799277, 3.569040161, 3.592946027, 3.615646276, 3.637252631, 3.657860551, 3.677556303, 3.696413427, 3.71449839, 3.731869175, 3.748578108, 3.764671858, 3.780192852, 3.795178566, 3.809663649, 3.823679212, 3.837254248, 3.850413505, 3.863181025, 3.875578729, 3.887627121, 3.899344587, 3.910747391, 3.921852503, 3.932673359, 3.943224099, 3.953518159, 3.963566147, 3.973379375, 3.98296845, 3.992343271, 4.001512325, 4.010484803, 4.019267776, 4.02786973, 4.036297029, 4.044556036, 4.05265453, 4.060596753, 4.068389777, 4.076037844, 4.083547318, 4.090921028, 4.098166044, 4.105284488, 4.112282016, 4.119161458, 4.125927056, 4.132582345, 4.139131568, 4.145576139, 4.151921008, 4.158168297, 4.164320833, 4.170380738, 4.176352255, 4.182236797, 4.188036487, 4.19375486, 4.199392622, 4.204952603, 4.21043763, 4.215848411, 4.221187067, 4.22645572, 4.23165649, 4.236790793, 4.241859334, 4.246864943, 4.251809034, 4.256692313, 4.261516196, 4.266282802, 4.270992841, 4.275648432, 4.280249575, 4.284798393, 4.289294885, 4.29374188, 4.298139377, 4.302488791)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from xml.etree import ElementTree as ET
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pytest
|
|
6
|
+
from cddiagram import draw_cd_diagram
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _make_significant_samples() -> tuple[np.ndarray, list[str]]:
    """Return a seeded (30, 8) sample matrix with clearly separated model means, plus labels."""
    rng = np.random.default_rng(1)
    means = (0.2, 0.2, 0.4, 0.5, 0.7, 0.7, 0.8, 0.9)
    # Columns are drawn in order so the stream of random numbers is reproducible.
    columns = [rng.normal(loc=mean, scale=0.1, size=30) for mean in means]
    names = [f"model{number}" for number in range(1, 9)]
    return np.column_stack(columns), names
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_draw_cd_diagram_array(tmp_path):
    """Significant data passed as an array writes an SVG file and returns the element."""
    samples, labels = _make_significant_samples()
    target = tmp_path / "df.svg"

    result = draw_cd_diagram(samples, labels=labels, out_file=str(target), title="TEST")

    assert target.exists()
    written = target.read_text(encoding="utf-8")
    for marker in ("<svg", "CD="):
        assert marker in written
    assert isinstance(result, ET.Element)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_draw_cd_diagram_in_memory():
    """Without ``out_file`` the diagram is still returned as an in-memory element."""
    samples, labels = _make_significant_samples()

    result = draw_cd_diagram(samples, labels=labels)

    assert isinstance(result, ET.Element)
    serialized = ET.tostring(result, encoding="unicode")
    for marker in ("<svg", "CD="):
        assert marker in serialized
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_draw_cd_diagram_non_significant(tmp_path):
    """Indistinguishable models trigger a warning, return None and write no file."""
    rng = np.random.default_rng(7)
    # Three columns drawn from the same distribution.
    columns = [rng.normal(loc=0.5, scale=0.1, size=30) for _ in range(3)]
    samples = np.column_stack(columns)
    target = tmp_path / "none.svg"
    labels = ["model1", "model2", "model3"]

    with pytest.warns(UserWarning, match="cannot be rejected"):
        result = draw_cd_diagram(samples, labels=labels, out_file=str(target))

    assert result is None
    assert not target.exists()
|