plotnine 0.15.0a1__py3-none-any.whl → 0.15.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plotnine/_mpl/layout_manager/_layout_items.py +85 -23
- plotnine/_mpl/layout_manager/_layout_tree.py +16 -6
- plotnine/_mpl/layout_manager/_spaces.py +5 -5
- plotnine/_mpl/patches.py +70 -34
- plotnine/_mpl/text.py +150 -63
- plotnine/_mpl/utils.py +1 -1
- plotnine/_utils/__init__.py +30 -2
- plotnine/doctools.py +1 -1
- plotnine/facets/strips.py +17 -28
- plotnine/geoms/annotation_logticks.py +7 -8
- plotnine/geoms/annotation_stripes.py +6 -6
- plotnine/geoms/geom.py +20 -8
- plotnine/geoms/geom_abline.py +3 -2
- plotnine/geoms/geom_blank.py +0 -3
- plotnine/geoms/geom_boxplot.py +4 -4
- plotnine/geoms/geom_crossbar.py +3 -3
- plotnine/geoms/geom_dotplot.py +1 -1
- plotnine/geoms/geom_errorbar.py +2 -2
- plotnine/geoms/geom_errorbarh.py +2 -2
- plotnine/geoms/geom_hline.py +3 -2
- plotnine/geoms/geom_linerange.py +2 -2
- plotnine/geoms/geom_map.py +3 -3
- plotnine/geoms/geom_path.py +10 -11
- plotnine/geoms/geom_point.py +4 -5
- plotnine/geoms/geom_pointrange.py +3 -5
- plotnine/geoms/geom_polygon.py +2 -3
- plotnine/geoms/geom_raster.py +4 -5
- plotnine/geoms/geom_rect.py +3 -4
- plotnine/geoms/geom_ribbon.py +7 -7
- plotnine/geoms/geom_rug.py +1 -1
- plotnine/geoms/geom_segment.py +2 -2
- plotnine/geoms/geom_smooth.py +3 -3
- plotnine/geoms/geom_step.py +2 -2
- plotnine/geoms/geom_text.py +2 -3
- plotnine/geoms/geom_violin.py +4 -5
- plotnine/geoms/geom_vline.py +3 -2
- plotnine/guides/guides.py +1 -1
- plotnine/helpers.py +49 -0
- plotnine/iapi.py +28 -5
- plotnine/layer.py +18 -12
- plotnine/mapping/_eval_environment.py +1 -1
- plotnine/scales/scale_color.py +46 -14
- plotnine/scales/scale_continuous.py +5 -4
- plotnine/scales/scale_datetime.py +28 -14
- plotnine/scales/scale_discrete.py +2 -2
- plotnine/scales/scale_identity.py +10 -2
- plotnine/scales/scale_xy.py +2 -2
- plotnine/stats/binning.py +4 -1
- plotnine/stats/smoothers.py +19 -19
- plotnine/stats/stat.py +15 -25
- plotnine/stats/stat_bin.py +2 -5
- plotnine/stats/stat_bin_2d.py +7 -9
- plotnine/stats/stat_bindot.py +6 -11
- plotnine/stats/stat_boxplot.py +5 -5
- plotnine/stats/stat_count.py +5 -9
- plotnine/stats/stat_density.py +6 -9
- plotnine/stats/stat_density_2d.py +12 -9
- plotnine/stats/stat_ecdf.py +6 -5
- plotnine/stats/stat_ellipse.py +5 -6
- plotnine/stats/stat_function.py +6 -8
- plotnine/stats/stat_hull.py +2 -3
- plotnine/stats/stat_identity.py +1 -2
- plotnine/stats/stat_pointdensity.py +4 -7
- plotnine/stats/stat_qq.py +45 -20
- plotnine/stats/stat_qq_line.py +15 -11
- plotnine/stats/stat_quantile.py +6 -7
- plotnine/stats/stat_sina.py +12 -14
- plotnine/stats/stat_smooth.py +7 -10
- plotnine/stats/stat_sum.py +1 -2
- plotnine/stats/stat_summary.py +6 -9
- plotnine/stats/stat_summary_bin.py +10 -13
- plotnine/stats/stat_unique.py +1 -2
- plotnine/stats/stat_ydensity.py +7 -10
- plotnine/themes/elements/__init__.py +2 -1
- plotnine/themes/elements/margin.py +64 -1
- plotnine/themes/theme_gray.py +5 -3
- plotnine/themes/theme_matplotlib.py +5 -4
- plotnine/themes/theme_seaborn.py +7 -4
- plotnine/themes/theme_void.py +11 -4
- plotnine/themes/themeable.py +2 -2
- plotnine/typing.py +2 -2
- {plotnine-0.15.0a1.dist-info → plotnine-0.15.0a3.dist-info}/METADATA +7 -4
- {plotnine-0.15.0a1.dist-info → plotnine-0.15.0a3.dist-info}/RECORD +86 -85
- {plotnine-0.15.0a1.dist-info → plotnine-0.15.0a3.dist-info}/WHEEL +1 -1
- {plotnine-0.15.0a1.dist-info → plotnine-0.15.0a3.dist-info}/licenses/LICENSE +0 -0
- {plotnine-0.15.0a1.dist-info → plotnine-0.15.0a3.dist-info}/top_level.txt +0 -0
plotnine/stats/stat_bin.py
CHANGED
|
@@ -100,7 +100,6 @@ class stat_bin(stat):
|
|
|
100
100
|
and params["binwidth"] is None
|
|
101
101
|
and params["bins"] is None
|
|
102
102
|
):
|
|
103
|
-
params = params.copy()
|
|
104
103
|
params["bins"] = freedman_diaconis_bins(data["x"])
|
|
105
104
|
msg = (
|
|
106
105
|
"'stat_bin()' using 'bins = {}'. "
|
|
@@ -108,10 +107,8 @@ class stat_bin(stat):
|
|
|
108
107
|
)
|
|
109
108
|
warn(msg.format(params["bins"]), PlotnineWarning)
|
|
110
109
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
@classmethod
|
|
114
|
-
def compute_group(cls, data, scales, **params):
|
|
110
|
+
def compute_group(self, data, scales):
|
|
111
|
+
params = self.params
|
|
115
112
|
if params["breaks"] is not None:
|
|
116
113
|
breaks = np.asarray(params["breaks"])
|
|
117
114
|
if hasattr(scales.x, "transform"):
|
plotnine/stats/stat_bin_2d.py
CHANGED
|
@@ -66,18 +66,16 @@ class stat_bin_2d(stat):
|
|
|
66
66
|
CREATES = {"xmin", "xmax", "ymin", "ymax", "count", "density"}
|
|
67
67
|
|
|
68
68
|
def setup_params(self, data):
|
|
69
|
-
params = self.params
|
|
69
|
+
params = self.params
|
|
70
70
|
params["bins"] = dual_param(params["bins"])
|
|
71
71
|
params["breaks"] = dual_param(params["breaks"])
|
|
72
72
|
params["binwidth"] = dual_param(params["binwidth"])
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
binwidth = params["binwidth"]
|
|
80
|
-
drop = params["drop"]
|
|
73
|
+
|
|
74
|
+
def compute_group(self, data, scales):
|
|
75
|
+
bins = self.params["bins"]
|
|
76
|
+
breaks = self.params["breaks"]
|
|
77
|
+
binwidth = self.params["binwidth"]
|
|
78
|
+
drop = self.params["drop"]
|
|
81
79
|
weight = data.get("weight")
|
|
82
80
|
|
|
83
81
|
if weight is None:
|
plotnine/stats/stat_bindot.py
CHANGED
|
@@ -113,7 +113,6 @@ class stat_bindot(stat):
|
|
|
113
113
|
and params["binwidth"] is None
|
|
114
114
|
and params["bins"] is None
|
|
115
115
|
):
|
|
116
|
-
params = params.copy()
|
|
117
116
|
params["bins"] = freedman_diaconis_bins(data["x"])
|
|
118
117
|
msg = (
|
|
119
118
|
"'stat_bin()' using 'bins = {}'. "
|
|
@@ -121,10 +120,8 @@ class stat_bindot(stat):
|
|
|
121
120
|
)
|
|
122
121
|
warn(msg.format(params["bins"]), PlotnineWarning)
|
|
123
122
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
@classmethod
|
|
127
|
-
def compute_panel(cls, data, scales, **params):
|
|
123
|
+
def compute_panel(self, data, scales):
|
|
124
|
+
params = self.params
|
|
128
125
|
if (
|
|
129
126
|
params["method"] == "dotdensity"
|
|
130
127
|
and params["binpositions"] == "all"
|
|
@@ -160,10 +157,10 @@ class stat_bindot(stat):
|
|
|
160
157
|
data["binwidth"] = newdata["binwidth"]
|
|
161
158
|
data["weight"] = newdata["weight"]
|
|
162
159
|
data["bincenter"] = newdata["bincenter"]
|
|
163
|
-
return super(
|
|
160
|
+
return super().compute_panel(data, scales)
|
|
164
161
|
|
|
165
|
-
|
|
166
|
-
|
|
162
|
+
def compute_group(self, data, scales):
|
|
163
|
+
params = self.params
|
|
167
164
|
# Check that weights are whole numbers
|
|
168
165
|
# (for dots, weights must be whole)
|
|
169
166
|
weight = data.get("weight")
|
|
@@ -281,9 +278,7 @@ def densitybin(
|
|
|
281
278
|
if all(pd.isna(x)):
|
|
282
279
|
return pd.DataFrame()
|
|
283
280
|
|
|
284
|
-
if weight is None
|
|
285
|
-
weight = np.ones(len(x))
|
|
286
|
-
weight = np.asarray(weight)
|
|
281
|
+
weight = np.ones(len(x)) if weight is None else np.array(list(weight))
|
|
287
282
|
weight[np.isnan(weight)] = 0
|
|
288
283
|
|
|
289
284
|
if rangee is None:
|
plotnine/stats/stat_boxplot.py
CHANGED
|
@@ -91,10 +91,8 @@ class stat_boxplot(stat):
|
|
|
91
91
|
if self.params["width"] is None:
|
|
92
92
|
x = data.get("x", 0)
|
|
93
93
|
self.params["width"] = resolution(x, False) * 0.75
|
|
94
|
-
return self.params
|
|
95
94
|
|
|
96
|
-
|
|
97
|
-
def compute_group(cls, data, scales, **params):
|
|
95
|
+
def compute_group(self, data, scales):
|
|
98
96
|
n = len(data)
|
|
99
97
|
y = data["y"].to_numpy()
|
|
100
98
|
if "weight" in data:
|
|
@@ -103,12 +101,14 @@ class stat_boxplot(stat):
|
|
|
103
101
|
else:
|
|
104
102
|
weights = None
|
|
105
103
|
total_weight = len(y)
|
|
106
|
-
res = weighted_boxplot_stats(
|
|
104
|
+
res = weighted_boxplot_stats(
|
|
105
|
+
y, weights=weights, whis=self.params["coef"]
|
|
106
|
+
)
|
|
107
107
|
|
|
108
108
|
if len(np.unique(data["x"])) > 1:
|
|
109
109
|
width = np.ptp(data["x"]) * 0.9
|
|
110
110
|
else:
|
|
111
|
-
width = params["width"]
|
|
111
|
+
width = self.params["width"]
|
|
112
112
|
|
|
113
113
|
if isinstance(data["x"].dtype, pd.CategoricalDtype):
|
|
114
114
|
x = data["x"].iloc[0]
|
plotnine/stats/stat_count.py
CHANGED
|
@@ -49,21 +49,17 @@ class stat_count(stat):
|
|
|
49
49
|
CREATES = {"count", "prop"}
|
|
50
50
|
|
|
51
51
|
def setup_params(self, data):
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
params["width"] = resolution(data["x"], False) * 0.9
|
|
52
|
+
if self.params["width"] is None:
|
|
53
|
+
self.params["width"] = resolution(data["x"], False) * 0.9
|
|
55
54
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
@classmethod
|
|
59
|
-
def compute_group(cls, data, scales, **params):
|
|
55
|
+
def compute_group(self, data, scales):
|
|
60
56
|
x = data["x"]
|
|
61
|
-
if ("y" in data) or ("y" in params):
|
|
57
|
+
if ("y" in data) or ("y" in self.params):
|
|
62
58
|
msg = "stat_count() must not be used with a y aesthetic"
|
|
63
59
|
raise PlotnineError(msg)
|
|
64
60
|
|
|
65
61
|
weight = data.get("weight", [1] * len(x))
|
|
66
|
-
width = params["width"]
|
|
62
|
+
width = self.params["width"]
|
|
67
63
|
xdata_long = pd.DataFrame({"x": x, "weight": weight})
|
|
68
64
|
# weighted frequency count
|
|
69
65
|
count = xdata_long.pivot_table("weight", index=["x"], aggfunc="sum")[
|
plotnine/stats/stat_density.py
CHANGED
|
@@ -102,9 +102,9 @@ class stat_density(stat):
|
|
|
102
102
|
# useful for stacked density plots
|
|
103
103
|
|
|
104
104
|
'scaled' # density estimate, scaled to maximum of 1
|
|
105
|
+
'n' # Number of observations at a position
|
|
105
106
|
```
|
|
106
107
|
|
|
107
|
-
'n' # Number of observations at a position
|
|
108
108
|
|
|
109
109
|
"""
|
|
110
110
|
REQUIRED_AES = {"x"}
|
|
@@ -126,7 +126,7 @@ class stat_density(stat):
|
|
|
126
126
|
CREATES = {"density", "count", "scaled", "n"}
|
|
127
127
|
|
|
128
128
|
def setup_params(self, data):
|
|
129
|
-
params = self.params
|
|
129
|
+
params = self.params
|
|
130
130
|
lookup = {
|
|
131
131
|
"biweight": "biw",
|
|
132
132
|
"cosine": "cos",
|
|
@@ -148,21 +148,18 @@ class stat_density(stat):
|
|
|
148
148
|
)
|
|
149
149
|
raise PlotnineError(msg)
|
|
150
150
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
@classmethod
|
|
154
|
-
def compute_group(cls, data, scales, **params):
|
|
151
|
+
def compute_group(self, data, scales):
|
|
155
152
|
weight = data.get("weight")
|
|
156
153
|
|
|
157
|
-
if params["trim"]:
|
|
154
|
+
if self.params["trim"]:
|
|
158
155
|
range_x = data["x"].min(), data["x"].max()
|
|
159
156
|
else:
|
|
160
157
|
range_x = scales.x.dimension()
|
|
161
158
|
|
|
162
|
-
return compute_density(data["x"], weight, range_x,
|
|
159
|
+
return compute_density(data["x"], weight, range_x, self.params)
|
|
163
160
|
|
|
164
161
|
|
|
165
|
-
def compute_density(x, weight, range,
|
|
162
|
+
def compute_density(x, weight, range, params):
|
|
166
163
|
"""
|
|
167
164
|
Compute density
|
|
168
165
|
"""
|
plotnine/stats/stat_density_2d.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
import numpy as np
|
|
2
6
|
import pandas as pd
|
|
3
7
|
|
|
4
|
-
from ..doctools import document
|
|
5
8
|
from .density import get_var_type, kde
|
|
6
9
|
from .stat import stat
|
|
7
10
|
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from plotnine.typing import FloatArrayLike
|
|
13
|
+
|
|
8
14
|
|
|
9
|
-
@document
|
|
10
15
|
class stat_density_2d(stat):
|
|
11
16
|
"""
|
|
12
17
|
Compute 2D kernel density estimation
|
|
@@ -66,7 +71,7 @@ class stat_density_2d(stat):
|
|
|
66
71
|
CREATES = {"y"}
|
|
67
72
|
|
|
68
73
|
def setup_params(self, data):
|
|
69
|
-
params = self.params
|
|
74
|
+
params = self.params
|
|
70
75
|
if params["kde_params"] is None:
|
|
71
76
|
params["kde_params"] = {}
|
|
72
77
|
|
|
@@ -78,10 +83,8 @@ class stat_density_2d(stat):
|
|
|
78
83
|
y_type = get_var_type(data["y"])
|
|
79
84
|
kde_params["var_type"] = f"{x_type}{y_type}"
|
|
80
85
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
@classmethod
|
|
84
|
-
def compute_group(cls, data, scales, **params):
|
|
86
|
+
def compute_group(self, data, scales):
|
|
87
|
+
params = self.params
|
|
85
88
|
package = params["package"]
|
|
86
89
|
kde_params = params["kde_params"]
|
|
87
90
|
|
|
@@ -118,7 +121,7 @@ class stat_density_2d(stat):
|
|
|
118
121
|
return data
|
|
119
122
|
|
|
120
123
|
|
|
121
|
-
def contour_lines(X, Y, Z, levels):
|
|
124
|
+
def contour_lines(X, Y, Z, levels: int | FloatArrayLike):
|
|
122
125
|
"""
|
|
123
126
|
Calculate contour lines
|
|
124
127
|
"""
|
|
@@ -152,7 +155,7 @@ def contour_lines(X, Y, Z, levels):
|
|
|
152
155
|
level_values = []
|
|
153
156
|
start_pid = 1
|
|
154
157
|
for level in levels:
|
|
155
|
-
vertices, *_ = cgen.create_contour(level)
|
|
158
|
+
vertices, *_ = cgen.create_contour(level) # pyright: ignore[reportArgumentType]
|
|
156
159
|
for pid, piece in enumerate(vertices, start=start_pid):
|
|
157
160
|
n = len(piece) # pyright: ignore
|
|
158
161
|
segments.append(piece)
|
plotnine/stats/stat_ecdf.py
CHANGED
|
@@ -50,17 +50,18 @@ class stat_ecdf(stat):
|
|
|
50
50
|
DEFAULT_AES = {"y": after_stat("ecdf")}
|
|
51
51
|
CREATES = {"ecdf"}
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
def compute_group(cls, data, scales, **params):
|
|
53
|
+
def compute_group(self, data, scales):
|
|
55
54
|
from statsmodels.distributions.empirical_distribution import ECDF
|
|
56
55
|
|
|
56
|
+
n, pad = self.params["n"], self.params["pad"]
|
|
57
|
+
|
|
57
58
|
# If n is None, use raw values; otherwise interpolate
|
|
58
|
-
if
|
|
59
|
+
if n is None:
|
|
59
60
|
x = np.unique(data["x"])
|
|
60
61
|
else:
|
|
61
|
-
x = np.linspace(data["x"].min(), data["x"].max(),
|
|
62
|
+
x = np.linspace(data["x"].min(), data["x"].max(), n)
|
|
62
63
|
|
|
63
|
-
if
|
|
64
|
+
if pad:
|
|
64
65
|
x = np.hstack([-np.inf, x, np.inf])
|
|
65
66
|
|
|
66
67
|
ecdf = ECDF(data["x"].to_numpy())(x)
|
plotnine/stats/stat_ellipse.py
CHANGED
|
@@ -49,14 +49,13 @@ class stat_ellipse(stat):
|
|
|
49
49
|
"segments": 51,
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
def compute_group(cls, data, scales, **params):
|
|
52
|
+
def compute_group(self, data, scales):
|
|
54
53
|
import scipy.stats as stats
|
|
55
54
|
from scipy import linalg
|
|
56
55
|
|
|
57
|
-
level = params["level"]
|
|
58
|
-
segments = params["segments"]
|
|
59
|
-
type_ = params["type"]
|
|
56
|
+
level = self.params["level"]
|
|
57
|
+
segments = self.params["segments"]
|
|
58
|
+
type_ = self.params["type"]
|
|
60
59
|
|
|
61
60
|
dfn = 2
|
|
62
61
|
dfd = len(data) - 1
|
|
@@ -203,7 +202,7 @@ def cov_trob(
|
|
|
203
202
|
wt = wt[wt > 0]
|
|
204
203
|
n, _ = x.shape
|
|
205
204
|
|
|
206
|
-
wt = wt[:, np.newaxis]
|
|
205
|
+
wt = wt[:, np.newaxis] # pyright: ignore[reportCallIssue,reportArgumentType,reportOptionalSubscript]
|
|
207
206
|
|
|
208
207
|
# loc
|
|
209
208
|
use_loc = False
|
plotnine/stats/stat_function.py
CHANGED
|
@@ -82,14 +82,12 @@ class stat_function(stat):
|
|
|
82
82
|
"stat_function requires parameter 'fun' to be "
|
|
83
83
|
"a function or any other callable object"
|
|
84
84
|
)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
args = params["args"]
|
|
92
|
-
xlim = params["xlim"]
|
|
85
|
+
|
|
86
|
+
def compute_group(self, data, scales):
|
|
87
|
+
old_fun: Callable[..., FloatArrayLike] = self.params["fun"]
|
|
88
|
+
n = self.params["n"]
|
|
89
|
+
args = self.params["args"]
|
|
90
|
+
xlim = self.params["xlim"]
|
|
93
91
|
range_x = xlim or scales.x.dimension((0, 0))
|
|
94
92
|
|
|
95
93
|
if isinstance(args, (list, tuple)):
|
plotnine/stats/stat_hull.py
CHANGED
|
@@ -47,12 +47,11 @@ class stat_hull(stat):
|
|
|
47
47
|
}
|
|
48
48
|
CREATES = {"area"}
|
|
49
49
|
|
|
50
|
-
|
|
51
|
-
def compute_group(cls, data, scales, **params):
|
|
50
|
+
def compute_group(self, data, scales):
|
|
52
51
|
from scipy.spatial import ConvexHull
|
|
53
52
|
|
|
54
53
|
hull = ConvexHull(
|
|
55
|
-
data[["x", "y"]], qhull_options=params["qhull_options"]
|
|
54
|
+
data[["x", "y"]], qhull_options=self.params["qhull_options"]
|
|
56
55
|
)
|
|
57
56
|
idx = np.hstack([hull.vertices, hull.vertices[0]])
|
|
58
57
|
|
plotnine/stats/stat_identity.py
CHANGED
plotnine/stats/stat_pointdensity.py
CHANGED
|
@@ -51,7 +51,7 @@ class stat_pointdensity(stat):
|
|
|
51
51
|
CREATES = {"density"}
|
|
52
52
|
|
|
53
53
|
def setup_params(self, data):
|
|
54
|
-
params = self.params
|
|
54
|
+
params = self.params
|
|
55
55
|
if params["kde_params"] is None:
|
|
56
56
|
params["kde_params"] = {}
|
|
57
57
|
|
|
@@ -63,12 +63,9 @@ class stat_pointdensity(stat):
|
|
|
63
63
|
y_type = get_var_type(data["y"])
|
|
64
64
|
kde_params["var_type"] = f"{x_type}{y_type}"
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def compute_group(cls, data, scales, **params):
|
|
70
|
-
package = params["package"]
|
|
71
|
-
kde_params = params["kde_params"]
|
|
66
|
+
def compute_group(self, data, scales):
|
|
67
|
+
package = self.params["package"]
|
|
68
|
+
kde_params = self.params["kde_params"]
|
|
72
69
|
|
|
73
70
|
var_data = np.array([data["x"].to_numpy(), data["y"].to_numpy()]).T
|
|
74
71
|
density = kde(var_data, var_data, package, **kde_params)
|
plotnine/stats/stat_qq.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
import numpy as np
|
|
2
6
|
import pandas as pd
|
|
3
7
|
|
|
@@ -6,6 +10,11 @@ from ..exceptions import PlotnineError
|
|
|
6
10
|
from ..mapping.evaluation import after_stat
|
|
7
11
|
from .stat import stat
|
|
8
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from typing import Any, Sequence
|
|
15
|
+
|
|
16
|
+
from plotnine.typing import FloatArray
|
|
17
|
+
|
|
9
18
|
|
|
10
19
|
# Note: distribution should be a name from scipy.stat.distribution
|
|
11
20
|
@document
|
|
@@ -65,25 +74,41 @@ class stat_qq(stat):
|
|
|
65
74
|
"alpha_beta": (3 / 8, 3 / 8),
|
|
66
75
|
}
|
|
67
76
|
|
|
68
|
-
|
|
69
|
-
def compute_group(cls, data, scales, **params):
|
|
70
|
-
from scipy.stats.mstats import plotting_positions
|
|
71
|
-
|
|
72
|
-
from .distributions import get_continuous_distribution
|
|
73
|
-
|
|
77
|
+
def compute_group(self, data, scales):
|
|
74
78
|
sample = data["sample"].sort_values().to_numpy()
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
"the number of sample values."
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
quantiles = np.asarray(quantiles)
|
|
87
|
-
cdist = get_continuous_distribution(params["distribution"])
|
|
88
|
-
theoretical = cdist.ppf(quantiles, **params["dparams"])
|
|
79
|
+
theoretical = theoretical_qq(
|
|
80
|
+
sample,
|
|
81
|
+
self.params["distribution"],
|
|
82
|
+
alpha=self.params["alpha_beta"][0],
|
|
83
|
+
beta=self.params["alpha_beta"][1],
|
|
84
|
+
quantiles=self.params["quantiles"],
|
|
85
|
+
distribution_params=self.params["dparams"],
|
|
86
|
+
)
|
|
89
87
|
return pd.DataFrame({"sample": sample, "theoretical": theoretical})
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def theoretical_qq(
|
|
91
|
+
x: FloatArray,
|
|
92
|
+
distribution: str,
|
|
93
|
+
alpha: float,
|
|
94
|
+
beta: float,
|
|
95
|
+
quantiles: Sequence[float] | None,
|
|
96
|
+
distribution_params: dict[str, Any],
|
|
97
|
+
) -> FloatArray:
|
|
98
|
+
"""
|
|
99
|
+
Caculate theoretical qq distribution
|
|
100
|
+
"""
|
|
101
|
+
from scipy.stats.mstats import plotting_positions
|
|
102
|
+
|
|
103
|
+
from .distributions import get_continuous_distribution
|
|
104
|
+
|
|
105
|
+
if quantiles is None:
|
|
106
|
+
quantiles = plotting_positions(x, alpha, beta)
|
|
107
|
+
elif len(quantiles) != len(x):
|
|
108
|
+
raise PlotnineError(
|
|
109
|
+
"The number of quantile values is not the same as "
|
|
110
|
+
"the number of sample values."
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
cdist = get_continuous_distribution(distribution)
|
|
114
|
+
return cdist.ppf(np.asarray(quantiles), **distribution_params)
|
plotnine/stats/stat_qq_line.py
CHANGED
|
@@ -4,7 +4,7 @@ import pandas as pd
|
|
|
4
4
|
from ..doctools import document
|
|
5
5
|
from ..exceptions import PlotnineError
|
|
6
6
|
from .stat import stat
|
|
7
|
-
from .stat_qq import
|
|
7
|
+
from .stat_qq import theoretical_qq
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@document
|
|
@@ -64,31 +64,35 @@ class stat_qq_line(stat):
|
|
|
64
64
|
raise PlotnineError(
|
|
65
65
|
"Cannot fit line quantiles. 'line_p' must be of length 2"
|
|
66
66
|
)
|
|
67
|
-
return self.params
|
|
68
67
|
|
|
69
|
-
|
|
70
|
-
def compute_group(cls, data, scales, **params):
|
|
68
|
+
def compute_group(self, data, scales):
|
|
71
69
|
from scipy.stats.mstats import mquantiles
|
|
72
70
|
|
|
73
71
|
from .distributions import get_continuous_distribution
|
|
74
72
|
|
|
75
|
-
line_p = params["line_p"]
|
|
76
|
-
dparams = params["dparams"]
|
|
73
|
+
line_p = self.params["line_p"]
|
|
74
|
+
dparams = self.params["dparams"]
|
|
77
75
|
|
|
78
76
|
# Compute theoretical values
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
77
|
+
sample = data["sample"].sort_values().to_numpy()
|
|
78
|
+
theoretical = theoretical_qq(
|
|
79
|
+
sample,
|
|
80
|
+
self.params["distribution"],
|
|
81
|
+
alpha=self.params["alpha_beta"][0],
|
|
82
|
+
beta=self.params["alpha_beta"][1],
|
|
83
|
+
quantiles=self.params["quantiles"],
|
|
84
|
+
distribution_params=dparams,
|
|
85
|
+
)
|
|
82
86
|
|
|
83
87
|
# Compute slope & intercept of the line through the quantiles
|
|
84
|
-
cdist = get_continuous_distribution(params["distribution"])
|
|
88
|
+
cdist = get_continuous_distribution(self.params["distribution"])
|
|
85
89
|
x_coords = cdist.ppf(line_p, **dparams)
|
|
86
90
|
y_coords = mquantiles(sample, line_p)
|
|
87
91
|
slope = (np.diff(y_coords) / np.diff(x_coords))[0]
|
|
88
92
|
intercept = y_coords[0] - slope * x_coords[0]
|
|
89
93
|
|
|
90
94
|
# Get x,y points that describe the line
|
|
91
|
-
if params["fullrange"] and scales.x:
|
|
95
|
+
if self.params["fullrange"] and scales.x:
|
|
92
96
|
x = scales.x.dimension()
|
|
93
97
|
else:
|
|
94
98
|
x = theoretical.min(), theoretical.max()
|
plotnine/stats/stat_quantile.py
CHANGED
|
@@ -59,7 +59,7 @@ class stat_quantile(stat):
|
|
|
59
59
|
CREATES = {"quantile", "group"}
|
|
60
60
|
|
|
61
61
|
def setup_params(self, data):
|
|
62
|
-
params = self.params
|
|
62
|
+
params = self.params
|
|
63
63
|
if params["formula"] is None:
|
|
64
64
|
params["formula"] = "y ~ x"
|
|
65
65
|
warn("Formula not specified, using '{}'", PlotnineWarning)
|
|
@@ -68,15 +68,14 @@ class stat_quantile(stat):
|
|
|
68
68
|
except TypeError:
|
|
69
69
|
params["quantiles"] = (params["quantiles"],)
|
|
70
70
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
res = [quant_pred(q, data, **params) for q in params["quantiles"]]
|
|
71
|
+
def compute_group(self, data, scales):
|
|
72
|
+
res = [
|
|
73
|
+
quant_pred(q, data, self.params) for q in self.params["quantiles"]
|
|
74
|
+
]
|
|
76
75
|
return pd.concat(res, axis=0, ignore_index=True)
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
def quant_pred(q, data,
|
|
78
|
+
def quant_pred(q, data, params):
|
|
80
79
|
"""
|
|
81
80
|
Quantile precitions
|
|
82
81
|
"""
|
plotnine/stats/stat_sina.py
CHANGED
|
@@ -116,7 +116,7 @@ class stat_sina(stat):
|
|
|
116
116
|
return data
|
|
117
117
|
|
|
118
118
|
def setup_params(self, data):
|
|
119
|
-
params = self.params
|
|
119
|
+
params = self.params
|
|
120
120
|
random_state = params["random_state"]
|
|
121
121
|
|
|
122
122
|
if params["maxwidth"] is None:
|
|
@@ -137,10 +137,9 @@ class stat_sina(stat):
|
|
|
137
137
|
params["clip"] = (-np.inf, np.inf)
|
|
138
138
|
params["bounds"] = (-np.inf, np.inf)
|
|
139
139
|
params["n"] = 512
|
|
140
|
-
return params
|
|
141
140
|
|
|
142
|
-
|
|
143
|
-
|
|
141
|
+
def compute_panel(self, data, scales):
|
|
142
|
+
params = self.params
|
|
144
143
|
maxwidth = params["maxwidth"]
|
|
145
144
|
random_state = params["random_state"]
|
|
146
145
|
fuzz = 1e-8
|
|
@@ -154,7 +153,7 @@ class stat_sina(stat):
|
|
|
154
153
|
else:
|
|
155
154
|
params["bins"] = breaks_from_bins(y_dim_fuzzed, params["bins"])
|
|
156
155
|
|
|
157
|
-
data = super(
|
|
156
|
+
data = super().compute_panel(data, scales)
|
|
158
157
|
|
|
159
158
|
if not len(data):
|
|
160
159
|
return data
|
|
@@ -198,11 +197,10 @@ class stat_sina(stat):
|
|
|
198
197
|
|
|
199
198
|
return data
|
|
200
199
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
bin_limit = params["bin_limit"]
|
|
200
|
+
def compute_group(self, data, scales):
|
|
201
|
+
maxwidth = self.params["maxwidth"]
|
|
202
|
+
bins = self.params["bins"]
|
|
203
|
+
bin_limit = self.params["bin_limit"]
|
|
206
204
|
weight = None
|
|
207
205
|
y = data["y"]
|
|
208
206
|
|
|
@@ -215,12 +213,12 @@ class stat_sina(stat):
|
|
|
215
213
|
elif len(np.unique(y)) < 2:
|
|
216
214
|
data["density"] = 1
|
|
217
215
|
data["scaled"] = 1
|
|
218
|
-
elif params["method"] == "density":
|
|
216
|
+
elif self.params["method"] == "density":
|
|
219
217
|
from scipy.interpolate import interp1d
|
|
220
218
|
|
|
221
219
|
# density kernel estimation
|
|
222
220
|
range_y = y.min(), y.max()
|
|
223
|
-
dens = compute_density(y, weight, range_y,
|
|
221
|
+
dens = compute_density(y, weight, range_y, self.params)
|
|
224
222
|
densf = interp1d(
|
|
225
223
|
dens["x"],
|
|
226
224
|
dens["density"],
|
|
@@ -253,9 +251,9 @@ class stat_sina(stat):
|
|
|
253
251
|
|
|
254
252
|
return data
|
|
255
253
|
|
|
256
|
-
def finish_layer(self, data
|
|
254
|
+
def finish_layer(self, data):
|
|
257
255
|
# Rescale x in case positions have been adjusted
|
|
258
|
-
style = params["style"]
|
|
256
|
+
style = self.params["style"]
|
|
259
257
|
x_mean = data["x"].to_numpy()
|
|
260
258
|
x_mod = (data["xmax"] - data["xmin"]) / data["width"]
|
|
261
259
|
data["x"] = data["x"] + data["x_diff"] * x_mod
|