lets-plot 4.6.2-cp310-cp310-macosx_11_0_arm64.whl → 4.7.0-cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- lets_plot/_global_settings.py +5 -0
- lets_plot/_kbridge.py +7 -0
- lets_plot/_type_utils.py +29 -6
- lets_plot/_version.py +1 -1
- lets_plot/bistro/im.py +2 -2
- lets_plot/bistro/waterfall.py +93 -12
- lets_plot/export/ggsave_.py +22 -14
- lets_plot/package_data/lets-plot.min.js +2 -1
- lets_plot/plot/annotation.py +75 -18
- lets_plot/plot/core.py +152 -33
- lets_plot/plot/geom.py +752 -93
- lets_plot/plot/geom_function_.py +1 -1
- lets_plot/plot/geom_imshow_.py +42 -51
- lets_plot/plot/pos.py +13 -44
- lets_plot/plot/scale_position.py +9 -3
- lets_plot/plot/series_meta.py +179 -105
- lets_plot/plot/stat.py +4 -4
- lets_plot/plot/subplots.py +4 -4
- lets_plot/plot/theme_.py +57 -54
- lets_plot/plot/util.py +31 -5
- {lets_plot-4.6.2.dist-info → lets_plot-4.7.0.dist-info}/METADATA +52 -24
- {lets_plot-4.6.2.dist-info → lets_plot-4.7.0.dist-info}/RECORD +30 -26
- {lets_plot-4.6.2.dist-info → lets_plot-4.7.0.dist-info}/WHEEL +1 -1
- lets_plot-4.7.0.dist-info/licenses/licenses/LICENSE.FreeType +166 -0
- lets_plot-4.7.0.dist-info/licenses/licenses/LICENSE.ImageMagick +106 -0
- lets_plot-4.7.0.dist-info/licenses/licenses/LICENSE.expat +21 -0
- lets_plot-4.7.0.dist-info/licenses/licenses/LICENSE.fontconfig +200 -0
- lets_plot_kotlin_bridge.cpython-310-darwin.so +0 -0
- {lets_plot-4.6.2.dist-info → lets_plot-4.7.0.dist-info}/licenses/LICENSE +0 -0
- {lets_plot-4.6.2.dist-info → lets_plot-4.7.0.dist-info}/top_level.txt +0 -0
lets_plot/plot/geom_function_.py
CHANGED
@@ -102,7 +102,7 @@ def geom_function(mapping=None, *, data=None, stat=None, geom=None, position=Non
         The geometry to display the function, as a string.
     position : str or `FeatureSpec`, default='identity'
         Position adjustment.
-        Either a position adjustment name: 'dodge', '
+        Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
         'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
     show_legend : bool, default=True
         False - do not show legend for this layer.
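The docstring change above simply completes the list of accepted position adjustment names. As a reminder of what the parameter does, here is a minimal, hypothetical sketch (the data and aesthetics are made up for illustration) showing that `position` accepts either a name string or a `position_*()` constructor:

    # Hypothetical usage sketch: the `position` argument of a layer accepts
    # either an adjustment name or the result of a position_*() call.
    from lets_plot import *

    LetsPlot.setup_html()

    data = {'x': ['a', 'a', 'b', 'b'], 'y': [1, 2, 3, 4], 'g': ['p', 'q', 'p', 'q']}

    # By name: uses the adjustment's default parameters.
    p1 = ggplot(data, aes('x', 'y', fill='g')) + geom_bar(stat='identity', position='dodge')

    # By constructor: allows tuning, e.g. the dodging width.
    p2 = ggplot(data, aes('x', 'y', fill='g')) + geom_bar(stat='identity', position=position_dodge(width=0.5))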
lets_plot/plot/geom_imshow_.py
CHANGED
@@ -234,29 +234,44 @@ def geom_imshow(image_data, cmap=None, *,

         (image_data, greyscale_data_min, greyscale_data_max) = _normalize_2D(image_data, norm, vmin, vmax, min_lum)
         height, width = image_data.shape
-        nchannels = 1
-
         has_nan = numpy.isnan(image_data.max())
-
-
+
+        if cmap:
+            # colormap via palettable
+            if not palettable:
+                raise ValueError(
+                    "Can't process `cmap`: please install 'Palettable' (https://pypi.org/project/palettable/) to your "
+                    "Python environment. "
+                )
+
+            # prepare palette
+            palette = None
+            if not has_nan:
+                alpha_ch_val = 255 if alpha is None else 255 * alpha
+                cmap_256 = palettable.get_map(cmap + "_256")
+                palette = [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in cmap_256.hex_colors]
+            else:
+                alpha_ch_val = 255 if alpha is None else 255 * alpha
+                cmap_255 = palettable.get_map(cmap + "_255")
+                # transparent color at index 0
+                palette = [numpy.array([0, 0, 0, 0], dtype=numpy.uint8)] \
+                          + [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in cmap_255.hex_colors]
+
+            # replace indexes with palette colors
+            if has_nan:
+                # replace all NaN-s with 0 (index 0 for transparent color)
+                numpy.nan_to_num(image_data, copy=False, nan=0)
+            image_data = numpy.take(palette, numpy.round(image_data).astype(numpy.int32), axis=0)
+
+        else:
+            # Greyscale
             alpha_ch_scaler = 1 if alpha is None else alpha
             is_nan = numpy.isnan(image_data)
             im_shape = numpy.shape(image_data)
             alpha_ch = numpy.zeros(im_shape, dtype=image_data.dtype)
             alpha_ch[is_nan == False] = 255 * alpha_ch_scaler
             image_data[is_nan] = 0
-            image_data = numpy.
-
-        elif has_nan and cmap:
-            # replace all NaN-s with 0 (index 0 for transparent color)
-            numpy.nan_to_num(image_data, copy=False, nan=0)
-        elif not cmap and alpha is not None:
-            # add alpha-channel (LA)
-            im_shape = numpy.shape(image_data)
-            alpha_ch = numpy.full(im_shape, 255 * alpha, dtype=image_data.dtype)
-            image_data = numpy.dstack((image_data, alpha_ch))
-            nchannels = 2
-
+            image_data = numpy.repeat(image_data[:, :, numpy.newaxis], 3, axis=2)  # convert to RGB
+            image_data = numpy.dstack((image_data, alpha_ch))  # convert to RGBA
     else:
         # Color RGB/RGBA image
         # Make a copy:
@@ -268,15 +283,14 @@ def geom_imshow(image_data, cmap=None, *,

         height, width, nchannels = image_data.shape

-        if
-        if
-
-
-
-
-
-
-            image_data[:, :, 3] *= alpha
+        if nchannels == 3:
+            alpha_ch_scaler = 1 if alpha is None else alpha
+            # RGB image: add alpha channel (RGBA)
+            alpha_ch = numpy.full((height, width, 1), 255 * alpha_ch_scaler, dtype=image_data.dtype)
+            image_data = numpy.dstack((image_data, alpha_ch))
+        elif nchannels == 4 and alpha is not None:
+            # RGBA image: apply alpha scaling
+            image_data[:, :, 3] *= alpha

         # Make sure all values are ints in range 0-255.
         image_data.clip(0, 255, out=image_data)
@@ -312,39 +326,16 @@ def geom_imshow(image_data, cmap=None, *,
     image_data = image_data.astype(numpy.int8)

     # Reshape to 2d-array:
-
-    # or from [[[L, A], [L, A]], ...] to [[L, A, L, A],..] for greyscale-alpha (LA)
-    # or pypng will fail
-    image_2d = image_data.reshape(-1, width * nchannels)
+    image_2d = image_data.reshape(-1, width * 4)  # always 4 channels (RGBA)

     # PNG writer
-    palette = None
-    if cmap and greyscale:
-        # colormap via palettable
-        if not palettable:
-            raise ValueError(
-                "Can't process `cmap`: please install 'Palettable' (https://pypi.org/project/palettable/) to your "
-                "Python environment. "
-            )
-        if not has_nan:
-            alpha_ch_val = None if alpha is None else 255 * alpha
-            cmap_256 = palettable.get_map(cmap + "_256")
-            palette = [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in cmap_256.hex_colors]
-        else:
-            alpha_ch_val = 255 if alpha is None else 255 * alpha
-            cmap_255 = palettable.get_map(cmap + "_255")
-            # transparent color at index 0
-            palette = [numpy.array([0, 0, 0, 0], dtype=numpy.uint8)] + [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in
-                                                                        cmap_255.hex_colors]
-
     png_bytes = io.BytesIO()
     png.Writer(
         width=width,
         height=height,
-        greyscale=
-        alpha=
+        greyscale=False,
+        alpha=True,
         bitdepth=8,
-        palette=palette,
         compression=compression
     ).write(png_bytes, image_2d)

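Net effect of the three hunks above: the greyscale path now always produces an RGBA array, either by mapping normalized values through a 256-color palette (index 0 reserved for a transparent color when NaN values are present) or by replicating the luminance channel to RGB and appending an alpha channel, so the PNG writer is now always called with greyscale=False, alpha=True and no longer receives a palette. A standalone sketch of the index-to-RGBA lookup, with a hand-built ramp standing in for a palettable colormap (the ramp and sizes are made up for illustration):

    # Standalone numpy sketch of the palette lookup used by the new code path.
    import numpy as np

    ramp = np.linspace(0, 255, 256, dtype=np.uint8)
    palette = np.stack(
        [ramp, ramp[::-1], np.full(256, 128, dtype=np.uint8), np.full(256, 255, dtype=np.uint8)],
        axis=1,
    )  # shape (256, 4): one RGBA color per index; index 0 could be made transparent for NaN pixels

    grey = np.random.rand(16, 16) * 255  # 2D data already normalized to 0..255
    grey[0, 0] = np.nan                  # a missing pixel

    np.nan_to_num(grey, copy=False, nan=0)                            # NaN -> index 0
    rgba = np.take(palette, np.round(grey).astype(np.int32), axis=0)  # shape (16, 16, 4)
    assert rgba.shape == (16, 16, 4)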
lets_plot/plot/pos.py
CHANGED
@@ -22,6 +22,7 @@ def position_dodge(width=None):
         This is useful when you want to align narrow geoms with wider geoms.
         The value of width is relative and typically ranges between 0 and 1.
         Values that are greater than 1 lead to overlapping of the objects.
+        The default value is taken from the width aesthetics of the data point.

     Returns
     -------
@@ -55,44 +56,11 @@ def position_dodge(width=None):

 def position_dodgev(height=None):
     """
-
-
-    Parameters
-    ----------
-    height : float
-        Dodging height, when different to the height of the individual elements.
-        This is useful when you want to align narrow geoms with taller geoms.
-        The value of height is relative and typically ranges between 0 and 1.
-        Values that are greater than 1 lead to overlapping of the objects.
-
-    Returns
-    -------
-    `FeatureSpec`
-        Geom object position specification.
-
-    Notes
-    -----
-    Adjust position by dodging overlaps to the side.
-
-    Examples
-    --------
-    .. jupyter-execute::
-        :linenos:
-        :emphasize-lines: 11
-
-        from lets_plot import *
-        LetsPlot.setup_html()
-        data = {
-            'xmin': [0.2, 4.6, 1.6, 3.5],
-            'xmax': [1.5, 5.3, 3.0, 4.4],
-            'y': ['a', 'a', 'b', 'b'],
-            'c': ['gr1', 'gr2', 'gr1', 'gr2']
-        }
-        ggplot(data, aes(y='y', color='c')) + \\
-            geom_errorbar(aes(xmin='xmin', xmax='xmax'), height=0.1, size=2, \\
-                          position=position_dodgev(height=0.2))
+    Function `position_dodgev()` is deprecated and will be removed in future releases.

     """
+    print("WARN: The function position_dodgev() is deprecated and will be removed in future releases.")
+
     return _pos('dodgev', height=height)


@@ -102,11 +70,11 @@ def position_jitter(width=None, height=None, seed=None):

     Parameters
     ----------
-    width : float
+    width : float, default=.4
         Jittering width.
         The value of width is relative and typically ranges between 0 and 0.5.
         Values that are greater than 0.5 lead to overlapping of the points.
-    height : float
+    height : float, default=.4
         Jittering height.
         The value of height is relative and typically ranges between 0 and 0.5.
         Values that are greater than 0.5 lead to overlapping of the points.
@@ -152,9 +120,9 @@ def position_nudge(x=None, y=None, unit=None):

     Parameters
     ----------
-    x : float
+    x : float, default=0.0
         Nudging width.
-    y : float
+    y : float, default=0.0
         Nudging height.
     unit : {'identity', 'size', 'px'}, default='identity'
         Units for x and y nudging.
@@ -206,11 +174,12 @@ def position_jitterdodge(dodge_width=None, jitter_width=None, jitter_height=None
         Bin width.
         The value of `dodge_width` is relative and typically ranges between 0 and 1.
         Values that are greater than 1 lead to overlapping of the boxes.
-
+        The default value is taken from the width aesthetics of the data point.
+    jitter_width : float, default=.4
         Jittering width.
         The value of `jitter_width` is relative and typically ranges between 0 and 0.5.
         Values that are greater than 0.5 lead to overlapping of the points.
-    jitter_height : float
+    jitter_height : float, default=.4
         Jittering height.
         The value of `jitter_height` is relative and typically ranges between 0 and 0.5.
         Values that are greater than 0.5 lead to overlapping of the points.
@@ -259,7 +228,7 @@ def position_stack(vjust=None, mode=None):

     Parameters
     ----------
-    vjust : float
+    vjust : float, default=1.0
         Vertical adjustment for geoms that have a position (like points or lines),
         not a dimension (like bars or areas).
         Set to 0 to align with the bottom, 0.5 for the middle, and 1 for the top.
@@ -305,7 +274,7 @@ def position_fill(vjust=None, mode=None):

     Parameters
     ----------
-    vjust : float
+    vjust : float, default=1.0
         Vertical adjustment for geoms that have a position (like points or lines),
         not a dimension (like bars or areas).
         Set to 0 to align with the bottom, 0.5 for the middle, and 1 for the top.
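Two things happen in this file: position_dodgev() is deprecated (its docstring, including the old errorbar example, is replaced by a deprecation note, and the function now prints a warning before returning the 'dodgev' spec), and several other position_*() docstrings now state their defaults. A minimal illustration of the deprecation path, reusing the data from the removed docstring example:

    # The call still returns the 'dodgev' position spec, but now also prints:
    # "WARN: The function position_dodgev() is deprecated and will be removed in future releases."
    from lets_plot import *

    LetsPlot.setup_html()

    pos = position_dodgev(height=0.2)

    data = {
        'xmin': [0.2, 4.6, 1.6, 3.5],
        'xmax': [1.5, 5.3, 3.0, 4.4],
        'y': ['a', 'a', 'b', 'b'],
        'c': ['gr1', 'gr2', 'gr1', 'gr2']
    }
    p = ggplot(data, aes(y='y', color='c')) + \
        geom_errorbar(aes(xmin='xmin', xmax='xmax'), height=0.1, size=2, position=pos)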
lets_plot/plot/scale_position.py
CHANGED
@@ -722,7 +722,7 @@ def scale_x_discrete(name=None, *,
         LetsPlot.setup_html()
         np.random.seed(43)
         scores = {'rating': np.random.randint(3, 6, size=10)}
-        ggplot(scores, aes(x='rating')) + geom_bar() + \\
+        ggplot(scores, aes(x=as_discrete('rating'))) + geom_bar() + \\
             scale_x_discrete(name='rating', format='.1f')

     """
@@ -1012,7 +1012,10 @@ def scale_x_datetime(name=None, *,
                      position=None
                      ):
     """
-    Position scale x
+    Position scale for the x-axis with date/time data.
+    The input is expected to be either a series of integers representing milliseconds since the Unix epoch, or Python datetime objects.
+    Assumes UTC timezone if no timezone information is present in the data (naive datetime).
+    For timezone-aware datetime objects, the timezone information from the data is preserved.

     Parameters
     ----------
@@ -1099,7 +1102,10 @@ def scale_y_datetime(name=None, *,
                      position=None
                      ):
     """
-    Position scale y
+    Position scale for the y-axis with date/time data.
+    The input is expected to be either a series of integers representing milliseconds since the Unix epoch, or Python datetime objects.
+    Assumes UTC timezone if no timezone information is present in the data (naive datetime).
+    For timezone-aware datetime objects, the timezone information from the data is preserved.

     Parameters
     ----------
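The rewritten summaries for scale_x_datetime() and scale_y_datetime() spell out the accepted input (epoch milliseconds or Python datetime objects) and the timezone rules: naive datetimes are treated as UTC, timezone-aware ones keep their zone. A small hypothetical sketch of both cases (the data and the fixed +02:00 offset are made up for illustration):

    from datetime import datetime, timedelta, timezone
    from lets_plot import *

    LetsPlot.setup_html()

    naive = [datetime(2024, 1, 1) + timedelta(hours=i) for i in range(6)]    # treated as UTC
    aware = [d.replace(tzinfo=timezone(timedelta(hours=2))) for d in naive]  # keeps its +02:00 zone

    p = ggplot({'t': aware, 'v': [1, 3, 2, 5, 4, 6]}, aes('t', 'v')) + \
        geom_line() + \
        scale_x_datetime(name='time')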
lets_plot/plot/series_meta.py
CHANGED
@@ -1,129 +1,203 @@
 # Copyright (c) 2024. JetBrains s.r.o.
 # Use of this source code is governed by the MIT license that can be found in the LICENSE file.
-from datetime import datetime
-from typing import Union, Dict, Iterable
+from datetime import datetime, date, time
+from typing import Union, Dict, Iterable, Optional

 from lets_plot._type_utils import is_polars_dataframe
 from lets_plot.plot.util import is_pandas_data_frame

+try:
+    import numpy
+except ImportError:
+    numpy = None
+
+try:
+    import pandas
+except ImportError:
+    pass
+
+try:
+    import polars as pl
+    from polars.datatypes.group import INTEGER_DTYPES as PL_INTEGER_DTYPES
+    from polars.datatypes.group import FLOAT_DTYPES as PL_FLOAT_DTYPES
+except ImportError:
+    pass
+
 TYPE_INTEGER = 'int'
 TYPE_FLOATING = 'float'
 TYPE_STRING = 'str'
 TYPE_BOOLEAN = 'bool'
 TYPE_DATE_TIME = 'datetime'
+TYPE_DATE = 'date'  # Local date (no time zone)
+TYPE_TIME = 'time'  # Local time (we ignore time zone even if it is present)
 TYPE_UNKNOWN = 'unknown'


-def
+def _infer_type(data: Union[Dict, 'pandas.DataFrame', 'polars.DataFrame']) -> Dict[str, str]:
     type_info = {}

     if is_pandas_data_frame(data):
-        import pandas as pd
-        import numpy as np  # np is a dependency of pandas, we can import it without checking
-
         for var_name, var_content in data.items():
-
-                type_info[var_name] = TYPE_UNKNOWN
-                continue
-
-            inferred_type = pd.api.types.infer_dtype(var_content.values, skipna=True)
-            if inferred_type == "categorical":
-                dtype = var_content.cat.categories.dtype
-
-                if np.issubdtype(dtype, np.integer):
-                    type_info[var_name] = TYPE_INTEGER
-                elif np.issubdtype(dtype, np.floating):
-                    type_info[var_name] = TYPE_FLOATING
-                elif np.issubdtype(dtype, np.object_):
-                    # Check if all elements are strings
-                    if all(isinstance(x, str) for x in var_content.cat.categories):
-                        type_info[var_name] = TYPE_STRING
-                    else:
-                        type_info[var_name] = TYPE_UNKNOWN
-                else:
-                    type_info[var_name] = TYPE_UNKNOWN
-            else:
-                # see https://pandas.pydata.org/docs/reference/api/pandas.api.types.infer_dtype.html
-                if inferred_type == 'string':
-                    type_info[var_name] = TYPE_STRING
-                elif inferred_type == 'floating':
-                    type_info[var_name] = TYPE_FLOATING
-                elif inferred_type == 'integer':
-                    type_info[var_name] = TYPE_INTEGER
-                elif inferred_type == 'boolean':
-                    type_info[var_name] = TYPE_BOOLEAN
-                elif inferred_type == 'datetime64' or inferred_type == 'datetime':
-                    type_info[var_name] = TYPE_DATE_TIME
-                elif inferred_type == "date":
-                    type_info[var_name] = TYPE_DATE_TIME
-                elif inferred_type == 'empty':  # for columns with all None values
-                    type_info[var_name] = TYPE_UNKNOWN
-                else:
-                    type_info[var_name] = 'unknown(pandas:' + inferred_type + ')'
+            type_info[var_name] = _infer_type_pandas_dataframe(var_name, var_content)
     elif is_polars_dataframe(data):
-        import polars as pl
-        from polars.datatypes.group import INTEGER_DTYPES, FLOAT_DTYPES
         for var_name, var_type in data.schema.items():
-
-            # https://docs.pola.rs/api/python/stable/reference/datatypes.html
-            if var_type in FLOAT_DTYPES:
-                type_info[var_name] = TYPE_FLOATING
-            elif var_type in INTEGER_DTYPES:
-                type_info[var_name] = TYPE_INTEGER
-            elif var_type == pl.datatypes.String:
-                type_info[var_name] = TYPE_STRING
-            elif var_type == pl.datatypes.Boolean:
-                type_info[var_name] = TYPE_BOOLEAN
-            elif var_type == pl.datatypes.Date or var_type == pl.datatypes.Datetime:
-                type_info[var_name] = TYPE_DATE_TIME
-            else:
-                type_info[var_name] = 'unknown(polars:' + str(var_type) + ')'
+            type_info[var_name] = _infer_type_polars_dataframe(var_name, var_type)
     elif isinstance(data, dict):
         for var_name, var_content in data.items():
-
-            if not any(True for _ in var_content):  # empty
-                type_info[var_name] = TYPE_UNKNOWN
-                continue
-
-            type_set = set(type(val) for val in var_content)
-            if type(None) in type_set:
-                type_set.remove(type(None))
-
-            if len(type_set) == 0:
-                continue
-
-            if len(type_set) > 1:
-                if all(issubclass(type_obj, int) or issubclass(type_obj, float) for type_obj in type_set):
-                    type_info[var_name] = TYPE_FLOATING
-                else:
-                    type_info[var_name] = 'unknown(mixed types)'
-                continue
-
-            try:
-                import numpy
-            except ImportError:
-                numpy = None
-
-            type_obj = list(type_set)[0]
-            if type_obj == bool:
-                type_info[var_name] = TYPE_BOOLEAN
-            elif issubclass(type_obj, int):
-                type_info[var_name] = TYPE_INTEGER
-            elif issubclass(type_obj, float):
-                type_info[var_name] = TYPE_FLOATING
-            elif issubclass(type_obj, str):
-                type_info[var_name] = TYPE_STRING
-            elif issubclass(type_obj, datetime):
-                type_info[var_name] = TYPE_DATE_TIME
-            elif numpy and issubclass(type_obj, numpy.datetime64):
-                type_info[var_name] = TYPE_DATE_TIME
-            elif numpy and issubclass(type_obj, numpy.timedelta64):
-                type_info[var_name] = TYPE_DATE_TIME
-            elif numpy and issubclass(type_obj, numpy.integer):
-                type_info[var_name] = TYPE_INTEGER
-            elif numpy and issubclass(type_obj, numpy.floating):
-                type_info[var_name] = TYPE_FLOATING
-            else:
-                type_info[var_name] = 'unknown(python:' + str(type_obj) + ')'
+            type_info[var_name] = _infer_type_dict(var_name, var_content)

     return type_info
+
+
+def _infer_type_pandas_dataframe(var_name: str, var_content) -> str:
+    if var_content.empty:
+        return TYPE_UNKNOWN
+    elif var_content.isna().all():
+        return TYPE_UNKNOWN
+
+    lp_dtype = TYPE_UNKNOWN
+    time_zone = None
+    pandas_dtype = pandas.api.types.infer_dtype(var_content.values, skipna=True)
+
+    if pandas_dtype == "categorical":
+        dtype = var_content.cat.categories.dtype
+
+        if numpy.issubdtype(dtype, numpy.integer):
+            lp_dtype = TYPE_INTEGER
+        elif numpy.issubdtype(dtype, numpy.floating):
+            lp_dtype = TYPE_FLOATING
+        elif numpy.issubdtype(dtype, numpy.object_):
+            # Check if all elements are strings
+            if all(isinstance(x, str) for x in var_content.cat.categories):
+                lp_dtype = TYPE_STRING
+    else:
+        # see https://pandas.pydata.org/docs/reference/api/pandas.api.types.infer_dtype.html
+        if pandas_dtype == 'string':
+            lp_dtype = TYPE_STRING
+        elif pandas_dtype == 'floating':
+            lp_dtype = TYPE_FLOATING
+        elif pandas_dtype == 'integer':
+            lp_dtype = TYPE_INTEGER
+        elif pandas_dtype == 'boolean':
+            lp_dtype = TYPE_BOOLEAN
+
+        elif pandas_dtype == 'datetime64' or pandas_dtype == 'datetime':
+            lp_dtype = TYPE_DATE_TIME
+        elif pandas_dtype == "date":
+            lp_dtype = TYPE_DATE
+        elif pandas_dtype == "time":
+            lp_dtype = TYPE_TIME
+
+        elif pandas_dtype == 'empty':  # for columns with all None values
+            lp_dtype = TYPE_UNKNOWN
+        else:
+            lp_dtype = 'unknown(pandas:' + pandas_dtype + ')'
+
+    return lp_dtype
+
+
+def _infer_type_polars_dataframe(var_name: str, var_type) -> str:
+    lp_dtype = TYPE_UNKNOWN
+
+    # https://docs.pola.rs/api/python/stable/reference/datatypes.html
+    if var_type in PL_FLOAT_DTYPES:
+        lp_dtype = TYPE_FLOATING
+    elif var_type in PL_INTEGER_DTYPES:
+        lp_dtype = TYPE_INTEGER
+    elif var_type == pl.datatypes.String:
+        lp_dtype = TYPE_STRING
+    elif var_type == pl.datatypes.Boolean:
+        lp_dtype = TYPE_BOOLEAN
+
+    elif var_type == pl.datatypes.Datetime:
+        lp_dtype = TYPE_DATE_TIME
+    elif var_type == pl.datatypes.Date:
+        lp_dtype = TYPE_DATE
+    elif var_type == pl.datatypes.Time:
+        lp_dtype = TYPE_TIME
+
+    else:
+        lp_dtype = 'unknown(polars:' + str(var_type) + ')'
+
+    return lp_dtype
+
+
+def _infer_type_dict(var_name: str, var_content) -> str:
+    if isinstance(var_content, Iterable):
+        if not any(True for _ in var_content):  # empty
+            return TYPE_UNKNOWN
+    else:
+        return TYPE_UNKNOWN
+
+    type_set = set(type(val) for val in var_content)
+    if type(None) in type_set:
+        type_set.remove(type(None))
+
+    if len(type_set) == 0:
+        return TYPE_UNKNOWN
+
+    if len(type_set) > 1:
+        if all(issubclass(type_obj, int) or issubclass(type_obj, float) for type_obj in type_set):
+            return TYPE_FLOATING
+        else:
+            return 'unknown(mixed types)'
+
+    lp_dtype = TYPE_UNKNOWN
+    type_obj = list(type_set)[0]
+    if type_obj == bool:
+        lp_dtype = TYPE_BOOLEAN
+    elif issubclass(type_obj, int):
+        lp_dtype = TYPE_INTEGER
+    elif issubclass(type_obj, float):
+        lp_dtype = TYPE_FLOATING
+    elif issubclass(type_obj, str):
+        lp_dtype = TYPE_STRING
+
+    elif issubclass(type_obj, datetime):
+        lp_dtype = TYPE_DATE_TIME
+    elif issubclass(type_obj, date) and not issubclass(type_obj, datetime):
+        lp_dtype = TYPE_DATE
+    elif issubclass(type_obj, time):
+        lp_dtype = TYPE_TIME
+
+    elif numpy and issubclass(type_obj, numpy.datetime64):
+        lp_dtype = TYPE_DATE_TIME
+    elif numpy and issubclass(type_obj, numpy.timedelta64):
+        # ToDo: time delta?
+        # lp_dtype = TYPE_DATE_TIME
+        lp_dtype = 'unknown(python:' + str(type_obj) + ')'
+
+    elif numpy and issubclass(type_obj, numpy.integer):
+        lp_dtype = TYPE_INTEGER
+    elif numpy and issubclass(type_obj, numpy.floating):
+        lp_dtype = TYPE_FLOATING
+    else:
+        lp_dtype = 'unknown(python:' + str(type_obj) + ')'
+
+    return lp_dtype
+
+
+def _detect_time_zone(var_name: str, data: Union[Dict, 'pandas.DataFrame', 'polars.DataFrame']) -> Optional[str]:
+    if is_pandas_data_frame(data):
+        if var_name in data:
+            var_content = data[var_name]
+            if hasattr(var_content, 'dt') and hasattr(var_content.dt, 'tz') and var_content.dt.tz is not None:
+                return str(var_content.dt.tz)
+    elif is_polars_dataframe(data):
+        if var_name in data.columns:
+            col_dtype = data[var_name].dtype
+            if hasattr(col_dtype, 'time_zone'):
+                if col_dtype.time_zone is not None:
+                    return str(col_dtype.time_zone)
+    elif isinstance(data, dict):
+        if var_name in data:
+            var_content = data[var_name]
+            if isinstance(var_content, Iterable):
+                for val in var_content:
+                    if isinstance(val, datetime) and val.tzinfo is not None:
+                        return str(val.tzinfo)
+
+    # NumPy datetime64 objects don't store timezone information,
+    # so we can't extract it from them.
+
+    return None
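series_meta.py now distinguishes 'date' and 'time' series from 'datetime', dispatches per backend (pandas, polars, plain dict) through dedicated helpers, and gains _detect_time_zone(). The following standalone sketch is not the library's code, just a simplified mirror of the dict branch; the helper name is made up. It shows the classification order, where bool is checked before int and datetime before date:

    from datetime import datetime, date, time

    def classify(values):
        types = {type(v) for v in values if v is not None}
        if len(types) != 1:
            return 'unknown'
        obj = next(iter(types))
        if obj is bool:                # bool is a subclass of int, so check it first
            return 'bool'
        if issubclass(obj, int):
            return 'int'
        if issubclass(obj, float):
            return 'float'
        if issubclass(obj, str):
            return 'str'
        if issubclass(obj, datetime):  # datetime is a subclass of date, so check it first
            return 'datetime'
        if issubclass(obj, date):
            return 'date'
        if issubclass(obj, time):
            return 'time'
        return 'unknown'

    print(classify([datetime(2024, 5, 1, 10, 30)]))  # datetime
    print(classify([date(2024, 5, 1)]))              # date
    print(classify([time(10, 30)]))                  # time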
lets_plot/plot/stat.py
CHANGED
@@ -32,7 +32,7 @@ def stat_summary(mapping=None, *, data=None, geom=None,
         The geometry to display the summary stat for this layer, as a string.
     position : str or `FeatureSpec`, default='identity'
         Position adjustment.
-        Either a position adjustment name: 'dodge', '
+        Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
         'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
     show_legend : bool, default=True
         False - do not show legend for this layer.
@@ -184,7 +184,7 @@ def stat_summary_bin(mapping=None, *, data=None, geom=None,
         The geometry to display the summary stat for this layer, as a string.
     position : str or `FeatureSpec`, default='identity'
         Position adjustment.
-        Either a position adjustment name: 'dodge', '
+        Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
         'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
     show_legend : bool, default=True
         False - do not show legend for this layer.
@@ -344,7 +344,7 @@ def stat_ecdf(mapping=None, *, data=None, geom=None,
         The geometry to display the ecdf stat for this layer, as a string.
     position : str or `FeatureSpec`, default='identity'
         Position adjustment.
-        Either a position adjustment name: 'dodge', '
+        Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
         'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
     show_legend : bool, default=True
         False - do not show legend for this layer.
@@ -485,7 +485,7 @@ def stat_sum(mapping=None, *, data=None, geom=None, position=None, show_legend=N
         The geometry to display the sum stat for this layer, as a string.
     position : str or `FeatureSpec`, default='identity'
         Position adjustment.
-        Either a position adjustment name: 'dodge', '
+        Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
         'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
     show_legend : bool, default=True
         False - do not show legend for this layer.
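The same docstring completion is applied to stat_summary(), stat_summary_bin(), stat_ecdf() and stat_sum(). A minimal, hypothetical sketch (the data, grouping column and summary defaults are assumed) passing a position adjustment to one of them:

    # Hypothetical example: summarize y per x and dodge the two groups side by side.
    from lets_plot import *

    LetsPlot.setup_html()

    data = {'x': ['a', 'b'] * 10, 'g': ['p'] * 10 + ['q'] * 10, 'y': list(range(20))}

    p = ggplot(data, aes('x', 'y', color='g')) + stat_summary(position=position_dodge(width=0.3))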
lets_plot/plot/subplots.py
CHANGED
@@ -221,10 +221,10 @@ class SupPlotsSpec(FeatureSpec):
         h : float, default=None
             Height of the output image in units.
             Only applicable when exporting to PNG or PDF.
-        unit : {'in', 'cm', 'mm'}, default=
+        unit : {'in', 'cm', 'mm'}, default='in'
             Unit of the output image. One of: 'in', 'cm', 'mm'.
             Only applicable when exporting to PNG or PDF.
-        dpi : int, default=
+        dpi : int, default=300
             Resolution in dots per inch.
             Only applicable when exporting to PNG or PDF.

@@ -282,10 +282,10 @@ class SupPlotsSpec(FeatureSpec):
         h : float, default=None
             Height of the output image in units.
             Only applicable when exporting to PNG or PDF.
-        unit : {'in', 'cm', 'mm'}, default=
+        unit : {'in', 'cm', 'mm'}, default='in'
             Unit of the output image. One of: 'in', 'cm', 'mm'.
             Only applicable when exporting to PNG or PDF.
-        dpi : int, default=
+        dpi : int, default=300
             Resolution in dots per inch.
             Only applicable when exporting to PNG or PDF.

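The updated SupPlotsSpec docstrings state that image export defaults to unit='in' and dpi=300. A hedged sketch of exporting a gggrid figure with those values made explicit; the exact export signature (w, h, unit, dpi on ggsave()) is assumed from these docstrings rather than verified:

    # Assumed export parameters (w, h, unit, dpi), per the docstrings above.
    from lets_plot import *

    LetsPlot.setup_html()

    p = ggplot({'x': [1, 2, 3], 'y': [3, 1, 2]}, aes('x', 'y')) + geom_point()
    fig = gggrid([p, p + geom_line()], ncol=2)

    ggsave(fig, 'grid.png', w=8, h=4, unit='in', dpi=300)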