pylegend 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/core/language/pandas_api/pandas_api_groupby_series.py +357 -0
- pylegend/core/language/pandas_api/pandas_api_series.py +202 -8
- pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +21 -11
- pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +3 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +10 -2
- pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py +87 -27
- {pylegend-0.13.0.dist-info → pylegend-0.14.0.dist-info}/METADATA +1 -1
- {pylegend-0.13.0.dist-info → pylegend-0.14.0.dist-info}/RECORD +12 -11
- {pylegend-0.13.0.dist-info → pylegend-0.14.0.dist-info}/WHEEL +1 -1
- {pylegend-0.13.0.dist-info → pylegend-0.14.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.13.0.dist-info → pylegend-0.14.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.13.0.dist-info → pylegend-0.14.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
# Copyright 2026 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import copy
|
|
16
|
+
import pandas as pd
|
|
17
|
+
from pylegend._typing import (
|
|
18
|
+
TYPE_CHECKING,
|
|
19
|
+
PyLegendDict,
|
|
20
|
+
PyLegendOptional,
|
|
21
|
+
PyLegendSequence,
|
|
22
|
+
PyLegendTypeVar,
|
|
23
|
+
PyLegendUnion
|
|
24
|
+
)
|
|
25
|
+
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
26
|
+
from pylegend.core.language.pandas_api.pandas_api_series import (
|
|
27
|
+
SupportsToPureExpression,
|
|
28
|
+
SupportsToSqlExpression
|
|
29
|
+
)
|
|
30
|
+
from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
|
|
31
|
+
from pylegend.core.language.shared.column_expressions import PyLegendColumnExpression
|
|
32
|
+
from pylegend.core.language.shared.expression import (
|
|
33
|
+
PyLegendExpressionBooleanReturn,
|
|
34
|
+
PyLegendExpressionDateReturn,
|
|
35
|
+
PyLegendExpressionDateTimeReturn,
|
|
36
|
+
PyLegendExpressionFloatReturn,
|
|
37
|
+
PyLegendExpressionIntegerReturn,
|
|
38
|
+
PyLegendExpressionNumberReturn,
|
|
39
|
+
PyLegendExpressionStrictDateReturn,
|
|
40
|
+
PyLegendExpressionStringReturn
|
|
41
|
+
)
|
|
42
|
+
from pylegend.core.language.shared.primitives.boolean import PyLegendBoolean
|
|
43
|
+
from pylegend.core.language.shared.primitives.date import PyLegendDate
|
|
44
|
+
from pylegend.core.language.shared.primitives.datetime import PyLegendDateTime
|
|
45
|
+
from pylegend.core.language.shared.primitives.float import PyLegendFloat
|
|
46
|
+
from pylegend.core.language.shared.primitives.integer import PyLegendInteger
|
|
47
|
+
from pylegend.core.language.shared.primitives.number import PyLegendNumber
|
|
48
|
+
from pylegend.core.language.shared.primitives.primitive import (
|
|
49
|
+
PyLegendPrimitive,
|
|
50
|
+
PyLegendPrimitiveOrPythonPrimitive
|
|
51
|
+
)
|
|
52
|
+
from pylegend.core.language.shared.primitives.strictdate import PyLegendStrictDate
|
|
53
|
+
from pylegend.core.language.shared.primitives.string import PyLegendString
|
|
54
|
+
from pylegend.core.sql.metamodel import Expression, QuerySpecification
|
|
55
|
+
from pylegend.core.tds.abstract.frames.base_tds_frame import BaseTdsFrame
|
|
56
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunctionTdsFrame
|
|
57
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
|
|
58
|
+
from pylegend.core.tds.result_handler import ResultHandler
|
|
59
|
+
from pylegend.core.tds.tds_column import TdsColumn
|
|
60
|
+
from pylegend.core.tds.tds_frame import FrameToPureConfig, FrameToSqlConfig
|
|
61
|
+
from pylegend.extensions.tds.result_handler import PandasDfReadConfig
|
|
62
|
+
|
|
63
|
+
if TYPE_CHECKING:
|
|
64
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
|
|
65
|
+
|
|
66
|
+
__all__: PyLegendSequence[str] = [
|
|
67
|
+
"GroupbySeries",
|
|
68
|
+
"BooleanGroupbySeries",
|
|
69
|
+
"StringGroupbySeries",
|
|
70
|
+
"NumberGroupbySeries",
|
|
71
|
+
"IntegerGroupbySeries",
|
|
72
|
+
"FloatGroupbySeries",
|
|
73
|
+
"DateGroupbySeries",
|
|
74
|
+
"DateTimeGroupbySeries",
|
|
75
|
+
"StrictDateGroupbySeries",
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
R = PyLegendTypeVar('R')
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class GroupbySeries(PyLegendColumnExpression, PyLegendPrimitive, BaseTdsFrame):
    """Single selected column of a ``PandasApiGroupbyTdsFrame``.

    The series is created from a group-by frame that has exactly one selected
    column. Frame-level operations (SQL / Pure generation, execution, column
    listing) are forwarded to ``applied_function_frame`` and raise
    ``RuntimeError`` while that frame is still ``None``. The aggregation
    helpers (``aggregate``, ``agg``, ``sum``, ``mean``, ...) return whatever the
    underlying ``aggregate`` call returns.
    """

    # Group-by frame this series was created from.
    _base_groupby_frame: PandasApiGroupbyTdsFrame
    # Frame of the operation applied to this series; ``None`` until one is assigned.
    _applied_function_frame: PyLegendOptional[PandasApiAppliedFunctionTdsFrame]

    def __init__(self, base_groupby_frame: PandasApiGroupbyTdsFrame):
        """Bind the series to the only selected column of ``base_groupby_frame``.

        An ``AssertionError`` is raised (when assertions are enabled) unless the
        group-by frame reports exactly one selected column.
        """
        selected_columns = base_groupby_frame.get_selected_columns()
        assert selected_columns is not None and len(selected_columns) == 1, (
            "To initialize a GroupbySeries object, exactly one column must be selected, "
            f"but got selected columns: {[str(col) for col in selected_columns] if selected_columns is not None else None}"
        )

        # The column expression is built against the frame underneath the group-by.
        PyLegendColumnExpression.__init__(
            self,
            row=PandasApiTdsRow.from_tds_frame("c", base_groupby_frame.base_frame()),
            column=selected_columns[0].get_name(),
        )

        self._base_groupby_frame = base_groupby_frame
        self._applied_function_frame = None

    @property
    def applied_function_frame(self) -> PyLegendOptional[PandasApiAppliedFunctionTdsFrame]:
        """Frame of the applied operation, or ``None`` if none has been assigned."""
        return self._applied_function_frame

    @applied_function_frame.setter
    def applied_function_frame(self, value: PandasApiAppliedFunctionTdsFrame) -> None:
        self._applied_function_frame = value

    def _raise_exception_if_no_function_applied(self) -> PandasApiAppliedFunctionTdsFrame:
        """Return the applied-function frame, raising ``RuntimeError`` if it is unset."""
        frame = self._applied_function_frame
        if frame is None:
            raise RuntimeError(
                "The 'groupby' function requires at least one operation to be performed right after it (e.g. aggregate, rank)"
            )
        return frame

    def get_base_frame(self) -> "PandasApiGroupbyTdsFrame":
        """Return the group-by frame this series was created from."""
        return self._base_groupby_frame

    def to_sql_expression(
            self,
            frame_name_to_base_query_map: PyLegendDict[str, QuerySpecification],
            config: FrameToSqlConfig
    ) -> Expression:
        """Return the SQL expression provided by the applied function.

        Raises ``RuntimeError`` when no function has been applied and
        ``NotImplementedError`` when the applied function has no
        ``to_sql_expression`` method.
        """
        applied_func = self._raise_exception_if_no_function_applied().get_applied_function()
        if not isinstance(applied_func, SupportsToSqlExpression):
            raise NotImplementedError(  # pragma: no cover
                f"The '{applied_func.name()}' function cannot provide a SQL expression"
            )
        return applied_func.to_sql_expression(frame_name_to_base_query_map, config)

    def to_pure_expression(self, config: FrameToPureConfig) -> str:
        """Return the Pure expression provided by the applied function.

        Raises ``RuntimeError`` when no function has been applied and
        ``NotImplementedError`` when the applied function has no
        ``to_pure_expression`` method.
        """
        applied_func = self._raise_exception_if_no_function_applied().get_applied_function()
        if not isinstance(applied_func, SupportsToPureExpression):
            raise NotImplementedError(  # pragma: no cover
                f"The '{applied_func.name()}' function cannot provide a pure expression"
            )
        return applied_func.to_pure_expression(config)

    # ------------------------------------------------------------------
    # Frame API: every method below forwards to the applied-function frame.
    # ------------------------------------------------------------------

    def columns(self) -> PyLegendSequence[TdsColumn]:
        """Return the columns of the applied-function frame."""
        return self._raise_exception_if_no_function_applied().columns()

    def to_sql_query(self, config: FrameToSqlConfig = FrameToSqlConfig()) -> str:
        """Return the SQL query text of the applied-function frame."""
        return self._raise_exception_if_no_function_applied().to_sql_query(config)

    def to_pure_query(self, config: FrameToPureConfig = FrameToPureConfig()) -> str:
        """Return the Pure query text of the applied-function frame."""
        return self._raise_exception_if_no_function_applied().to_pure_query(config)

    def execute_frame(
            self,
            result_handler: ResultHandler[R],
            chunk_size: PyLegendOptional[int] = None
    ) -> R:  # pragma: no cover
        """Execute the applied-function frame with the given result handler."""
        return self._raise_exception_if_no_function_applied().execute_frame(result_handler, chunk_size)

    def execute_frame_to_string(
            self,
            chunk_size: PyLegendOptional[int] = None
    ) -> str:  # pragma: no cover
        """Execute the applied-function frame and return its string result."""
        return self._raise_exception_if_no_function_applied().execute_frame_to_string(chunk_size)

    def execute_frame_to_pandas_df(
            self,
            chunk_size: PyLegendOptional[int] = None,
            pandas_df_read_config: PandasDfReadConfig = PandasDfReadConfig()
    ) -> pd.DataFrame:  # pragma: no cover
        """Execute the applied-function frame and return a pandas ``DataFrame``."""
        target = self._raise_exception_if_no_function_applied()
        return target.execute_frame_to_pandas_df(chunk_size, pandas_df_read_config)

    def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Return the SQL query object of the applied-function frame."""
        return self._raise_exception_if_no_function_applied().to_sql_query_object(config)

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the Pure text of the applied-function frame."""
        return self._raise_exception_if_no_function_applied().to_pure(config)

    def get_all_tds_frames(self) -> PyLegendSequence["BaseTdsFrame"]:
        """Return the frames reported by the applied-function frame."""
        return self._raise_exception_if_no_function_applied().get_all_tds_frames()

    # ------------------------------------------------------------------
    # Aggregations
    # ------------------------------------------------------------------

    def aggregate(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive
    ) -> "PandasApiTdsFrame":
        """Aggregate the series with ``func``.

        The call goes to the applied-function frame when one exists and to the
        base group-by frame otherwise. It is made through a shallow copy of
        this series.
        """
        series_copy = copy.copy(self)
        if series_copy.applied_function_frame is not None:
            return series_copy.applied_function_frame.aggregate(func, axis, *args, **kwargs)
        return series_copy.get_base_frame().aggregate(func, axis, *args, **kwargs)

    def agg(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive
    ) -> "PandasApiTdsFrame":
        """Alias of :meth:`aggregate`."""
        return self.aggregate(func, axis, *args, **kwargs)

    def sum(
            self,
            numeric_only: bool = False,
            min_count: int = 0,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with ``"sum"``; every option is accepted only at its default.

        Raises ``NotImplementedError`` for the first non-default option, checked
        in the order numeric_only, min_count, engine, engine_kwargs.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in sum function.")
        if min_count != 0:
            raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in sum function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in sum function.")

        return self.aggregate("sum", 0)

    def mean(
            self,
            numeric_only: bool = False,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with ``"mean"``; every option is accepted only at its default.

        Raises ``NotImplementedError`` for the first non-default option, checked
        in the order numeric_only, engine, engine_kwargs.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in mean function.")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in mean function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in mean function.")

        return self.aggregate("mean", 0)

    def min(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with ``"min"``; every option is accepted only at its default.

        Raises ``NotImplementedError`` for the first non-default option, checked
        in the order numeric_only, min_count, engine, engine_kwargs.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in min function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in min function, but got: {min_count}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in min function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in min function.")

        return self.aggregate("min", 0)

    def max(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with ``"max"``; every option is accepted only at its default.

        Raises ``NotImplementedError`` for the first non-default option, checked
        in the order numeric_only, min_count, engine, engine_kwargs.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in max function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in max function, but got: {min_count}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in max function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in max function.")

        return self.aggregate("max", 0)

    def std(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Aggregate with ``"std"``; every option is accepted only at its default.

        Raises ``NotImplementedError`` for the first non-default option, checked
        in the order ddof, engine, engine_kwargs, numeric_only.
        """
        if ddof != 1:
            raise NotImplementedError(f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in std function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in std function.")
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in std function.")

        return self.aggregate("std", 0)

    def var(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Aggregate with ``"var"``; every option is accepted only at its default.

        Raises ``NotImplementedError`` for the first non-default option, checked
        in the order ddof, engine, engine_kwargs, numeric_only.
        """
        if ddof != 1:
            raise NotImplementedError(f"Only ddof=1 (Sample Variance) is supported in var function, but got: {ddof}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in var function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in var function.")
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in var function.")

        return self.aggregate("var", 0)

    def count(self) -> "PandasApiTdsFrame":
        """Aggregate with ``"count"``."""
        return self.aggregate("count", 0)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class BooleanGroupbySeries(GroupbySeries, PyLegendBoolean, PyLegendExpressionBooleanReturn):
    """Group-by series that is also a ``PyLegendBoolean`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Initialise the group-by part first, then ``PyLegendBoolean`` with this object as its argument."""
        # Both statements are excluded from coverage (see the pragma notes).
        super().__init__(base_frame)  # pragma: no cover (Boolean column not supported in PURE)
        PyLegendBoolean.__init__(self, self)  # pragma: no cover (Boolean column not supported in PURE)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
class StringGroupbySeries(GroupbySeries, PyLegendString, PyLegendExpressionStringReturn):
    """Group-by series that is also a ``PyLegendString`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Initialise the group-by part first, then ``PyLegendString`` with this object as its argument."""
        super().__init__(base_frame)
        # The series passes itself to the primitive's constructor.
        PyLegendString.__init__(self, self)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class NumberGroupbySeries(GroupbySeries, PyLegendNumber, PyLegendExpressionNumberReturn):
    """Group-by series that is also a ``PyLegendNumber`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Initialise the group-by part first, then ``PyLegendNumber`` with this object as its argument."""
        super().__init__(base_frame)
        # The series passes itself to the primitive's constructor.
        PyLegendNumber.__init__(self, self)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class IntegerGroupbySeries(NumberGroupbySeries, PyLegendInteger, PyLegendExpressionIntegerReturn):
    """Number group-by series that is also a ``PyLegendInteger`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Run the ``NumberGroupbySeries`` initialisation, then ``PyLegendInteger`` with this object as its argument."""
        super().__init__(base_frame)
        # The series passes itself to the primitive's constructor.
        PyLegendInteger.__init__(self, self)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class FloatGroupbySeries(NumberGroupbySeries, PyLegendFloat, PyLegendExpressionFloatReturn):
    """Number group-by series that is also a ``PyLegendFloat`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Run the ``NumberGroupbySeries`` initialisation, then ``PyLegendFloat`` with this object as its argument."""
        super().__init__(base_frame)
        # The series passes itself to the primitive's constructor.
        PyLegendFloat.__init__(self, self)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
class DateGroupbySeries(GroupbySeries, PyLegendDate, PyLegendExpressionDateReturn):
    """Group-by series that is also a ``PyLegendDate`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Initialise the group-by part first, then ``PyLegendDate`` with this object as its argument."""
        super().__init__(base_frame)
        # The series passes itself to the primitive's constructor.
        PyLegendDate.__init__(self, self)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class DateTimeGroupbySeries(DateGroupbySeries, PyLegendDateTime, PyLegendExpressionDateTimeReturn):
    """Date group-by series that is also a ``PyLegendDateTime`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Run the ``DateGroupbySeries`` initialisation, then ``PyLegendDateTime`` with this object as its argument."""
        super().__init__(base_frame)
        # The series passes itself to the primitive's constructor.
        PyLegendDateTime.__init__(self, self)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
class StrictDateGroupbySeries(DateGroupbySeries, PyLegendStrictDate, PyLegendExpressionStrictDateReturn):
    """Date group-by series that is also a ``PyLegendStrictDate`` expression."""

    def __init__(self, base_frame: "PandasApiGroupbyTdsFrame"):
        """Run the ``DateGroupbySeries`` initialisation, then ``PyLegendStrictDate`` with this object as its argument."""
        super().__init__(base_frame)
        # The series passes itself to the primitive's constructor.
        PyLegendStrictDate.__init__(self, self)
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import TYPE_CHECKING
|
|
15
|
+
from typing import TYPE_CHECKING, runtime_checkable, Protocol
|
|
16
16
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
|
|
@@ -22,8 +22,10 @@ from pylegend._typing import (
|
|
|
22
22
|
from pylegend._typing import (
|
|
23
23
|
PyLegendSequence,
|
|
24
24
|
PyLegendOptional,
|
|
25
|
-
PyLegendTypeVar
|
|
25
|
+
PyLegendTypeVar,
|
|
26
|
+
PyLegendUnion
|
|
26
27
|
)
|
|
28
|
+
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
27
29
|
from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
|
|
28
30
|
from pylegend.core.language.shared.column_expressions import PyLegendColumnExpression
|
|
29
31
|
from pylegend.core.language.shared.expression import (
|
|
@@ -42,7 +44,7 @@ from pylegend.core.language.shared.primitives.datetime import PyLegendDateTime
|
|
|
42
44
|
from pylegend.core.language.shared.primitives.float import PyLegendFloat
|
|
43
45
|
from pylegend.core.language.shared.primitives.integer import PyLegendInteger
|
|
44
46
|
from pylegend.core.language.shared.primitives.number import PyLegendNumber
|
|
45
|
-
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitive
|
|
47
|
+
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitive, PyLegendPrimitiveOrPythonPrimitive
|
|
46
48
|
from pylegend.core.language.shared.primitives.strictdate import PyLegendStrictDate
|
|
47
49
|
from pylegend.core.language.shared.primitives.string import PyLegendString
|
|
48
50
|
from pylegend.core.sql.metamodel import (
|
|
@@ -50,6 +52,8 @@ from pylegend.core.sql.metamodel import (
|
|
|
50
52
|
)
|
|
51
53
|
from pylegend.core.sql.metamodel import QuerySpecification
|
|
52
54
|
from pylegend.core.tds.abstract.frames.base_tds_frame import BaseTdsFrame
|
|
55
|
+
from pylegend.core.tds.pandas_api.frames.functions.filter import PandasApiFilterFunction
|
|
56
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunctionTdsFrame
|
|
53
57
|
from pylegend.core.tds.result_handler import ResultHandler
|
|
54
58
|
from pylegend.core.tds.tds_column import TdsColumn
|
|
55
59
|
from pylegend.core.tds.tds_frame import FrameToPureConfig
|
|
@@ -60,19 +64,39 @@ if TYPE_CHECKING:
|
|
|
60
64
|
from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
|
|
61
65
|
|
|
62
66
|
__all__: PyLegendSequence[str] = [
|
|
63
|
-
"Series"
|
|
67
|
+
"Series",
|
|
68
|
+
"SupportsToSqlExpression",
|
|
69
|
+
"SupportsToPureExpression",
|
|
64
70
|
]
|
|
65
71
|
|
|
66
72
|
R = PyLegendTypeVar('R')
|
|
67
73
|
|
|
68
74
|
|
|
75
|
+
@runtime_checkable
class SupportsToSqlExpression(Protocol):
    """Structural type for objects exposing ``to_sql_expression``.

    The protocol is runtime-checkable, so ``isinstance`` can be used to test
    whether an object provides the method.
    """

    def to_sql_expression(
            self,
            frame_name_to_base_query_map: PyLegendDict[str, QuerySpecification],
            config: FrameToSqlConfig
    ) -> Expression:
        """Return a SQL expression for the given query map and configuration."""
        ...
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@runtime_checkable
class SupportsToPureExpression(Protocol):
    """Structural type for objects exposing ``to_pure_expression``.

    The protocol is runtime-checkable, so ``isinstance`` can be used to test
    whether an object provides the method.
    """

    def to_pure_expression(self, config: FrameToPureConfig) -> str:
        """Return a Pure expression string for the given configuration."""
        ...
|
|
89
|
+
|
|
90
|
+
|
|
69
91
|
class Series(PyLegendColumnExpression, PyLegendPrimitive, BaseTdsFrame):
|
|
70
92
|
def __init__(self, base_frame: "PandasApiTdsFrame", column: str):
    """Create a series for ``column`` of ``base_frame``.

    Besides initialising the column expression, the constructor filters
    ``base_frame`` down to ``column`` and stores the result in
    ``_filtered_frame``.
    """
    PyLegendColumnExpression.__init__(
        self,
        row=PandasApiTdsRow.from_tds_frame("c", base_frame),
        column=column,
    )

    self.__base_frame = base_frame

    # The filter result is expected to be an applied-function frame; the
    # assert also narrows the type for the annotated attribute below.
    single_column_frame = base_frame.filter(items=[column])
    assert isinstance(single_column_frame, PandasApiAppliedFunctionTdsFrame)
    self._filtered_frame: PandasApiAppliedFunctionTdsFrame = single_column_frame
|
|
76
100
|
|
|
77
101
|
def value(self) -> PyLegendColumnExpression:
    """Return this series itself."""
    return self
|
|
@@ -85,9 +109,27 @@ class Series(PyLegendColumnExpression, PyLegendPrimitive, BaseTdsFrame):
|
|
|
85
109
|
frame_name_to_base_query_map: PyLegendDict[str, QuerySpecification],
|
|
86
110
|
config: FrameToSqlConfig
|
|
87
111
|
) -> Expression:
|
|
112
|
+
applied_func = self._filtered_frame.get_applied_function()
|
|
113
|
+
if not isinstance(applied_func, PandasApiFilterFunction): # pragma: no cover
|
|
114
|
+
if isinstance(applied_func, SupportsToSqlExpression):
|
|
115
|
+
return applied_func.to_sql_expression(frame_name_to_base_query_map, config)
|
|
116
|
+
else:
|
|
117
|
+
raise NotImplementedError(
|
|
118
|
+
f"The '{applied_func.name()}' function cannot provide a SQL expression"
|
|
119
|
+
)
|
|
120
|
+
|
|
88
121
|
return super().to_sql_expression(frame_name_to_base_query_map, config)
|
|
89
122
|
|
|
90
123
|
def to_pure_expression(self, config: FrameToPureConfig) -> str:
    """Return the Pure expression for this series.

    When the function applied to ``_filtered_frame`` is a
    ``PandasApiFilterFunction`` the parent class's expression is used.
    Otherwise the applied function supplies the expression, and
    ``NotImplementedError`` is raised if it has no ``to_pure_expression``.
    """
    applied_func = self._filtered_frame.get_applied_function()

    if isinstance(applied_func, PandasApiFilterFunction):
        return super().to_pure_expression(config)

    if isinstance(applied_func, SupportsToPureExpression):  # pragma: no cover
        return applied_func.to_pure_expression(config)

    raise NotImplementedError(  # pragma: no cover
        f"The '{applied_func.name()}' function cannot provide a pure expression"
    )
|
|
92
134
|
|
|
93
135
|
def columns(self) -> PyLegendSequence[TdsColumn]:
|
|
@@ -120,13 +162,165 @@ class Series(PyLegendColumnExpression, PyLegendPrimitive, BaseTdsFrame):
|
|
|
120
162
|
return self._filtered_frame.execute_frame_to_pandas_df(chunk_size, pandas_df_read_config) # pragma: no cover
|
|
121
163
|
|
|
122
164
|
def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
    """Return the SQL query object of the filtered single-column frame."""
    single_column_frame = self._filtered_frame
    return single_column_frame.to_sql_query_object(config)
|
|
124
166
|
|
|
125
167
|
def to_pure(self, config: FrameToPureConfig) -> str:
    """Return the Pure text of the filtered single-column frame."""
    single_column_frame = self._filtered_frame
    return single_column_frame.to_pure(config)
|
|
127
169
|
|
|
128
170
|
def get_all_tds_frames(self) -> PyLegendSequence["BaseTdsFrame"]:
    """Return the frames reported by the filtered single-column frame."""
    single_column_frame = self._filtered_frame
    return single_column_frame.get_all_tds_frames()
|
|
172
|
+
|
|
173
|
+
def aggregate(
        self,
        func: PyLegendAggInput,
        axis: PyLegendUnion[int, str] = 0,
        *args: PyLegendPrimitiveOrPythonPrimitive,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Aggregate the filtered single-column frame with ``func``.

    All arguments are passed through unchanged to the frame's ``aggregate``.
    """
    single_column_frame = self._filtered_frame
    return single_column_frame.aggregate(func, axis, *args, **kwargs)
|
|
181
|
+
|
|
182
|
+
def agg(
        self,
        func: PyLegendAggInput,
        axis: PyLegendUnion[int, str] = 0,
        *args: PyLegendPrimitiveOrPythonPrimitive,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Alias of ``aggregate``; every argument is forwarded unchanged."""
    aggregated = self.aggregate(func, axis, *args, **kwargs)
    return aggregated
|
|
190
|
+
|
|
191
|
+
def sum(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        min_count: int = 0,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Aggregate this series with ``"sum"``.

    Only ``axis`` of 0 / ``"index"`` and the default value of every other
    option are accepted. ``NotImplementedError`` is raised for the first
    offending argument, checked in the order axis, skipna, numeric_only,
    min_count, extra keyword arguments.
    """
    if axis not in (0, "index"):
        raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in sum function, but got: {axis}")

    if skipna is not True:
        raise NotImplementedError("skipna=False is not currently supported in sum function. "
                                  "SQL aggregation ignores nulls by default.")

    if numeric_only is not False:
        raise NotImplementedError("numeric_only=True is not currently supported in sum function.")

    if min_count != 0:
        raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")

    if kwargs:
        raise NotImplementedError(
            f"Additional keyword arguments not supported in sum function: {list(kwargs.keys())}")

    return self.aggregate("sum", 0)
|
|
212
|
+
|
|
213
|
+
def mean(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Aggregate this series with ``"mean"``.

    Only ``axis`` of 0 / ``"index"`` and the default value of every other
    option are accepted. ``NotImplementedError`` is raised for the first
    offending argument, checked in the order axis, skipna, numeric_only,
    extra keyword arguments.
    """
    if axis not in (0, "index"):
        raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in mean function, but got: {axis}")

    if skipna is not True:
        raise NotImplementedError("skipna=False is not currently supported in mean function.")

    if numeric_only is not False:
        raise NotImplementedError("numeric_only=True is not currently supported in mean function.")

    if kwargs:
        raise NotImplementedError(
            f"Additional keyword arguments not supported in mean function: {list(kwargs.keys())}")

    return self.aggregate("mean", 0)
|
|
230
|
+
|
|
231
|
+
def min(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Aggregate this series with ``"min"``.

    Only ``axis`` of 0 / ``"index"`` and the default value of every other
    option are accepted. ``NotImplementedError`` is raised for the first
    offending argument, checked in the order axis, skipna, numeric_only,
    extra keyword arguments.
    """
    if axis not in (0, "index"):
        raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in min function, but got: {axis}")

    if skipna is not True:
        raise NotImplementedError("skipna=False is not currently supported in min function.")

    if numeric_only is not False:
        raise NotImplementedError("numeric_only=True is not currently supported in min function.")

    if kwargs:
        raise NotImplementedError(
            f"Additional keyword arguments not supported in min function: {list(kwargs.keys())}")

    return self.aggregate("min", 0)
|
|
248
|
+
|
|
249
|
+
def max(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Aggregate this series with ``"max"``.

    Only ``axis`` of 0 / ``"index"`` and the default value of every other
    option are accepted. ``NotImplementedError`` is raised for the first
    offending argument, checked in the order axis, skipna, numeric_only,
    extra keyword arguments.
    """
    if axis not in (0, "index"):
        raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in max function, but got: {axis}")

    if skipna is not True:
        raise NotImplementedError("skipna=False is not currently supported in max function.")

    if numeric_only is not False:
        raise NotImplementedError("numeric_only=True is not currently supported in max function.")

    if kwargs:
        raise NotImplementedError(
            f"Additional keyword arguments not supported in max function: {list(kwargs.keys())}")

    return self.aggregate("max", 0)
|
|
266
|
+
|
|
267
|
+
def std(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Aggregate this series with ``"std"``.

    Only ``axis`` of 0 / ``"index"``, ``ddof=1`` and the default value of
    every other option are accepted. ``NotImplementedError`` is raised for
    the first offending argument, checked in the order axis, skipna, ddof,
    numeric_only, extra keyword arguments.
    """
    if axis not in (0, "index"):
        raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in std function, but got: {axis}")

    if skipna is not True:
        raise NotImplementedError("skipna=False is not currently supported in std function.")

    if ddof != 1:
        raise NotImplementedError(
            f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}")

    if numeric_only is not False:
        raise NotImplementedError("numeric_only=True is not currently supported in std function.")

    if kwargs:
        raise NotImplementedError(
            f"Additional keyword arguments not supported in std function: {list(kwargs.keys())}")

    return self.aggregate("std", 0)
|
|
288
|
+
|
|
289
|
+
def var(
|
|
290
|
+
self,
|
|
291
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
292
|
+
skipna: bool = True,
|
|
293
|
+
ddof: int = 1,
|
|
294
|
+
numeric_only: bool = False,
|
|
295
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
296
|
+
) -> "PandasApiTdsFrame":
|
|
297
|
+
if axis not in [0, "index"]:
|
|
298
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in var function, but got: {axis}")
|
|
299
|
+
if skipna is not True:
|
|
300
|
+
raise NotImplementedError("skipna=False is not currently supported in var function.")
|
|
301
|
+
if ddof != 1:
|
|
302
|
+
raise NotImplementedError(f"Only ddof=1 (Sample Variance) is supported in var function, but got: {ddof}")
|
|
303
|
+
if numeric_only is not False:
|
|
304
|
+
raise NotImplementedError("numeric_only=True is not currently supported in var function.")
|
|
305
|
+
if len(kwargs) > 0:
|
|
306
|
+
raise NotImplementedError(
|
|
307
|
+
f"Additional keyword arguments not supported in var function: {list(kwargs.keys())}")
|
|
308
|
+
return self.aggregate("var", 0)
|
|
309
|
+
|
|
310
|
+
def count(
|
|
311
|
+
self,
|
|
312
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
313
|
+
numeric_only: bool = False,
|
|
314
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
315
|
+
) -> "PandasApiTdsFrame":
|
|
316
|
+
if axis not in [0, "index"]:
|
|
317
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in count function, but got: {axis}")
|
|
318
|
+
if numeric_only is not False:
|
|
319
|
+
raise NotImplementedError("numeric_only=True is not currently supported in count function.")
|
|
320
|
+
if len(kwargs) > 0:
|
|
321
|
+
raise NotImplementedError(
|
|
322
|
+
f"Additional keyword arguments not supported in count function: {list(kwargs.keys())}")
|
|
323
|
+
return self.aggregate("count", 0)
|
|
130
324
|
|
|
131
325
|
|
|
132
326
|
class BooleanSeries(Series, PyLegendBoolean, PyLegendExpressionBooleanReturn): # type: ignore
|
|
@@ -100,7 +100,7 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
100
100
|
|
|
101
101
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
102
102
|
columns_to_retain: PyLegendList[str] = [
|
|
103
|
-
db_extension.quote_identifier(x) for x in self.__base_frame.
|
|
103
|
+
db_extension.quote_identifier(x.get_name()) for x in self.__base_frame.get_grouping_columns()
|
|
104
104
|
]
|
|
105
105
|
new_cols_with_index: PyLegendList[PyLegendTuple[int, "SelectItem"]] = []
|
|
106
106
|
for col in new_query.select.selectItems:
|
|
@@ -127,8 +127,8 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
127
127
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
128
128
|
tds_row = PandasApiTdsRow.from_tds_frame("r", self.base_frame())
|
|
129
129
|
new_query.groupBy = [
|
|
130
|
-
(lambda x: x[c])(tds_row).to_sql_expression({"r": new_query}, config)
|
|
131
|
-
for c in self.__base_frame.
|
|
130
|
+
(lambda x: x[c.get_name()])(tds_row).to_sql_expression({"r": new_query}, config)
|
|
131
|
+
for c in self.__base_frame.get_grouping_columns()
|
|
132
132
|
]
|
|
133
133
|
|
|
134
134
|
return new_query
|
|
@@ -149,8 +149,8 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
149
149
|
|
|
150
150
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
151
151
|
group_strings = []
|
|
152
|
-
for
|
|
153
|
-
group_strings.append(escape_column_name(
|
|
152
|
+
for col in self.__base_frame.get_grouping_columns():
|
|
153
|
+
group_strings.append(escape_column_name(col.get_name()))
|
|
154
154
|
|
|
155
155
|
pure_expression = (
|
|
156
156
|
f"{self.base_frame().to_pure(config)}{config.separator(1)}" + f"->groupBy({config.separator(2)}"
|
|
@@ -182,7 +182,8 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
182
182
|
|
|
183
183
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
184
184
|
base_cols_map = {c.get_name(): c for c in self.base_frame().columns()}
|
|
185
|
-
for
|
|
185
|
+
for group_col in self.__base_frame.get_grouping_columns():
|
|
186
|
+
group_col_name = group_col.get_name()
|
|
186
187
|
if group_col_name in base_cols_map:
|
|
187
188
|
new_columns.append(base_cols_map[group_col_name].copy())
|
|
188
189
|
|
|
@@ -231,6 +232,10 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
231
232
|
|
|
232
233
|
tds_row = PandasApiTdsRow.from_tds_frame("r", self.base_frame())
|
|
233
234
|
|
|
235
|
+
group_cols: set[str] = set()
|
|
236
|
+
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
237
|
+
group_cols = set([col.get_name() for col in self.__base_frame.get_grouping_columns()])
|
|
238
|
+
|
|
234
239
|
for column_name, agg_input in normalized_func.items():
|
|
235
240
|
mapper_function: PyLegendCallable[[PandasApiTdsRow], PyLegendPrimitiveOrPythonPrimitive] = eval(
|
|
236
241
|
f'lambda r: r["{column_name}"]'
|
|
@@ -259,7 +264,12 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
259
264
|
normalized_agg_func = self.__normalize_agg_func_to_lambda_function(agg_input)
|
|
260
265
|
agg_result = normalized_agg_func(collection)
|
|
261
266
|
|
|
262
|
-
|
|
267
|
+
if column_name in group_cols:
|
|
268
|
+
alias = self._generate_column_alias(column_name, agg_input, 0)
|
|
269
|
+
else:
|
|
270
|
+
alias = column_name
|
|
271
|
+
|
|
272
|
+
self.__aggregates_list.append((alias, map_result, agg_result))
|
|
263
273
|
|
|
264
274
|
return True
|
|
265
275
|
|
|
@@ -274,13 +284,13 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
274
284
|
all_cols = [col.get_name() for col in self.base_frame().columns()]
|
|
275
285
|
|
|
276
286
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
277
|
-
group_cols = set(self.__base_frame.
|
|
287
|
+
group_cols = set([col.get_name() for col in self.__base_frame.get_grouping_columns()])
|
|
278
288
|
|
|
279
|
-
selected_cols = self.__base_frame.
|
|
289
|
+
selected_cols = self.__base_frame.get_selected_columns()
|
|
280
290
|
|
|
281
291
|
if selected_cols is not None:
|
|
282
|
-
validation_columns = selected_cols
|
|
283
|
-
default_broadcast_columns = selected_cols
|
|
292
|
+
validation_columns = [col.get_name() for col in selected_cols]
|
|
293
|
+
default_broadcast_columns = [col.get_name() for col in selected_cols]
|
|
284
294
|
else:
|
|
285
295
|
validation_columns = all_cols
|
|
286
296
|
default_broadcast_columns = [c for c in all_cols if c not in group_cols]
|
|
@@ -85,3 +85,6 @@ class PandasApiAppliedFunctionTdsFrame(PandasApiBaseTdsFrame):
|
|
|
85
85
|
for x in [self.__applied_function.base_frame()] + self.__applied_function.tds_frame_parameters()
|
|
86
86
|
for y in x.get_all_tds_frames()
|
|
87
87
|
] + [self]
|
|
88
|
+
|
|
89
|
+
def get_applied_function(self) -> PandasApiAppliedFunction:
|
|
90
|
+
return self.__applied_function
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import copy
|
|
16
16
|
from abc import ABCMeta, abstractmethod
|
|
17
17
|
from datetime import date, datetime
|
|
18
|
-
from typing import TYPE_CHECKING
|
|
18
|
+
from typing import TYPE_CHECKING, overload
|
|
19
19
|
|
|
20
20
|
from typing_extensions import Concatenate
|
|
21
21
|
|
|
@@ -96,6 +96,14 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
96
96
|
return [c.copy() for c in self.__columns]
|
|
97
97
|
return self._transformed_frame.columns()
|
|
98
98
|
|
|
99
|
+
@overload # type: ignore[override]
|
|
100
|
+
def __getitem__(self, key: str) -> "Series":
|
|
101
|
+
...
|
|
102
|
+
|
|
103
|
+
@overload
|
|
104
|
+
def __getitem__(self, key: PyLegendList[str]) -> "PandasApiTdsFrame":
|
|
105
|
+
...
|
|
106
|
+
|
|
99
107
|
def __getitem__(
|
|
100
108
|
self,
|
|
101
109
|
key: PyLegendUnion[str, PyLegendList[str], PyLegendBoolean]
|
|
@@ -665,7 +673,7 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
665
673
|
# Compute row callable via func on the Series
|
|
666
674
|
def _row_callable(
|
|
667
675
|
_row: PandasApiTdsRow,
|
|
668
|
-
_s: Series = series,
|
|
676
|
+
_s: Series = series,
|
|
669
677
|
_a: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = args,
|
|
670
678
|
_k: PyLegendPrimitiveOrPythonPrimitive = kwargs # type: ignore
|
|
671
679
|
) -> PyLegendPrimitiveOrPythonPrimitive:
|
|
@@ -11,21 +11,24 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
from typing import overload
|
|
15
15
|
|
|
16
16
|
from pylegend._typing import (
|
|
17
17
|
PyLegendOptional,
|
|
18
18
|
PyLegendUnion,
|
|
19
19
|
PyLegendList,
|
|
20
20
|
PyLegendDict,
|
|
21
|
+
PyLegendSet,
|
|
21
22
|
TYPE_CHECKING,
|
|
22
23
|
)
|
|
23
24
|
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
24
25
|
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
|
|
25
26
|
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
27
|
+
from pylegend.core.tds.tds_column import TdsColumn
|
|
26
28
|
|
|
27
29
|
if TYPE_CHECKING:
|
|
28
30
|
from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
|
|
31
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import GroupbySeries
|
|
29
32
|
|
|
30
33
|
|
|
31
34
|
class PandasApiGroupbyTdsFrame:
|
|
@@ -38,8 +41,8 @@ class PandasApiGroupbyTdsFrame:
|
|
|
38
41
|
__observed: bool
|
|
39
42
|
__dropna: bool
|
|
40
43
|
|
|
41
|
-
|
|
42
|
-
__selected_columns: PyLegendOptional[PyLegendList[
|
|
44
|
+
__grouping_columns: PyLegendList[TdsColumn]
|
|
45
|
+
__selected_columns: PyLegendOptional[PyLegendList[TdsColumn]]
|
|
43
46
|
|
|
44
47
|
@classmethod
|
|
45
48
|
def name(cls) -> str:
|
|
@@ -101,39 +104,65 @@ class PandasApiGroupbyTdsFrame:
|
|
|
101
104
|
f"but got: {self.__dropna} (type: {type(self.__dropna).__name__})"
|
|
102
105
|
)
|
|
103
106
|
|
|
104
|
-
input_cols:
|
|
107
|
+
input_cols: PyLegendSet[str]
|
|
105
108
|
if isinstance(self.__by, str):
|
|
106
|
-
input_cols = [self.__by]
|
|
109
|
+
input_cols = set([self.__by])
|
|
107
110
|
elif isinstance(self.__by, list):
|
|
108
|
-
input_cols = self.__by
|
|
111
|
+
input_cols = set(self.__by)
|
|
109
112
|
else:
|
|
110
113
|
raise TypeError(
|
|
111
114
|
f"The 'by' parameter in groupby function must be a string or a list of strings."
|
|
112
115
|
f"but got: {self.__by} (type: {type(self.__by).__name__})"
|
|
113
|
-
)
|
|
116
|
+
) # pragma: no cover
|
|
117
|
+
group_by_names: PyLegendList[str]
|
|
118
|
+
if isinstance(self.__by, str):
|
|
119
|
+
group_by_names = [self.__by]
|
|
120
|
+
elif isinstance(self.__by, list):
|
|
121
|
+
group_by_names = self.__by
|
|
122
|
+
else:
|
|
123
|
+
raise TypeError(
|
|
124
|
+
f"The 'by' parameter in groupby function must be a string or a list of strings."
|
|
125
|
+
f"but got: {self.__by} (type: {type(self.__by).__name__})"
|
|
126
|
+
) # pragma: no cover
|
|
114
127
|
|
|
115
|
-
if len(
|
|
128
|
+
if len(group_by_names) == 0:
|
|
116
129
|
raise ValueError("The 'by' parameter in groupby function must contain at least one column name.")
|
|
117
130
|
|
|
118
|
-
|
|
119
|
-
|
|
131
|
+
base_col_map = {col.get_name(): col for col in self.__base_frame.columns()}
|
|
132
|
+
|
|
133
|
+
self.__grouping_columns = [
|
|
134
|
+
base_col_map[name]
|
|
135
|
+
for name in group_by_names
|
|
136
|
+
if name in base_col_map
|
|
137
|
+
]
|
|
120
138
|
|
|
121
|
-
if len(
|
|
139
|
+
if len(self.__grouping_columns) < len(input_cols):
|
|
140
|
+
available_columns = {c.get_name() for c in self.__base_frame.columns()}
|
|
141
|
+
missing_cols = [col for col in input_cols if col not in available_columns]
|
|
122
142
|
raise KeyError(
|
|
123
143
|
f"Column(s) {missing_cols} in groupby function's provided columns list "
|
|
124
144
|
f"do not exist in the current frame. "
|
|
125
145
|
f"Current frame columns: {sorted(available_columns)}"
|
|
126
146
|
)
|
|
127
147
|
|
|
128
|
-
|
|
148
|
+
@overload
|
|
149
|
+
def __getitem__(self, key: str) -> "GroupbySeries":
|
|
150
|
+
...
|
|
151
|
+
|
|
152
|
+
@overload
|
|
153
|
+
def __getitem__(self, key: PyLegendList[str]) -> "PandasApiGroupbyTdsFrame":
|
|
154
|
+
...
|
|
129
155
|
|
|
130
|
-
def __getitem__(
|
|
131
|
-
|
|
156
|
+
def __getitem__(
|
|
157
|
+
self,
|
|
158
|
+
item: PyLegendUnion[str, PyLegendList[str]]
|
|
159
|
+
) -> PyLegendUnion["PandasApiGroupbyTdsFrame", "GroupbySeries"]:
|
|
160
|
+
columns_to_select: PyLegendSet[str]
|
|
132
161
|
|
|
133
162
|
if isinstance(item, str):
|
|
134
|
-
columns_to_select = [item]
|
|
163
|
+
columns_to_select = set([item])
|
|
135
164
|
elif isinstance(item, list):
|
|
136
|
-
columns_to_select = item
|
|
165
|
+
columns_to_select = set(item)
|
|
137
166
|
else:
|
|
138
167
|
raise TypeError(
|
|
139
168
|
f"Column selection after groupby function must be a string or a list of strings, "
|
|
@@ -143,10 +172,12 @@ class PandasApiGroupbyTdsFrame:
|
|
|
143
172
|
if len(columns_to_select) == 0:
|
|
144
173
|
raise ValueError("When performing column selection after groupby, at least one column must be selected.")
|
|
145
174
|
|
|
146
|
-
|
|
147
|
-
|
|
175
|
+
selected_columns: PyLegendList[TdsColumn] = [
|
|
176
|
+
col for col in self.__base_frame.columns() if col.get_name() in columns_to_select]
|
|
148
177
|
|
|
149
|
-
if len(
|
|
178
|
+
if len(selected_columns) < len(columns_to_select):
|
|
179
|
+
available_columns = {c.get_name() for c in self.__base_frame.columns()}
|
|
180
|
+
missing_cols = [col for col in columns_to_select if col not in available_columns]
|
|
150
181
|
raise KeyError(
|
|
151
182
|
f"Column(s) {missing_cols} selected after groupby do not exist in the current frame. "
|
|
152
183
|
f"Current frame columns: {sorted(available_columns)}"
|
|
@@ -163,16 +194,47 @@ class PandasApiGroupbyTdsFrame:
|
|
|
163
194
|
dropna=self.__dropna,
|
|
164
195
|
)
|
|
165
196
|
|
|
166
|
-
new_frame.__selected_columns =
|
|
197
|
+
new_frame.__selected_columns = selected_columns
|
|
198
|
+
|
|
199
|
+
if selected_columns is not None and isinstance(item, str):
|
|
200
|
+
column: TdsColumn = selected_columns[0]
|
|
201
|
+
col_type = column.get_type()
|
|
202
|
+
if col_type == "Boolean": # pragma: no cover (Boolean column not supported in PURE)
|
|
203
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import BooleanGroupbySeries
|
|
204
|
+
return BooleanGroupbySeries(new_frame)
|
|
205
|
+
elif col_type == "String":
|
|
206
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import StringGroupbySeries
|
|
207
|
+
return StringGroupbySeries(new_frame)
|
|
208
|
+
elif col_type == "Number":
|
|
209
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import NumberGroupbySeries
|
|
210
|
+
return NumberGroupbySeries(new_frame)
|
|
211
|
+
elif col_type == "Integer":
|
|
212
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import IntegerGroupbySeries
|
|
213
|
+
return IntegerGroupbySeries(new_frame)
|
|
214
|
+
elif col_type == "Float":
|
|
215
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import FloatGroupbySeries
|
|
216
|
+
return FloatGroupbySeries(new_frame)
|
|
217
|
+
elif col_type == "Date":
|
|
218
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import DateGroupbySeries
|
|
219
|
+
return DateGroupbySeries(new_frame)
|
|
220
|
+
elif col_type == "DateTime":
|
|
221
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import DateTimeGroupbySeries
|
|
222
|
+
return DateTimeGroupbySeries(new_frame)
|
|
223
|
+
elif col_type == "StrictDate":
|
|
224
|
+
from pylegend.core.language.pandas_api.pandas_api_groupby_series import StrictDateGroupbySeries
|
|
225
|
+
return StrictDateGroupbySeries(new_frame)
|
|
226
|
+
else:
|
|
227
|
+
raise ValueError(f"Unsupported column type '{col_type}' for column '{column.get_name()}'") # pragma: no cover
|
|
228
|
+
|
|
167
229
|
return new_frame
|
|
168
230
|
|
|
169
231
|
def base_frame(self) -> PandasApiBaseTdsFrame:
|
|
170
232
|
return self.__base_frame
|
|
171
233
|
|
|
172
|
-
def
|
|
173
|
-
return self.
|
|
234
|
+
def get_grouping_columns(self) -> PyLegendList[TdsColumn]:
|
|
235
|
+
return self.__grouping_columns.copy()
|
|
174
236
|
|
|
175
|
-
def
|
|
237
|
+
def get_selected_columns(self) -> PyLegendOptional[PyLegendList[TdsColumn]]:
|
|
176
238
|
if self.__selected_columns is None:
|
|
177
239
|
return None
|
|
178
240
|
return self.__selected_columns.copy()
|
|
@@ -197,7 +259,7 @@ class PandasApiGroupbyTdsFrame:
|
|
|
197
259
|
aggregated_result = PandasApiAppliedFunctionTdsFrame(
|
|
198
260
|
SortValuesFunction(
|
|
199
261
|
base_frame=aggregated_result,
|
|
200
|
-
by=self.
|
|
262
|
+
by=[col.get_name() for col in self.get_grouping_columns()],
|
|
201
263
|
axis=0,
|
|
202
264
|
ascending=True,
|
|
203
265
|
inplace=False,
|
|
@@ -217,10 +279,8 @@ class PandasApiGroupbyTdsFrame:
|
|
|
217
279
|
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
218
280
|
**kwargs: PyLegendPrimitiveOrPythonPrimitive,
|
|
219
281
|
) -> "PandasApiTdsFrame":
|
|
220
|
-
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunctionTdsFrame
|
|
221
|
-
from pylegend.core.tds.pandas_api.frames.functions.aggregate_function import AggregateFunction
|
|
222
282
|
|
|
223
|
-
return
|
|
283
|
+
return self.aggregate(func, axis, *args, **kwargs)
|
|
224
284
|
|
|
225
285
|
def sum(
|
|
226
286
|
self,
|
|
@@ -16,7 +16,8 @@ pylegend/core/language/legendql_api/legendql_api_tds_row.py,sha256=5hejBF2uYjXua
|
|
|
16
16
|
pylegend/core/language/pandas_api/__init__.py,sha256=g6w4WCuQ2pqQG6yyn-QLLXED3ttOOB8YnXzVt3ijb28,578
|
|
17
17
|
pylegend/core/language/pandas_api/pandas_api_aggregate_specification.py,sha256=T_RaPB9y_7kcnMC_CIEDJhxORID1plR99IGGPZvDnBk,1474
|
|
18
18
|
pylegend/core/language/pandas_api/pandas_api_custom_expressions.py,sha256=xOXp0NdyNW6j_R_1sCy1JcdN9EWIqYRXngocsINpllE,2590
|
|
19
|
-
pylegend/core/language/pandas_api/
|
|
19
|
+
pylegend/core/language/pandas_api/pandas_api_groupby_series.py,sha256=MU6o_e2NIwo2OZHcXS8zuxZa14QG5SygXjnhHRy8H-4,16200
|
|
20
|
+
pylegend/core/language/pandas_api/pandas_api_series.py,sha256=Jock_ZYikBBCr-bVHpYEcoKV7bg0hdzQV4KJU99Z0O4,16452
|
|
20
21
|
pylegend/core/language/pandas_api/pandas_api_tds_row.py,sha256=L0O5BLok3KqmzUgXFfM2fQgrpAxCfQ74bOplnetjyvw,2516
|
|
21
22
|
pylegend/core/language/shared/__init__.py,sha256=g6w4WCuQ2pqQG6yyn-QLLXED3ttOOB8YnXzVt3ijb28,578
|
|
22
23
|
pylegend/core/language/shared/column_expressions.py,sha256=qWHVvwPGwKroQX94a_ovUrxCPnosVMX3tBWlTj7uJ6k,4333
|
|
@@ -112,7 +113,7 @@ pylegend/core/tds/legendql_api/frames/legendql_api_tds_frame.py,sha256=BFDdgeZ66
|
|
|
112
113
|
pylegend/core/tds/pandas_api/__init__.py,sha256=LXTDJSDmHQXtnMDZouhZp9IZQVpY6ONkINbUYjtnMkE,578
|
|
113
114
|
pylegend/core/tds/pandas_api/frames/__init__.py,sha256=LXTDJSDmHQXtnMDZouhZp9IZQVpY6ONkINbUYjtnMkE,578
|
|
114
115
|
pylegend/core/tds/pandas_api/frames/functions/__init__.py,sha256=LXTDJSDmHQXtnMDZouhZp9IZQVpY6ONkINbUYjtnMkE,578
|
|
115
|
-
pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py,sha256=
|
|
116
|
+
pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py,sha256=L-re1fgm1VPgy2VeY0BoXIDEhpfciWV-pgDK164HVFI,21646
|
|
116
117
|
pylegend/core/tds/pandas_api/frames/functions/assign_function.py,sha256=uhCv1sDnVBZmT3H5Alu2NUvek8aKqWz5HIG2Vtut-hs,6931
|
|
117
118
|
pylegend/core/tds/pandas_api/frames/functions/drop.py,sha256=tJMeL9Or43QDng9SsxYvW_yus3Hjp03PSlX7P857d7s,7269
|
|
118
119
|
pylegend/core/tds/pandas_api/frames/functions/dropna.py,sha256=OVVwUsPSAykm1g-afVvwxQCr_AaoQtH0cFRVffl9eHs,6161
|
|
@@ -125,9 +126,9 @@ pylegend/core/tds/pandas_api/frames/functions/merge.py,sha256=bp9a9reNtUAKqaz1Kf
|
|
|
125
126
|
pylegend/core/tds/pandas_api/frames/functions/rename.py,sha256=afXj8EhsTVUNJAZDFAM_K3VOX5oH3TA2FxSDoZHfT6M,8898
|
|
126
127
|
pylegend/core/tds/pandas_api/frames/functions/sort_values_function.py,sha256=sppDTCW3X0RXLYD2zBvjKEObtc_JfEtoNY7lj-60zqQ,7132
|
|
127
128
|
pylegend/core/tds/pandas_api/frames/functions/truncate_function.py,sha256=VUr9jzVhnU_mJVootUQfcEG8Q66vSJba1QGUGQiYxCk,6214
|
|
128
|
-
pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py,sha256=
|
|
129
|
-
pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py,sha256=
|
|
130
|
-
pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py,sha256=
|
|
129
|
+
pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py,sha256=3BcR2OH5uGvKIep6P1hrgK8QoT5RCzZeh9KfBx1TmgU,3071
|
|
130
|
+
pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py,sha256=eK-goe9LdQJQLwdlfAllQ0kE8OCUECO9qRzyYnPQNZw,39017
|
|
131
|
+
pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py,sha256=YEtrE07ynHzGBJpSa7FhCQ2834NrOjPkECowfCPFJrQ,16820
|
|
131
132
|
pylegend/core/tds/pandas_api/frames/pandas_api_input_tds_frame.py,sha256=FgwIJCkawXuIjXYfVVrLa5RHfOO5xnSFI0pXti34L_8,2116
|
|
132
133
|
pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py,sha256=vPPtO_zvElaZfa4kgUEOIMnBMe133TNqTGE2DDDyC3M,12233
|
|
133
134
|
pylegend/core/tds/result_handler/__init__.py,sha256=8RE84xfkARwDbaQCvZulXcvDJlI-V5DuJp9RsdaGnqU,1141
|
|
@@ -171,9 +172,9 @@ pylegend/legacy_api_tds_client.py,sha256=IXfo2pdBFV3M3S4RYKJcvudMc_OGdR0yvJhTV-o
|
|
|
171
172
|
pylegend/legendql_api_tds_client.py,sha256=oS6NET5pAA-hfVhVvwG6sRF7omyBs_gEYSAgA8Tky8U,2357
|
|
172
173
|
pylegend/utils/__init__.py,sha256=LXTDJSDmHQXtnMDZouhZp9IZQVpY6ONkINbUYjtnMkE,578
|
|
173
174
|
pylegend/utils/class_utils.py,sha256=t4PpF3jAXS_D6p9TqlSppryNYNOuy5C-kbKn2Kgb4QU,973
|
|
174
|
-
pylegend-0.
|
|
175
|
-
pylegend-0.
|
|
176
|
-
pylegend-0.
|
|
177
|
-
pylegend-0.
|
|
178
|
-
pylegend-0.
|
|
179
|
-
pylegend-0.
|
|
175
|
+
pylegend-0.14.0.dist-info/METADATA,sha256=9onT96z16Z8t06EPg1At-Zqd31fTSiuFUEt9GVgEQkc,4281
|
|
176
|
+
pylegend-0.14.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
177
|
+
pylegend-0.14.0.dist-info/licenses/LICENSE,sha256=AGR96_qQPZO66Gjqq4G6r_g670K35VtW-IobTAkmZJM,11343
|
|
178
|
+
pylegend-0.14.0.dist-info/licenses/LICENSE.spdx,sha256=i7TsBclLotUvMjx9vZ_6S8Pp0r4uknWGw1RwiKBBvQ4,207
|
|
179
|
+
pylegend-0.14.0.dist-info/licenses/NOTICE,sha256=2Lr4FqiscyRI7-vyn7c2z-zqUw2p6x7upJyBvFKkHjk,167
|
|
180
|
+
pylegend-0.14.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|