pylegend 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/core/database/sql_to_string/db_extension.py +68 -6
- pylegend/core/language/legendql_api/legendql_api_custom_expressions.py +190 -5
- pylegend/core/language/pandas_api/pandas_api_series.py +3 -0
- pylegend/core/sql/metamodel.py +4 -1
- pylegend/core/tds/legendql_api/frames/functions/legendql_api_distinct_function.py +53 -7
- pylegend/core/tds/legendql_api/frames/legendql_api_base_tds_frame.py +146 -4
- pylegend/core/tds/legendql_api/frames/legendql_api_tds_frame.py +33 -2
- pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +221 -96
- pylegend/core/tds/pandas_api/frames/functions/assign_function.py +65 -23
- pylegend/core/tds/pandas_api/frames/functions/drop.py +3 -3
- pylegend/core/tds/pandas_api/frames/functions/dropna.py +167 -0
- pylegend/core/tds/pandas_api/frames/functions/fillna.py +162 -0
- pylegend/core/tds/pandas_api/frames/functions/filter.py +10 -5
- pylegend/core/tds/pandas_api/frames/functions/merge.py +513 -0
- pylegend/core/tds/pandas_api/frames/functions/rename.py +214 -0
- pylegend/core/tds/pandas_api/frames/functions/truncate_function.py +151 -120
- pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +7 -3
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +559 -18
- pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py +325 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +218 -12
- pylegend/extensions/tds/abstract/csv_tds_frame.py +95 -0
- pylegend/extensions/tds/legendql_api/frames/legendql_api_csv_input_frame.py +36 -0
- pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_function_input_frame.py +9 -4
- pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_service_input_frame.py +12 -5
- pylegend/extensions/tds/pandas_api/frames/pandas_api_table_spec_input_frame.py +12 -4
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/METADATA +1 -1
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/RECORD +31 -24
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/WHEEL +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -12,26 +12,41 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import copy
|
|
15
16
|
from abc import ABCMeta, abstractmethod
|
|
16
17
|
from datetime import date, datetime
|
|
17
18
|
from typing import TYPE_CHECKING
|
|
18
19
|
|
|
20
|
+
from typing_extensions import Concatenate
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from typing import ParamSpec
|
|
24
|
+
except Exception:
|
|
25
|
+
from typing_extensions import ParamSpec # type: ignore
|
|
26
|
+
|
|
19
27
|
import pandas as pd
|
|
20
28
|
|
|
21
29
|
from pylegend._typing import (
|
|
22
30
|
PyLegendSequence,
|
|
23
31
|
PyLegendTypeVar,
|
|
32
|
+
PyLegendType,
|
|
24
33
|
PyLegendList,
|
|
34
|
+
PyLegendTuple,
|
|
25
35
|
PyLegendSet,
|
|
26
36
|
PyLegendOptional,
|
|
27
37
|
PyLegendCallable,
|
|
28
38
|
PyLegendUnion,
|
|
39
|
+
PyLegendDict
|
|
29
40
|
)
|
|
30
41
|
from pylegend.core.database.sql_to_string import (
|
|
31
42
|
SqlToStringConfig,
|
|
32
43
|
SqlToStringFormat
|
|
33
44
|
)
|
|
34
|
-
from pylegend.core.language import
|
|
45
|
+
from pylegend.core.language import (
|
|
46
|
+
PyLegendPrimitive,
|
|
47
|
+
PyLegendInteger,
|
|
48
|
+
PyLegendBoolean,
|
|
49
|
+
)
|
|
35
50
|
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
36
51
|
from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
|
|
37
52
|
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
|
|
@@ -45,7 +60,7 @@ from pylegend.core.tds.result_handler import (
|
|
|
45
60
|
)
|
|
46
61
|
from pylegend.core.tds.tds_column import TdsColumn
|
|
47
62
|
from pylegend.core.tds.tds_frame import FrameToPureConfig
|
|
48
|
-
from pylegend.core.tds.tds_frame import FrameToSqlConfig
|
|
63
|
+
from pylegend.core.tds.tds_frame import FrameToSqlConfig, PyLegendTdsFrame
|
|
49
64
|
from pylegend.extensions.tds.result_handler import (
|
|
50
65
|
ToPandasDfResultHandler,
|
|
51
66
|
PandasDfReadConfig,
|
|
@@ -53,12 +68,14 @@ from pylegend.extensions.tds.result_handler import (
|
|
|
53
68
|
|
|
54
69
|
if TYPE_CHECKING:
|
|
55
70
|
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
71
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
|
|
56
72
|
|
|
57
73
|
__all__: PyLegendSequence[str] = [
|
|
58
74
|
"PandasApiBaseTdsFrame"
|
|
59
75
|
]
|
|
60
76
|
|
|
61
77
|
R = PyLegendTypeVar('R')
|
|
78
|
+
P = ParamSpec("P")
|
|
62
79
|
|
|
63
80
|
|
|
64
81
|
class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
@@ -70,9 +87,12 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
70
87
|
cols = "[" + ", ".join([str(c) for c in columns]) + "]"
|
|
71
88
|
raise ValueError(f"TdsFrame cannot have duplicated column names. Passed columns: {cols}")
|
|
72
89
|
self.__columns = [c.copy() for c in columns]
|
|
90
|
+
self._transformed_frame = None
|
|
73
91
|
|
|
74
92
|
def columns(self) -> PyLegendSequence[TdsColumn]:
|
|
75
|
-
|
|
93
|
+
if self._transformed_frame is None:
|
|
94
|
+
return [c.copy() for c in self.__columns]
|
|
95
|
+
return self._transformed_frame.columns()
|
|
76
96
|
|
|
77
97
|
def __getitem__(
|
|
78
98
|
self,
|
|
@@ -94,7 +114,8 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
94
114
|
if col.get_name() == key:
|
|
95
115
|
col_type = col.get_type()
|
|
96
116
|
if col_type == "Boolean":
|
|
97
|
-
from pylegend.core.language.pandas_api.pandas_api_series import
|
|
117
|
+
from pylegend.core.language.pandas_api.pandas_api_series import \
|
|
118
|
+
BooleanSeries # pragma: no cover
|
|
98
119
|
return BooleanSeries(self, key) # pragma: no cover (Boolean column not supported in PURE)
|
|
99
120
|
elif col_type == "String":
|
|
100
121
|
from pylegend.core.language.pandas_api.pandas_api_series import StringSeries
|
|
@@ -127,6 +148,41 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
127
148
|
else:
|
|
128
149
|
raise TypeError(f"Invalid key type: {type(key)}. Expected str, list, or boolean expression")
|
|
129
150
|
|
|
151
|
+
def __setitem__(self, key: str, value: PyLegendUnion["Series", PyLegendPrimitiveOrPythonPrimitive]) -> None:
|
|
152
|
+
"""
|
|
153
|
+
Pandas-like column assignment with replace semantics:
|
|
154
|
+
- If column exists, drop it first.
|
|
155
|
+
- Then assign the new value (Series or constant).
|
|
156
|
+
"""
|
|
157
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
158
|
+
PandasApiAppliedFunctionTdsFrame
|
|
159
|
+
)
|
|
160
|
+
from pylegend.core.tds.pandas_api.frames.functions.assign_function import AssignFunction
|
|
161
|
+
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
162
|
+
|
|
163
|
+
# Type Check
|
|
164
|
+
if not isinstance(key, str):
|
|
165
|
+
raise TypeError(f"Column name must be a string, got: {type(key)}")
|
|
166
|
+
|
|
167
|
+
# Reject cross-frame assignment
|
|
168
|
+
if isinstance(value, Series):
|
|
169
|
+
origin = value.get_base_frame()
|
|
170
|
+
if origin is not None and origin is not self:
|
|
171
|
+
raise ValueError("Assignment from a different frame is not allowed")
|
|
172
|
+
|
|
173
|
+
# Normalize the assignment value
|
|
174
|
+
col_def = {}
|
|
175
|
+
if callable(value):
|
|
176
|
+
col_def[key] = value
|
|
177
|
+
else:
|
|
178
|
+
col_def[key] = lambda row: value
|
|
179
|
+
|
|
180
|
+
working_frame = copy.deepcopy(self)
|
|
181
|
+
assign_applied = PandasApiAppliedFunctionTdsFrame(AssignFunction(working_frame, col_definitions=col_def))
|
|
182
|
+
|
|
183
|
+
self._transformed_frame = assign_applied # type: ignore
|
|
184
|
+
self.__columns = assign_applied.columns()
|
|
185
|
+
|
|
130
186
|
def assign(
|
|
131
187
|
self,
|
|
132
188
|
**kwargs: PyLegendCallable[
|
|
@@ -214,7 +270,7 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
214
270
|
index: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
215
271
|
columns: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
216
272
|
level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]] = None,
|
|
217
|
-
inplace: PyLegendUnion[bool, PyLegendBoolean] =
|
|
273
|
+
inplace: PyLegendUnion[bool, PyLegendBoolean] = False,
|
|
218
274
|
errors: str = "raise",
|
|
219
275
|
) -> "PandasApiTdsFrame":
|
|
220
276
|
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import \
|
|
@@ -235,11 +291,11 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
235
291
|
)
|
|
236
292
|
|
|
237
293
|
def aggregate(
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
294
|
+
self,
|
|
295
|
+
func: PyLegendAggInput,
|
|
296
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
297
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
298
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
243
299
|
) -> "PandasApiTdsFrame":
|
|
244
300
|
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
245
301
|
PandasApiAppliedFunctionTdsFrame
|
|
@@ -254,11 +310,11 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
254
310
|
))
|
|
255
311
|
|
|
256
312
|
def agg(
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
313
|
+
self,
|
|
314
|
+
func: PyLegendAggInput,
|
|
315
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
316
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
317
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
262
318
|
) -> "PandasApiTdsFrame":
|
|
263
319
|
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
264
320
|
PandasApiAppliedFunctionTdsFrame
|
|
@@ -272,13 +328,498 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
272
328
|
**kwargs
|
|
273
329
|
))
|
|
274
330
|
|
|
331
|
+
def sum(
|
|
332
|
+
self,
|
|
333
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
334
|
+
skipna: bool = True,
|
|
335
|
+
numeric_only: bool = False,
|
|
336
|
+
min_count: int = 0,
|
|
337
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
338
|
+
) -> "PandasApiTdsFrame":
|
|
339
|
+
if axis not in [0, "index"]:
|
|
340
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in sum function, but got: {axis}")
|
|
341
|
+
if skipna is not True:
|
|
342
|
+
raise NotImplementedError("skipna=False is not currently supported in sum function. "
|
|
343
|
+
"SQL aggregation ignores nulls by default.")
|
|
344
|
+
if numeric_only is not False:
|
|
345
|
+
raise NotImplementedError("numeric_only=True is not currently supported in sum function.")
|
|
346
|
+
if min_count != 0:
|
|
347
|
+
raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")
|
|
348
|
+
if len(kwargs) > 0:
|
|
349
|
+
raise NotImplementedError(
|
|
350
|
+
f"Additional keyword arguments not supported in sum function: {list(kwargs.keys())}")
|
|
351
|
+
return self.aggregate("sum", 0)
|
|
352
|
+
|
|
353
|
+
def mean(
|
|
354
|
+
self,
|
|
355
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
356
|
+
skipna: bool = True,
|
|
357
|
+
numeric_only: bool = False,
|
|
358
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
359
|
+
) -> "PandasApiTdsFrame":
|
|
360
|
+
if axis not in [0, "index"]:
|
|
361
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in mean function, but got: {axis}")
|
|
362
|
+
if skipna is not True:
|
|
363
|
+
raise NotImplementedError("skipna=False is not currently supported in mean function.")
|
|
364
|
+
if numeric_only is not False:
|
|
365
|
+
raise NotImplementedError("numeric_only=True is not currently supported in mean function.")
|
|
366
|
+
if len(kwargs) > 0:
|
|
367
|
+
raise NotImplementedError(
|
|
368
|
+
f"Additional keyword arguments not supported in mean function: {list(kwargs.keys())}")
|
|
369
|
+
return self.aggregate("mean", 0)
|
|
370
|
+
|
|
371
|
+
def min(
|
|
372
|
+
self,
|
|
373
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
374
|
+
skipna: bool = True,
|
|
375
|
+
numeric_only: bool = False,
|
|
376
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
377
|
+
) -> "PandasApiTdsFrame":
|
|
378
|
+
if axis not in [0, "index"]:
|
|
379
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in min function, but got: {axis}")
|
|
380
|
+
if skipna is not True:
|
|
381
|
+
raise NotImplementedError("skipna=False is not currently supported in min function.")
|
|
382
|
+
if numeric_only is not False:
|
|
383
|
+
raise NotImplementedError("numeric_only=True is not currently supported in min function.")
|
|
384
|
+
if len(kwargs) > 0:
|
|
385
|
+
raise NotImplementedError(
|
|
386
|
+
f"Additional keyword arguments not supported in min function: {list(kwargs.keys())}")
|
|
387
|
+
return self.aggregate("min", 0)
|
|
388
|
+
|
|
389
|
+
def max(
|
|
390
|
+
self,
|
|
391
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
392
|
+
skipna: bool = True,
|
|
393
|
+
numeric_only: bool = False,
|
|
394
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
395
|
+
) -> "PandasApiTdsFrame":
|
|
396
|
+
if axis not in [0, "index"]:
|
|
397
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in max function, but got: {axis}")
|
|
398
|
+
if skipna is not True:
|
|
399
|
+
raise NotImplementedError("skipna=False is not currently supported in max function.")
|
|
400
|
+
if numeric_only is not False:
|
|
401
|
+
raise NotImplementedError("numeric_only=True is not currently supported in max function.")
|
|
402
|
+
if len(kwargs) > 0:
|
|
403
|
+
raise NotImplementedError(
|
|
404
|
+
f"Additional keyword arguments not supported in max function: {list(kwargs.keys())}")
|
|
405
|
+
return self.aggregate("max", 0)
|
|
406
|
+
|
|
407
|
+
def std(
|
|
408
|
+
self,
|
|
409
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
410
|
+
skipna: bool = True,
|
|
411
|
+
ddof: int = 1,
|
|
412
|
+
numeric_only: bool = False,
|
|
413
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
414
|
+
) -> "PandasApiTdsFrame":
|
|
415
|
+
if axis not in [0, "index"]:
|
|
416
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in std function, but got: {axis}")
|
|
417
|
+
if skipna is not True:
|
|
418
|
+
raise NotImplementedError("skipna=False is not currently supported in std function.")
|
|
419
|
+
if ddof != 1:
|
|
420
|
+
raise NotImplementedError(
|
|
421
|
+
f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}")
|
|
422
|
+
if numeric_only is not False:
|
|
423
|
+
raise NotImplementedError("numeric_only=True is not currently supported in std function.")
|
|
424
|
+
if len(kwargs) > 0:
|
|
425
|
+
raise NotImplementedError(
|
|
426
|
+
f"Additional keyword arguments not supported in std function: {list(kwargs.keys())}")
|
|
427
|
+
return self.aggregate("std", 0)
|
|
428
|
+
|
|
429
|
+
def var(
|
|
430
|
+
self,
|
|
431
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
432
|
+
skipna: bool = True,
|
|
433
|
+
ddof: int = 1,
|
|
434
|
+
numeric_only: bool = False,
|
|
435
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
436
|
+
) -> "PandasApiTdsFrame":
|
|
437
|
+
if axis not in [0, "index"]:
|
|
438
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in var function, but got: {axis}")
|
|
439
|
+
if skipna is not True:
|
|
440
|
+
raise NotImplementedError("skipna=False is not currently supported in var function.")
|
|
441
|
+
if ddof != 1:
|
|
442
|
+
raise NotImplementedError(f"Only ddof=1 (Sample Variance) is supported in var function, but got: {ddof}")
|
|
443
|
+
if numeric_only is not False:
|
|
444
|
+
raise NotImplementedError("numeric_only=True is not currently supported in var function.")
|
|
445
|
+
if len(kwargs) > 0:
|
|
446
|
+
raise NotImplementedError(
|
|
447
|
+
f"Additional keyword arguments not supported in var function: {list(kwargs.keys())}")
|
|
448
|
+
return self.aggregate("var", 0)
|
|
449
|
+
|
|
450
|
+
def count(
|
|
451
|
+
self,
|
|
452
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
453
|
+
numeric_only: bool = False,
|
|
454
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
455
|
+
) -> "PandasApiTdsFrame":
|
|
456
|
+
if axis not in [0, "index"]:
|
|
457
|
+
raise NotImplementedError(f"The 'axis' parameter must be 0 or 'index' in count function, but got: {axis}")
|
|
458
|
+
if numeric_only is not False:
|
|
459
|
+
raise NotImplementedError("numeric_only=True is not currently supported in count function.")
|
|
460
|
+
if len(kwargs) > 0:
|
|
461
|
+
raise NotImplementedError(
|
|
462
|
+
f"Additional keyword arguments not supported in count function: {list(kwargs.keys())}")
|
|
463
|
+
return self.aggregate("count", 0)
|
|
464
|
+
|
|
465
|
+
def groupby(
|
|
466
|
+
self,
|
|
467
|
+
by: PyLegendUnion[str, PyLegendList[str]],
|
|
468
|
+
level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
|
|
469
|
+
as_index: bool = False,
|
|
470
|
+
sort: bool = True,
|
|
471
|
+
group_keys: bool = False,
|
|
472
|
+
observed: bool = False,
|
|
473
|
+
dropna: bool = False,
|
|
474
|
+
) -> "PandasApiGroupbyTdsFrame":
|
|
475
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import (
|
|
476
|
+
PandasApiGroupbyTdsFrame
|
|
477
|
+
)
|
|
478
|
+
return PandasApiGroupbyTdsFrame(
|
|
479
|
+
base_frame=self,
|
|
480
|
+
by=by,
|
|
481
|
+
level=level,
|
|
482
|
+
as_index=as_index,
|
|
483
|
+
sort=sort,
|
|
484
|
+
group_keys=group_keys,
|
|
485
|
+
observed=observed,
|
|
486
|
+
dropna=dropna
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
def merge(
|
|
490
|
+
self,
|
|
491
|
+
other: "PandasApiTdsFrame",
|
|
492
|
+
how: PyLegendOptional[str] = "inner",
|
|
493
|
+
on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
494
|
+
left_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
495
|
+
right_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
496
|
+
left_index: PyLegendOptional[bool] = False,
|
|
497
|
+
right_index: PyLegendOptional[bool] = False,
|
|
498
|
+
sort: PyLegendOptional[bool] = False,
|
|
499
|
+
suffixes: PyLegendOptional[
|
|
500
|
+
PyLegendUnion[
|
|
501
|
+
PyLegendTuple[PyLegendUnion[str, None], PyLegendUnion[str, None]],
|
|
502
|
+
PyLegendList[PyLegendUnion[str, None]],
|
|
503
|
+
]
|
|
504
|
+
] = ("_x", "_y"),
|
|
505
|
+
indicator: PyLegendOptional[PyLegendUnion[bool, str]] = False,
|
|
506
|
+
validate: PyLegendOptional[str] = None
|
|
507
|
+
) -> "PandasApiTdsFrame":
|
|
508
|
+
"""
|
|
509
|
+
Pandas-like merge:
|
|
510
|
+
- Mutually exclusive: `on` vs (`left_on`, `right_on`)
|
|
511
|
+
- If no keys provided, infer intersection of column names
|
|
512
|
+
- `how`: inner | left | right | outer (outer mapped to full)
|
|
513
|
+
- `suffixes`: applied to overlapping non-key columns
|
|
514
|
+
"""
|
|
515
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
516
|
+
PandasApiAppliedFunctionTdsFrame
|
|
517
|
+
)
|
|
518
|
+
from pylegend.core.tds.pandas_api.frames.functions.merge import (
|
|
519
|
+
PandasApiMergeFunction
|
|
520
|
+
)
|
|
521
|
+
merge_fn = PandasApiMergeFunction(
|
|
522
|
+
self,
|
|
523
|
+
other, # type: ignore
|
|
524
|
+
how=how,
|
|
525
|
+
on=on,
|
|
526
|
+
left_on=left_on,
|
|
527
|
+
right_on=right_on,
|
|
528
|
+
left_index=left_index,
|
|
529
|
+
right_index=right_index,
|
|
530
|
+
sort=sort,
|
|
531
|
+
suffixes=suffixes,
|
|
532
|
+
indicator=indicator,
|
|
533
|
+
validate=validate
|
|
534
|
+
)
|
|
535
|
+
merged = PandasApiAppliedFunctionTdsFrame(merge_fn)
|
|
536
|
+
|
|
537
|
+
if sort:
|
|
538
|
+
return merged.sort_values(
|
|
539
|
+
by=merge_fn.get_sort_keys(),
|
|
540
|
+
axis=0,
|
|
541
|
+
ascending=True,
|
|
542
|
+
inplace=False,
|
|
543
|
+
kind=None,
|
|
544
|
+
na_position="last",
|
|
545
|
+
ignore_index=True,
|
|
546
|
+
key=None
|
|
547
|
+
)
|
|
548
|
+
else:
|
|
549
|
+
return merged
|
|
550
|
+
|
|
551
|
+
def join(
|
|
552
|
+
self,
|
|
553
|
+
other: "PandasApiTdsFrame",
|
|
554
|
+
on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
555
|
+
how: PyLegendOptional[str] = "left",
|
|
556
|
+
lsuffix: str = "",
|
|
557
|
+
rsuffix: str = "",
|
|
558
|
+
sort: PyLegendOptional[bool] = False,
|
|
559
|
+
validate: PyLegendOptional[str] = None
|
|
560
|
+
) -> "PandasApiTdsFrame":
|
|
561
|
+
"""
|
|
562
|
+
Pandas-like join delegating to merge. No index support, only column-on-column via `on`.
|
|
563
|
+
"""
|
|
564
|
+
return self.merge(
|
|
565
|
+
other=other,
|
|
566
|
+
how=how,
|
|
567
|
+
on=on,
|
|
568
|
+
sort=sort,
|
|
569
|
+
suffixes=[lsuffix, rsuffix],
|
|
570
|
+
validate=validate
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
def rename(
|
|
574
|
+
self,
|
|
575
|
+
mapper: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
576
|
+
index: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
577
|
+
columns: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
578
|
+
axis: PyLegendUnion[str, int] = 1,
|
|
579
|
+
inplace: PyLegendUnion[bool] = False,
|
|
580
|
+
copy: PyLegendUnion[bool] = True,
|
|
581
|
+
level: PyLegendOptional[PyLegendUnion[int, str]] = None,
|
|
582
|
+
errors: str = "ignore",
|
|
583
|
+
) -> "PandasApiTdsFrame":
|
|
584
|
+
"""
|
|
585
|
+
Pandas-like rename:
|
|
586
|
+
- Supports mapping via `mapper` or explicit `index`/`columns`
|
|
587
|
+
- Only column renames are applied when `axis` is 1
|
|
588
|
+
- `errors`: ignore | raise
|
|
589
|
+
"""
|
|
590
|
+
|
|
591
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
592
|
+
PandasApiAppliedFunctionTdsFrame
|
|
593
|
+
)
|
|
594
|
+
from pylegend.core.tds.pandas_api.frames.functions.rename import (
|
|
595
|
+
PandasApiRenameFunction
|
|
596
|
+
)
|
|
597
|
+
return PandasApiAppliedFunctionTdsFrame(
|
|
598
|
+
PandasApiRenameFunction(
|
|
599
|
+
base_frame=self,
|
|
600
|
+
mapper=mapper,
|
|
601
|
+
axis=axis,
|
|
602
|
+
index=index,
|
|
603
|
+
columns=columns,
|
|
604
|
+
copy=copy,
|
|
605
|
+
inplace=inplace,
|
|
606
|
+
level=level,
|
|
607
|
+
errors=errors
|
|
608
|
+
)
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
def apply(
|
|
612
|
+
self,
|
|
613
|
+
func: PyLegendUnion[
|
|
614
|
+
PyLegendCallable[Concatenate["Series", P], PyLegendPrimitiveOrPythonPrimitive],
|
|
615
|
+
str
|
|
616
|
+
],
|
|
617
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
618
|
+
raw: bool = False,
|
|
619
|
+
result_type: PyLegendOptional[str] = None,
|
|
620
|
+
args: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = (),
|
|
621
|
+
by_row: PyLegendUnion[bool, str] = "compat",
|
|
622
|
+
engine: str = "python",
|
|
623
|
+
engine_kwargs: PyLegendOptional[PyLegendDict[str, PyLegendPrimitiveOrPythonPrimitive]] = None,
|
|
624
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
625
|
+
) -> "PandasApiTdsFrame":
|
|
626
|
+
"""
|
|
627
|
+
Pandas-like apply (columns-only):
|
|
628
|
+
- Supports callable func applied to each column (axis=0 or 'index')
|
|
629
|
+
- Internally delegates to assign by constructing lambdas per column
|
|
630
|
+
- Unsupported params raise NotImplementedError
|
|
631
|
+
"""
|
|
632
|
+
|
|
633
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
634
|
+
PandasApiAppliedFunctionTdsFrame
|
|
635
|
+
)
|
|
636
|
+
from pylegend.core.tds.pandas_api.frames.functions.assign_function import AssignFunction
|
|
637
|
+
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
638
|
+
|
|
639
|
+
# Validation
|
|
640
|
+
if axis not in (0, "index"):
|
|
641
|
+
raise ValueError("Only column-wise apply is supported. Use axis=0 or 'index'")
|
|
642
|
+
if raw:
|
|
643
|
+
raise NotImplementedError("raw=True is not supported. Use raw=False")
|
|
644
|
+
if result_type is not None:
|
|
645
|
+
raise NotImplementedError("result_type is not supported")
|
|
646
|
+
if by_row not in (False, "compat"):
|
|
647
|
+
raise NotImplementedError("by_row must be False or 'compat'")
|
|
648
|
+
if engine != "python":
|
|
649
|
+
raise NotImplementedError("Only engine='python' is supported")
|
|
650
|
+
if engine_kwargs is not None:
|
|
651
|
+
raise NotImplementedError("engine_kwargs are not supported")
|
|
652
|
+
if isinstance(func, str):
|
|
653
|
+
raise NotImplementedError("String-based apply is not supported")
|
|
654
|
+
if not callable(func):
|
|
655
|
+
raise TypeError("Function must be a callable")
|
|
656
|
+
|
|
657
|
+
# Build assign column definitions: apply func to each column Series
|
|
658
|
+
col_definitions = {}
|
|
659
|
+
for c in self.columns():
|
|
660
|
+
col_name = c.get_name()
|
|
661
|
+
series = self[col_name]
|
|
662
|
+
|
|
663
|
+
# Compute row callable via func on the Series
|
|
664
|
+
def _row_callable(
|
|
665
|
+
_row: PandasApiTdsRow,
|
|
666
|
+
_s: Series = series, # type: ignore
|
|
667
|
+
_a: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = args,
|
|
668
|
+
_k: PyLegendPrimitiveOrPythonPrimitive = kwargs # type: ignore
|
|
669
|
+
) -> PyLegendPrimitiveOrPythonPrimitive:
|
|
670
|
+
return func(_s, *_a, **_k) # type: ignore
|
|
671
|
+
|
|
672
|
+
col_definitions[col_name] = _row_callable
|
|
673
|
+
|
|
674
|
+
return PandasApiAppliedFunctionTdsFrame(
|
|
675
|
+
AssignFunction(self, col_definitions=col_definitions) # type: ignore
|
|
676
|
+
)
|
|
677
|
+
|
|
678
|
+
def head(self, n: int = 5) -> "PandasApiTdsFrame":
|
|
679
|
+
"""
|
|
680
|
+
Return the first `n` rows by calling truncate on rows.
|
|
681
|
+
Negative `n` is not supported.
|
|
682
|
+
"""
|
|
683
|
+
if not isinstance(n, int):
|
|
684
|
+
raise TypeError(f"n must be an int, got {type(n)}")
|
|
685
|
+
if n < 0:
|
|
686
|
+
raise NotImplementedError("Negative n is not supported yet in Pandas API head")
|
|
687
|
+
|
|
688
|
+
return self.truncate(before=None, after=max(n - 1, -1), axis=0, copy=True)
|
|
689
|
+
|
|
690
|
+
@property
|
|
691
|
+
def shape(self) -> PyLegendTuple[int, int]:
|
|
692
|
+
"""
|
|
693
|
+
Return a tuple representing the dimensionality of the TdsFrame
|
|
694
|
+
as (number of rows, number of columns).
|
|
695
|
+
"""
|
|
696
|
+
|
|
697
|
+
col_name = self.columns()[0].get_name()
|
|
698
|
+
newframe = self.aggregate(func={col_name: "count"}, axis=0)
|
|
699
|
+
|
|
700
|
+
df = newframe.execute_frame_to_pandas_df()
|
|
701
|
+
|
|
702
|
+
total_rows = df.iloc[0, 0]
|
|
703
|
+
total_cols = len(self.columns())
|
|
704
|
+
|
|
705
|
+
return (total_rows, total_cols) # type: ignore
|
|
706
|
+
|
|
707
|
+
def dropna(
|
|
708
|
+
self,
|
|
709
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
710
|
+
how: str = "any",
|
|
711
|
+
thresh: PyLegendOptional[int] = None,
|
|
712
|
+
subset: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
713
|
+
inplace: bool = False,
|
|
714
|
+
ignore_index: bool = False
|
|
715
|
+
) -> "PandasApiTdsFrame":
|
|
716
|
+
"""
|
|
717
|
+
Remove missing values.
|
|
718
|
+
|
|
719
|
+
Parameters
|
|
720
|
+
----------
|
|
721
|
+
axis : {0 or 'index'}, default 0
|
|
722
|
+
Determine if rows or columns which contain missing values are removed.
|
|
723
|
+
* 0, or 'index' : Drop rows which contain missing values.
|
|
724
|
+
Currently, only `axis=0` is supported.
|
|
725
|
+
how : {'any', 'all'}, default 'any'
|
|
726
|
+
Determine if row is removed from TdsFrame, when we have at least one NA or all NA.
|
|
727
|
+
* 'any' : If any NA values are present, drop that row.
|
|
728
|
+
* 'all' : If all values are NA, drop that row.
|
|
729
|
+
thresh : int, optional
|
|
730
|
+
Not implemented yet.
|
|
731
|
+
subset : list-like, optional
|
|
732
|
+
Labels along other axis to consider, e.g. if you are dropping rows
|
|
733
|
+
these would be a list of columns to include.
|
|
734
|
+
inplace : bool, default False
|
|
735
|
+
Not implemented yet.
|
|
736
|
+
ignore_index : bool, default False
|
|
737
|
+
Not implemented yet.
|
|
738
|
+
|
|
739
|
+
Returns
|
|
740
|
+
-------
|
|
741
|
+
PandasApiTdsFrame
|
|
742
|
+
TdsFrame with NA entries dropped.
|
|
743
|
+
"""
|
|
744
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
745
|
+
PandasApiAppliedFunctionTdsFrame
|
|
746
|
+
)
|
|
747
|
+
from pylegend.core.tds.pandas_api.frames.functions.dropna import PandasApiDropnaFunction
|
|
748
|
+
return PandasApiAppliedFunctionTdsFrame(
|
|
749
|
+
PandasApiDropnaFunction(
|
|
750
|
+
base_frame=self,
|
|
751
|
+
axis=axis,
|
|
752
|
+
how=how,
|
|
753
|
+
thresh=thresh,
|
|
754
|
+
subset=subset,
|
|
755
|
+
inplace=inplace,
|
|
756
|
+
ignore_index=ignore_index
|
|
757
|
+
)
|
|
758
|
+
)
|
|
759
|
+
|
|
760
|
+
def fillna(
|
|
761
|
+
self,
|
|
762
|
+
value: PyLegendUnion[
|
|
763
|
+
int, float, str, bool, date, datetime,
|
|
764
|
+
PyLegendDict[str, PyLegendUnion[int, float, str, bool, date, datetime]]
|
|
765
|
+
] = None, # type: ignore
|
|
766
|
+
axis: PyLegendOptional[PyLegendUnion[int, str]] = 0,
|
|
767
|
+
inplace: bool = False,
|
|
768
|
+
limit: PyLegendOptional[int] = None
|
|
769
|
+
) -> "PandasApiTdsFrame":
|
|
770
|
+
"""
|
|
771
|
+
Fill missing values.
|
|
772
|
+
|
|
773
|
+
Parameters
|
|
774
|
+
----------
|
|
775
|
+
base_frame : PandasApiBaseTdsFrame
|
|
776
|
+
The base frame to apply fillna on.
|
|
777
|
+
value : scalar, dict, default None
|
|
778
|
+
Value to use to fill holes (e.g. 0), alternately a dict of values specifying
|
|
779
|
+
which value to use for each column of TdsFrame.
|
|
780
|
+
axis : {0 or 'index'}, default 0
|
|
781
|
+
Axis along which to fill missing values.
|
|
782
|
+
* 0, or 'index' : Fill missing values for each column.
|
|
783
|
+
Currently, only `axis=0` is supported.
|
|
784
|
+
inplace : bool, default False
|
|
785
|
+
Not implemented yet.
|
|
786
|
+
limit : int, optional
|
|
787
|
+
Not implemented yet.
|
|
788
|
+
|
|
789
|
+
Returns
|
|
790
|
+
-------
|
|
791
|
+
PandasApiTdsFrame
|
|
792
|
+
TdsFrame with NA entries filled.
|
|
793
|
+
"""
|
|
794
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
795
|
+
PandasApiAppliedFunctionTdsFrame
|
|
796
|
+
)
|
|
797
|
+
from pylegend.core.tds.pandas_api.frames.functions.fillna import PandasApiFillnaFunction
|
|
798
|
+
return PandasApiAppliedFunctionTdsFrame(
|
|
799
|
+
PandasApiFillnaFunction(
|
|
800
|
+
base_frame=self,
|
|
801
|
+
value=value,
|
|
802
|
+
axis=axis,
|
|
803
|
+
inplace=inplace,
|
|
804
|
+
limit=limit
|
|
805
|
+
)
|
|
806
|
+
)
|
|
807
|
+
|
|
275
808
|
@abstractmethod
|
|
276
|
-
def
|
|
809
|
+
def get_super_type(self) -> PyLegendType[PyLegendTdsFrame]:
|
|
277
810
|
pass # pragma: no cover
|
|
278
811
|
|
|
279
|
-
|
|
812
|
+
def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
|
|
813
|
+
if self._transformed_frame is None:
|
|
814
|
+
return self.get_super_type().to_sql_query_object(self, config) # type: ignore
|
|
815
|
+
else:
|
|
816
|
+
return self._transformed_frame.to_sql_query_object(config)
|
|
817
|
+
|
|
280
818
|
def to_pure(self, config: FrameToPureConfig) -> str:
|
|
281
|
-
|
|
819
|
+
if self._transformed_frame is None:
|
|
820
|
+
return self.get_super_type().to_pure(self, config) # type: ignore
|
|
821
|
+
else:
|
|
822
|
+
return self._transformed_frame.to_pure(config)
|
|
282
823
|
|
|
283
824
|
def to_pure_query(self, config: FrameToPureConfig = FrameToPureConfig()) -> str:
|
|
284
825
|
return self.to_pure(config)
|