pylegend 0.11.0__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/core/database/sql_to_string/db_extension.py +244 -6
- pylegend/core/language/legendql_api/legendql_api_custom_expressions.py +190 -5
- pylegend/core/language/pandas_api/pandas_api_series.py +3 -0
- pylegend/core/language/shared/expression.py +5 -0
- pylegend/core/language/shared/literal_expressions.py +22 -1
- pylegend/core/language/shared/operations/boolean_operation_expressions.py +144 -0
- pylegend/core/language/shared/operations/date_operation_expressions.py +91 -0
- pylegend/core/language/shared/operations/integer_operation_expressions.py +183 -1
- pylegend/core/language/shared/operations/string_operation_expressions.py +31 -1
- pylegend/core/language/shared/primitives/boolean.py +40 -0
- pylegend/core/language/shared/primitives/date.py +39 -0
- pylegend/core/language/shared/primitives/datetime.py +18 -0
- pylegend/core/language/shared/primitives/integer.py +54 -1
- pylegend/core/language/shared/primitives/strictdate.py +25 -1
- pylegend/core/language/shared/primitives/string.py +16 -2
- pylegend/core/sql/metamodel.py +54 -2
- pylegend/core/sql/metamodel_extension.py +77 -1
- pylegend/core/tds/legendql_api/frames/functions/legendql_api_distinct_function.py +53 -7
- pylegend/core/tds/legendql_api/frames/legendql_api_base_tds_frame.py +146 -4
- pylegend/core/tds/legendql_api/frames/legendql_api_tds_frame.py +33 -2
- pylegend/core/tds/pandas_api/frames/functions/assign_function.py +65 -23
- pylegend/core/tds/pandas_api/frames/functions/drop.py +3 -3
- pylegend/core/tds/pandas_api/frames/functions/dropna.py +167 -0
- pylegend/core/tds/pandas_api/frames/functions/fillna.py +162 -0
- pylegend/core/tds/pandas_api/frames/functions/filter.py +10 -5
- pylegend/core/tds/pandas_api/frames/functions/iloc.py +99 -0
- pylegend/core/tds/pandas_api/frames/functions/loc.py +136 -0
- pylegend/core/tds/pandas_api/frames/functions/truncate_function.py +151 -120
- pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +7 -3
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +340 -34
- pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +90 -9
- pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_function_input_frame.py +9 -4
- pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_service_input_frame.py +12 -5
- pylegend/extensions/tds/pandas_api/frames/pandas_api_table_spec_input_frame.py +12 -4
- {pylegend-0.11.0.dist-info → pylegend-0.13.0.dist-info}/METADATA +1 -1
- {pylegend-0.11.0.dist-info → pylegend-0.13.0.dist-info}/RECORD +40 -36
- {pylegend-0.11.0.dist-info → pylegend-0.13.0.dist-info}/WHEEL +1 -1
- {pylegend-0.11.0.dist-info → pylegend-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.11.0.dist-info → pylegend-0.13.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.11.0.dist-info → pylegend-0.13.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -12,15 +12,24 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import copy
|
|
15
16
|
from abc import ABCMeta, abstractmethod
|
|
16
17
|
from datetime import date, datetime
|
|
17
18
|
from typing import TYPE_CHECKING
|
|
18
19
|
|
|
20
|
+
from typing_extensions import Concatenate
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from typing import ParamSpec
|
|
24
|
+
except Exception:
|
|
25
|
+
from typing_extensions import ParamSpec # type: ignore
|
|
26
|
+
|
|
19
27
|
import pandas as pd
|
|
20
28
|
|
|
21
29
|
from pylegend._typing import (
|
|
22
30
|
PyLegendSequence,
|
|
23
31
|
PyLegendTypeVar,
|
|
32
|
+
PyLegendType,
|
|
24
33
|
PyLegendList,
|
|
25
34
|
PyLegendTuple,
|
|
26
35
|
PyLegendSet,
|
|
@@ -33,7 +42,11 @@ from pylegend.core.database.sql_to_string import (
|
|
|
33
42
|
SqlToStringConfig,
|
|
34
43
|
SqlToStringFormat
|
|
35
44
|
)
|
|
36
|
-
from pylegend.core.language import
|
|
45
|
+
from pylegend.core.language import (
|
|
46
|
+
PyLegendPrimitive,
|
|
47
|
+
PyLegendInteger,
|
|
48
|
+
PyLegendBoolean,
|
|
49
|
+
)
|
|
37
50
|
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
38
51
|
from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
|
|
39
52
|
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
|
|
@@ -47,7 +60,7 @@ from pylegend.core.tds.result_handler import (
|
|
|
47
60
|
)
|
|
48
61
|
from pylegend.core.tds.tds_column import TdsColumn
|
|
49
62
|
from pylegend.core.tds.tds_frame import FrameToPureConfig
|
|
50
|
-
from pylegend.core.tds.tds_frame import FrameToSqlConfig
|
|
63
|
+
from pylegend.core.tds.tds_frame import FrameToSqlConfig, PyLegendTdsFrame
|
|
51
64
|
from pylegend.extensions.tds.result_handler import (
|
|
52
65
|
ToPandasDfResultHandler,
|
|
53
66
|
PandasDfReadConfig,
|
|
@@ -56,12 +69,15 @@ from pylegend.extensions.tds.result_handler import (
|
|
|
56
69
|
if TYPE_CHECKING:
|
|
57
70
|
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
58
71
|
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
|
|
72
|
+
from pylegend.core.tds.pandas_api.frames.functions.iloc import PandasApiIlocIndexer
|
|
73
|
+
from pylegend.core.tds.pandas_api.frames.functions.loc import PandasApiLocIndexer
|
|
59
74
|
|
|
60
75
|
__all__: PyLegendSequence[str] = [
|
|
61
76
|
"PandasApiBaseTdsFrame"
|
|
62
77
|
]
|
|
63
78
|
|
|
64
79
|
R = PyLegendTypeVar('R')
|
|
80
|
+
P = ParamSpec("P")
|
|
65
81
|
|
|
66
82
|
|
|
67
83
|
class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
@@ -73,9 +89,12 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
73
89
|
cols = "[" + ", ".join([str(c) for c in columns]) + "]"
|
|
74
90
|
raise ValueError(f"TdsFrame cannot have duplicated column names. Passed columns: {cols}")
|
|
75
91
|
self.__columns = [c.copy() for c in columns]
|
|
92
|
+
self._transformed_frame = None
|
|
76
93
|
|
|
77
94
|
def columns(self) -> PyLegendSequence[TdsColumn]:
|
|
78
|
-
|
|
95
|
+
if self._transformed_frame is None:
|
|
96
|
+
return [c.copy() for c in self.__columns]
|
|
97
|
+
return self._transformed_frame.columns()
|
|
79
98
|
|
|
80
99
|
def __getitem__(
|
|
81
100
|
self,
|
|
@@ -97,7 +116,8 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
97
116
|
if col.get_name() == key:
|
|
98
117
|
col_type = col.get_type()
|
|
99
118
|
if col_type == "Boolean":
|
|
100
|
-
from pylegend.core.language.pandas_api.pandas_api_series import
|
|
119
|
+
from pylegend.core.language.pandas_api.pandas_api_series import \
|
|
120
|
+
BooleanSeries # pragma: no cover
|
|
101
121
|
return BooleanSeries(self, key) # pragma: no cover (Boolean column not supported in PURE)
|
|
102
122
|
elif col_type == "String":
|
|
103
123
|
from pylegend.core.language.pandas_api.pandas_api_series import StringSeries
|
|
@@ -130,6 +150,41 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
130
150
|
else:
|
|
131
151
|
raise TypeError(f"Invalid key type: {type(key)}. Expected str, list, or boolean expression")
|
|
132
152
|
|
|
153
|
+
def __setitem__(self, key: str, value: PyLegendUnion["Series", PyLegendPrimitiveOrPythonPrimitive]) -> None:
|
|
154
|
+
"""
|
|
155
|
+
Pandas-like column assignment with replace semantics:
|
|
156
|
+
- If column exists, drop it first.
|
|
157
|
+
- Then assign the new value (Series or constant).
|
|
158
|
+
"""
|
|
159
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
160
|
+
PandasApiAppliedFunctionTdsFrame
|
|
161
|
+
)
|
|
162
|
+
from pylegend.core.tds.pandas_api.frames.functions.assign_function import AssignFunction
|
|
163
|
+
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
164
|
+
|
|
165
|
+
# Type Check
|
|
166
|
+
if not isinstance(key, str):
|
|
167
|
+
raise TypeError(f"Column name must be a string, got: {type(key)}")
|
|
168
|
+
|
|
169
|
+
# Reject cross-frame assignment
|
|
170
|
+
if isinstance(value, Series):
|
|
171
|
+
origin = value.get_base_frame()
|
|
172
|
+
if origin is not None and origin is not self:
|
|
173
|
+
raise ValueError("Assignment from a different frame is not allowed")
|
|
174
|
+
|
|
175
|
+
# Normalize the assignment value
|
|
176
|
+
col_def = {}
|
|
177
|
+
if callable(value):
|
|
178
|
+
col_def[key] = value
|
|
179
|
+
else:
|
|
180
|
+
col_def[key] = lambda row: value
|
|
181
|
+
|
|
182
|
+
working_frame = copy.deepcopy(self)
|
|
183
|
+
assign_applied = PandasApiAppliedFunctionTdsFrame(AssignFunction(working_frame, col_definitions=col_def))
|
|
184
|
+
|
|
185
|
+
self._transformed_frame = assign_applied # type: ignore
|
|
186
|
+
self.__columns = assign_applied.columns()
|
|
187
|
+
|
|
133
188
|
def assign(
|
|
134
189
|
self,
|
|
135
190
|
**kwargs: PyLegendCallable[
|
|
@@ -217,7 +272,7 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
217
272
|
index: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
218
273
|
columns: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
219
274
|
level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]] = None,
|
|
220
|
-
inplace: PyLegendUnion[bool, PyLegendBoolean] =
|
|
275
|
+
inplace: PyLegendUnion[bool, PyLegendBoolean] = False,
|
|
221
276
|
errors: str = "raise",
|
|
222
277
|
) -> "PandasApiTdsFrame":
|
|
223
278
|
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import \
|
|
@@ -238,11 +293,11 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
238
293
|
)
|
|
239
294
|
|
|
240
295
|
def aggregate(
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
296
|
+
self,
|
|
297
|
+
func: PyLegendAggInput,
|
|
298
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
299
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
300
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
246
301
|
) -> "PandasApiTdsFrame":
|
|
247
302
|
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
248
303
|
PandasApiAppliedFunctionTdsFrame
|
|
@@ -257,11 +312,11 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
257
312
|
))
|
|
258
313
|
|
|
259
314
|
def agg(
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
315
|
+
self,
|
|
316
|
+
func: PyLegendAggInput,
|
|
317
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
318
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
319
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
265
320
|
) -> "PandasApiTdsFrame":
|
|
266
321
|
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
267
322
|
PandasApiAppliedFunctionTdsFrame
|
|
@@ -293,7 +348,8 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
293
348
|
if min_count != 0:
|
|
294
349
|
raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")
|
|
295
350
|
if len(kwargs) > 0:
|
|
296
|
-
raise NotImplementedError(
|
|
351
|
+
raise NotImplementedError(
|
|
352
|
+
f"Additional keyword arguments not supported in sum function: {list(kwargs.keys())}")
|
|
297
353
|
return self.aggregate("sum", 0)
|
|
298
354
|
|
|
299
355
|
def mean(
|
|
@@ -310,7 +366,8 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
310
366
|
if numeric_only is not False:
|
|
311
367
|
raise NotImplementedError("numeric_only=True is not currently supported in mean function.")
|
|
312
368
|
if len(kwargs) > 0:
|
|
313
|
-
raise NotImplementedError(
|
|
369
|
+
raise NotImplementedError(
|
|
370
|
+
f"Additional keyword arguments not supported in mean function: {list(kwargs.keys())}")
|
|
314
371
|
return self.aggregate("mean", 0)
|
|
315
372
|
|
|
316
373
|
def min(
|
|
@@ -327,7 +384,8 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
327
384
|
if numeric_only is not False:
|
|
328
385
|
raise NotImplementedError("numeric_only=True is not currently supported in min function.")
|
|
329
386
|
if len(kwargs) > 0:
|
|
330
|
-
raise NotImplementedError(
|
|
387
|
+
raise NotImplementedError(
|
|
388
|
+
f"Additional keyword arguments not supported in min function: {list(kwargs.keys())}")
|
|
331
389
|
return self.aggregate("min", 0)
|
|
332
390
|
|
|
333
391
|
def max(
|
|
@@ -344,7 +402,8 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
344
402
|
if numeric_only is not False:
|
|
345
403
|
raise NotImplementedError("numeric_only=True is not currently supported in max function.")
|
|
346
404
|
if len(kwargs) > 0:
|
|
347
|
-
raise NotImplementedError(
|
|
405
|
+
raise NotImplementedError(
|
|
406
|
+
f"Additional keyword arguments not supported in max function: {list(kwargs.keys())}")
|
|
348
407
|
return self.aggregate("max", 0)
|
|
349
408
|
|
|
350
409
|
def std(
|
|
@@ -360,11 +419,13 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
360
419
|
if skipna is not True:
|
|
361
420
|
raise NotImplementedError("skipna=False is not currently supported in std function.")
|
|
362
421
|
if ddof != 1:
|
|
363
|
-
raise NotImplementedError(
|
|
422
|
+
raise NotImplementedError(
|
|
423
|
+
f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}")
|
|
364
424
|
if numeric_only is not False:
|
|
365
425
|
raise NotImplementedError("numeric_only=True is not currently supported in std function.")
|
|
366
426
|
if len(kwargs) > 0:
|
|
367
|
-
raise NotImplementedError(
|
|
427
|
+
raise NotImplementedError(
|
|
428
|
+
f"Additional keyword arguments not supported in std function: {list(kwargs.keys())}")
|
|
368
429
|
return self.aggregate("std", 0)
|
|
369
430
|
|
|
370
431
|
def var(
|
|
@@ -384,7 +445,8 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
384
445
|
if numeric_only is not False:
|
|
385
446
|
raise NotImplementedError("numeric_only=True is not currently supported in var function.")
|
|
386
447
|
if len(kwargs) > 0:
|
|
387
|
-
raise NotImplementedError(
|
|
448
|
+
raise NotImplementedError(
|
|
449
|
+
f"Additional keyword arguments not supported in var function: {list(kwargs.keys())}")
|
|
388
450
|
return self.aggregate("var", 0)
|
|
389
451
|
|
|
390
452
|
def count(
|
|
@@ -398,18 +460,19 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
398
460
|
if numeric_only is not False:
|
|
399
461
|
raise NotImplementedError("numeric_only=True is not currently supported in count function.")
|
|
400
462
|
if len(kwargs) > 0:
|
|
401
|
-
raise NotImplementedError(
|
|
463
|
+
raise NotImplementedError(
|
|
464
|
+
f"Additional keyword arguments not supported in count function: {list(kwargs.keys())}")
|
|
402
465
|
return self.aggregate("count", 0)
|
|
403
466
|
|
|
404
467
|
def groupby(
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
468
|
+
self,
|
|
469
|
+
by: PyLegendUnion[str, PyLegendList[str]],
|
|
470
|
+
level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
|
|
471
|
+
as_index: bool = False,
|
|
472
|
+
sort: bool = True,
|
|
473
|
+
group_keys: bool = False,
|
|
474
|
+
observed: bool = False,
|
|
475
|
+
dropna: bool = False,
|
|
413
476
|
) -> "PandasApiGroupbyTdsFrame":
|
|
414
477
|
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import (
|
|
415
478
|
PandasApiGroupbyTdsFrame
|
|
@@ -547,13 +610,256 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
547
610
|
)
|
|
548
611
|
)
|
|
549
612
|
|
|
613
|
+
def apply(
|
|
614
|
+
self,
|
|
615
|
+
func: PyLegendUnion[
|
|
616
|
+
PyLegendCallable[Concatenate["Series", P], PyLegendPrimitiveOrPythonPrimitive],
|
|
617
|
+
str
|
|
618
|
+
],
|
|
619
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
620
|
+
raw: bool = False,
|
|
621
|
+
result_type: PyLegendOptional[str] = None,
|
|
622
|
+
args: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = (),
|
|
623
|
+
by_row: PyLegendUnion[bool, str] = "compat",
|
|
624
|
+
engine: str = "python",
|
|
625
|
+
engine_kwargs: PyLegendOptional[PyLegendDict[str, PyLegendPrimitiveOrPythonPrimitive]] = None,
|
|
626
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
627
|
+
) -> "PandasApiTdsFrame":
|
|
628
|
+
"""
|
|
629
|
+
Pandas-like apply (columns-only):
|
|
630
|
+
- Supports callable func applied to each column (axis=0 or 'index')
|
|
631
|
+
- Internally delegates to assign by constructing lambdas per column
|
|
632
|
+
- Unsupported params raise NotImplementedError
|
|
633
|
+
"""
|
|
634
|
+
|
|
635
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
636
|
+
PandasApiAppliedFunctionTdsFrame
|
|
637
|
+
)
|
|
638
|
+
from pylegend.core.tds.pandas_api.frames.functions.assign_function import AssignFunction
|
|
639
|
+
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
640
|
+
|
|
641
|
+
# Validation
|
|
642
|
+
if axis not in (0, "index"):
|
|
643
|
+
raise ValueError("Only column-wise apply is supported. Use axis=0 or 'index'")
|
|
644
|
+
if raw:
|
|
645
|
+
raise NotImplementedError("raw=True is not supported. Use raw=False")
|
|
646
|
+
if result_type is not None:
|
|
647
|
+
raise NotImplementedError("result_type is not supported")
|
|
648
|
+
if by_row not in (False, "compat"):
|
|
649
|
+
raise NotImplementedError("by_row must be False or 'compat'")
|
|
650
|
+
if engine != "python":
|
|
651
|
+
raise NotImplementedError("Only engine='python' is supported")
|
|
652
|
+
if engine_kwargs is not None:
|
|
653
|
+
raise NotImplementedError("engine_kwargs are not supported")
|
|
654
|
+
if isinstance(func, str):
|
|
655
|
+
raise NotImplementedError("String-based apply is not supported")
|
|
656
|
+
if not callable(func):
|
|
657
|
+
raise TypeError("Function must be a callable")
|
|
658
|
+
|
|
659
|
+
# Build assign column definitions: apply func to each column Series
|
|
660
|
+
col_definitions = {}
|
|
661
|
+
for c in self.columns():
|
|
662
|
+
col_name = c.get_name()
|
|
663
|
+
series = self[col_name]
|
|
664
|
+
|
|
665
|
+
# Compute row callable via func on the Series
|
|
666
|
+
def _row_callable(
|
|
667
|
+
_row: PandasApiTdsRow,
|
|
668
|
+
_s: Series = series, # type: ignore
|
|
669
|
+
_a: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = args,
|
|
670
|
+
_k: PyLegendPrimitiveOrPythonPrimitive = kwargs # type: ignore
|
|
671
|
+
) -> PyLegendPrimitiveOrPythonPrimitive:
|
|
672
|
+
return func(_s, *_a, **_k) # type: ignore
|
|
673
|
+
|
|
674
|
+
col_definitions[col_name] = _row_callable
|
|
675
|
+
|
|
676
|
+
return PandasApiAppliedFunctionTdsFrame(
|
|
677
|
+
AssignFunction(self, col_definitions=col_definitions) # type: ignore
|
|
678
|
+
)
|
|
679
|
+
|
|
680
|
+
@property
|
|
681
|
+
def iloc(self) -> "PandasApiIlocIndexer":
|
|
682
|
+
"""
|
|
683
|
+
Purely integer-location based indexing for selection by position.
|
|
684
|
+
.iloc[] is primarily integer position based (from 0 to length-1 of the axis).
|
|
685
|
+
|
|
686
|
+
Allowed inputs are:
|
|
687
|
+
- An integer, e.g. 5.
|
|
688
|
+
- A slice object with ints, e.g. 1:7.
|
|
689
|
+
- A tuple of row and column indexes, e.g., (slice(1, 5), slice(0, 2))
|
|
690
|
+
|
|
691
|
+
Other pandas iloc features such as list of integers, boolean arrays, and callables
|
|
692
|
+
are not supported and will raise a NotImplementedError.
|
|
693
|
+
"""
|
|
694
|
+
from pylegend.core.tds.pandas_api.frames.functions.iloc import PandasApiIlocIndexer
|
|
695
|
+
return PandasApiIlocIndexer(self)
|
|
696
|
+
|
|
697
|
+
@property
|
|
698
|
+
def loc(self) -> "PandasApiLocIndexer":
|
|
699
|
+
"""
|
|
700
|
+
Access a group of rows and columns by label(s) or a boolean array.
|
|
701
|
+
.loc[] is primarily label based, but may also be used with a boolean array.
|
|
702
|
+
|
|
703
|
+
Allowed inputs are:
|
|
704
|
+
- A single label, e.g. 5 or 'a', (note that 5 is interpreted as a
|
|
705
|
+
label of the index, not as an integer position along the index).
|
|
706
|
+
- A list or array of labels, e.g. ['a', 'b', 'c'].
|
|
707
|
+
- A slice object with labels, e.g. 'a':'f'.
|
|
708
|
+
- A boolean array of the same length as the axis being sliced.
|
|
709
|
+
- A callable function with one argument (the calling Series or
|
|
710
|
+
DataFrame) and that returns valid output for indexing (one of the above).
|
|
711
|
+
|
|
712
|
+
Currently, for row selection, only callable function or complete slice are supported.
|
|
713
|
+
For column selection, string labels, lists of string labels, and slices of string labels are supported.
|
|
714
|
+
"""
|
|
715
|
+
from pylegend.core.tds.pandas_api.frames.functions.loc import PandasApiLocIndexer
|
|
716
|
+
return PandasApiLocIndexer(self)
|
|
717
|
+
|
|
718
|
+
def head(self, n: int = 5) -> "PandasApiTdsFrame":
|
|
719
|
+
"""
|
|
720
|
+
Return the first `n` rows by calling truncate on rows.
|
|
721
|
+
Negative `n` is not supported.
|
|
722
|
+
"""
|
|
723
|
+
if not isinstance(n, int):
|
|
724
|
+
raise TypeError(f"n must be an int, got {type(n)}")
|
|
725
|
+
if n < 0:
|
|
726
|
+
raise NotImplementedError("Negative n is not supported yet in Pandas API head")
|
|
727
|
+
|
|
728
|
+
return self.truncate(before=None, after=max(n - 1, -1), axis=0, copy=True)
|
|
729
|
+
|
|
730
|
+
@property
|
|
731
|
+
def shape(self) -> PyLegendTuple[int, int]:
|
|
732
|
+
"""
|
|
733
|
+
Return a tuple representing the dimensionality of the TdsFrame
|
|
734
|
+
as (number of rows, number of columns).
|
|
735
|
+
"""
|
|
736
|
+
|
|
737
|
+
col_name = self.columns()[0].get_name()
|
|
738
|
+
newframe = self.aggregate(func={col_name: "count"}, axis=0)
|
|
739
|
+
|
|
740
|
+
df = newframe.execute_frame_to_pandas_df()
|
|
741
|
+
|
|
742
|
+
total_rows = df.iloc[0, 0]
|
|
743
|
+
total_cols = len(self.columns())
|
|
744
|
+
|
|
745
|
+
return (total_rows, total_cols) # type: ignore
|
|
746
|
+
|
|
747
|
+
def dropna(
|
|
748
|
+
self,
|
|
749
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
750
|
+
how: str = "any",
|
|
751
|
+
thresh: PyLegendOptional[int] = None,
|
|
752
|
+
subset: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
753
|
+
inplace: bool = False,
|
|
754
|
+
ignore_index: bool = False
|
|
755
|
+
) -> "PandasApiTdsFrame":
|
|
756
|
+
"""
|
|
757
|
+
Remove missing values.
|
|
758
|
+
|
|
759
|
+
Parameters
|
|
760
|
+
----------
|
|
761
|
+
axis : {0 or 'index'}, default 0
|
|
762
|
+
Determine if rows or columns which contain missing values are removed.
|
|
763
|
+
* 0, or 'index' : Drop rows which contain missing values.
|
|
764
|
+
Currently, only `axis=0` is supported.
|
|
765
|
+
how : {'any', 'all'}, default 'any'
|
|
766
|
+
Determine if row is removed from TdsFrame, when we have at least one NA or all NA.
|
|
767
|
+
* 'any' : If any NA values are present, drop that row.
|
|
768
|
+
* 'all' : If all values are NA, drop that row.
|
|
769
|
+
thresh : int, optional
|
|
770
|
+
Not implemented yet.
|
|
771
|
+
subset : list-like, optional
|
|
772
|
+
Labels along other axis to consider, e.g. if you are dropping rows
|
|
773
|
+
these would be a list of columns to include.
|
|
774
|
+
inplace : bool, default False
|
|
775
|
+
Not implemented yet.
|
|
776
|
+
ignore_index : bool, default False
|
|
777
|
+
Not implemented yet.
|
|
778
|
+
|
|
779
|
+
Returns
|
|
780
|
+
-------
|
|
781
|
+
PandasApiTdsFrame
|
|
782
|
+
TdsFrame with NA entries dropped.
|
|
783
|
+
"""
|
|
784
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
785
|
+
PandasApiAppliedFunctionTdsFrame
|
|
786
|
+
)
|
|
787
|
+
from pylegend.core.tds.pandas_api.frames.functions.dropna import PandasApiDropnaFunction
|
|
788
|
+
return PandasApiAppliedFunctionTdsFrame(
|
|
789
|
+
PandasApiDropnaFunction(
|
|
790
|
+
base_frame=self,
|
|
791
|
+
axis=axis,
|
|
792
|
+
how=how,
|
|
793
|
+
thresh=thresh,
|
|
794
|
+
subset=subset,
|
|
795
|
+
inplace=inplace,
|
|
796
|
+
ignore_index=ignore_index
|
|
797
|
+
)
|
|
798
|
+
)
|
|
799
|
+
|
|
800
|
+
def fillna(
|
|
801
|
+
self,
|
|
802
|
+
value: PyLegendUnion[
|
|
803
|
+
int, float, str, bool, date, datetime,
|
|
804
|
+
PyLegendDict[str, PyLegendUnion[int, float, str, bool, date, datetime]]
|
|
805
|
+
] = None, # type: ignore
|
|
806
|
+
axis: PyLegendOptional[PyLegendUnion[int, str]] = 0,
|
|
807
|
+
inplace: bool = False,
|
|
808
|
+
limit: PyLegendOptional[int] = None
|
|
809
|
+
) -> "PandasApiTdsFrame":
|
|
810
|
+
"""
|
|
811
|
+
Fill missing values.
|
|
812
|
+
|
|
813
|
+
Parameters
|
|
814
|
+
----------
|
|
815
|
+
base_frame : PandasApiBaseTdsFrame
|
|
816
|
+
The base frame to apply fillna on.
|
|
817
|
+
value : scalar, dict, default None
|
|
818
|
+
Value to use to fill holes (e.g. 0), alternately a dict of values specifying
|
|
819
|
+
which value to use for each column of TdsFrame.
|
|
820
|
+
axis : {0 or 'index'}, default 0
|
|
821
|
+
Axis along which to fill missing values.
|
|
822
|
+
* 0, or 'index' : Fill missing values for each column.
|
|
823
|
+
Currently, only `axis=0` is supported.
|
|
824
|
+
inplace : bool, default False
|
|
825
|
+
Not implemented yet.
|
|
826
|
+
limit : int, optional
|
|
827
|
+
Not implemented yet.
|
|
828
|
+
|
|
829
|
+
Returns
|
|
830
|
+
-------
|
|
831
|
+
PandasApiTdsFrame
|
|
832
|
+
TdsFrame with NA entries filled.
|
|
833
|
+
"""
|
|
834
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
835
|
+
PandasApiAppliedFunctionTdsFrame
|
|
836
|
+
)
|
|
837
|
+
from pylegend.core.tds.pandas_api.frames.functions.fillna import PandasApiFillnaFunction
|
|
838
|
+
return PandasApiAppliedFunctionTdsFrame(
|
|
839
|
+
PandasApiFillnaFunction(
|
|
840
|
+
base_frame=self,
|
|
841
|
+
value=value,
|
|
842
|
+
axis=axis,
|
|
843
|
+
inplace=inplace,
|
|
844
|
+
limit=limit
|
|
845
|
+
)
|
|
846
|
+
)
|
|
847
|
+
|
|
550
848
|
@abstractmethod
|
|
551
|
-
def
|
|
849
|
+
def get_super_type(self) -> PyLegendType[PyLegendTdsFrame]:
|
|
552
850
|
pass # pragma: no cover
|
|
553
851
|
|
|
554
|
-
|
|
852
|
+
def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
|
|
853
|
+
if self._transformed_frame is None:
|
|
854
|
+
return self.get_super_type().to_sql_query_object(self, config) # type: ignore
|
|
855
|
+
else:
|
|
856
|
+
return self._transformed_frame.to_sql_query_object(config)
|
|
857
|
+
|
|
555
858
|
def to_pure(self, config: FrameToPureConfig) -> str:
|
|
556
|
-
|
|
859
|
+
if self._transformed_frame is None:
|
|
860
|
+
return self.get_super_type().to_pure(self, config) # type: ignore
|
|
861
|
+
else:
|
|
862
|
+
return self._transformed_frame.to_pure(config)
|
|
557
863
|
|
|
558
864
|
def to_pure_query(self, config: FrameToPureConfig = FrameToPureConfig()) -> str:
|
|
559
865
|
return self.to_pure(config)
|
|
@@ -16,6 +16,13 @@ from abc import abstractmethod
|
|
|
16
16
|
from datetime import date, datetime
|
|
17
17
|
from typing import TYPE_CHECKING
|
|
18
18
|
|
|
19
|
+
from typing_extensions import Concatenate
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
from typing import ParamSpec
|
|
23
|
+
except Exception:
|
|
24
|
+
from typing_extensions import ParamSpec # type: ignore
|
|
25
|
+
|
|
19
26
|
from pylegend._typing import (
|
|
20
27
|
PyLegendCallable,
|
|
21
28
|
PyLegendSequence,
|
|
@@ -40,11 +47,15 @@ from pylegend.core.tds.tds_frame import PyLegendTdsFrame
|
|
|
40
47
|
if TYPE_CHECKING:
|
|
41
48
|
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
42
49
|
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
|
|
50
|
+
from pylegend.core.tds.pandas_api.frames.functions.iloc import PandasApiIlocIndexer
|
|
51
|
+
from pylegend.core.tds.pandas_api.frames.functions.loc import PandasApiLocIndexer
|
|
43
52
|
|
|
44
53
|
__all__: PyLegendSequence[str] = [
|
|
45
54
|
"PandasApiTdsFrame"
|
|
46
55
|
]
|
|
47
56
|
|
|
57
|
+
P = ParamSpec("P")
|
|
58
|
+
|
|
48
59
|
|
|
49
60
|
class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
50
61
|
|
|
@@ -55,6 +66,14 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
|
55
66
|
) -> PyLegendUnion["PandasApiTdsFrame", "Series"]:
|
|
56
67
|
pass # pragma: no cover
|
|
57
68
|
|
|
69
|
+
@abstractmethod
|
|
70
|
+
def __setitem__(
|
|
71
|
+
self,
|
|
72
|
+
key: str,
|
|
73
|
+
value: PyLegendUnion["Series", PyLegendPrimitiveOrPythonPrimitive]
|
|
74
|
+
) -> None:
|
|
75
|
+
pass # pragma: no cover
|
|
76
|
+
|
|
58
77
|
@abstractmethod
|
|
59
78
|
def assign(
|
|
60
79
|
self,
|
|
@@ -107,7 +126,7 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
|
107
126
|
index: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
108
127
|
columns: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
109
128
|
level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]] = None,
|
|
110
|
-
inplace: PyLegendUnion[bool, PyLegendBoolean] =
|
|
129
|
+
inplace: PyLegendUnion[bool, PyLegendBoolean] = False,
|
|
111
130
|
errors: str = "raise",
|
|
112
131
|
) -> "PandasApiTdsFrame":
|
|
113
132
|
pass # pragma: no cover
|
|
@@ -183,14 +202,14 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
|
183
202
|
|
|
184
203
|
@abstractmethod
|
|
185
204
|
def groupby(
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
205
|
+
self,
|
|
206
|
+
by: PyLegendUnion[str, PyLegendList[str]],
|
|
207
|
+
level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
|
|
208
|
+
as_index: bool = False,
|
|
209
|
+
sort: bool = True,
|
|
210
|
+
group_keys: bool = False,
|
|
211
|
+
observed: bool = False,
|
|
212
|
+
dropna: bool = False,
|
|
194
213
|
) -> "PandasApiGroupbyTdsFrame":
|
|
195
214
|
pass # pragma: no cover
|
|
196
215
|
|
|
@@ -265,3 +284,65 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
|
265
284
|
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
266
285
|
) -> "PandasApiTdsFrame":
|
|
267
286
|
pass # pragma: no cover
|
|
287
|
+
|
|
288
|
+
@abstractmethod
|
|
289
|
+
def apply(
|
|
290
|
+
self,
|
|
291
|
+
func: PyLegendUnion[
|
|
292
|
+
PyLegendCallable[Concatenate["Series", P], PyLegendPrimitiveOrPythonPrimitive],
|
|
293
|
+
str
|
|
294
|
+
],
|
|
295
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
296
|
+
raw: bool = False,
|
|
297
|
+
result_type: PyLegendOptional[str] = None,
|
|
298
|
+
args: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = (),
|
|
299
|
+
by_row: PyLegendUnion[bool, str] = "compat",
|
|
300
|
+
engine: str = "python",
|
|
301
|
+
engine_kwargs: PyLegendOptional[PyLegendDict[str, PyLegendPrimitiveOrPythonPrimitive]] = None,
|
|
302
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
303
|
+
) -> "PandasApiTdsFrame":
|
|
304
|
+
pass # pragma: no cover
|
|
305
|
+
|
|
306
|
+
@property
|
|
307
|
+
@abstractmethod
|
|
308
|
+
def iloc(self) -> "PandasApiIlocIndexer":
|
|
309
|
+
pass # pragma: no cover
|
|
310
|
+
|
|
311
|
+
@property
|
|
312
|
+
@abstractmethod
|
|
313
|
+
def loc(self) -> "PandasApiLocIndexer":
|
|
314
|
+
pass # pragma: no cover
|
|
315
|
+
|
|
316
|
+
@abstractmethod
|
|
317
|
+
def head(self, n: int = 5) -> "PandasApiTdsFrame":
|
|
318
|
+
pass # pragma: no cover
|
|
319
|
+
|
|
320
|
+
@property
|
|
321
|
+
@abstractmethod
|
|
322
|
+
def shape(self) -> PyLegendTuple[int, int]:
|
|
323
|
+
pass # pragma: no cover
|
|
324
|
+
|
|
325
|
+
@abstractmethod
|
|
326
|
+
def dropna(
|
|
327
|
+
self,
|
|
328
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
329
|
+
how: str = "any",
|
|
330
|
+
thresh: PyLegendOptional[int] = None,
|
|
331
|
+
subset: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
332
|
+
inplace: bool = False,
|
|
333
|
+
ignore_index: bool = False
|
|
334
|
+
) -> "PandasApiTdsFrame":
|
|
335
|
+
pass # pragma: no cover
|
|
336
|
+
|
|
337
|
+
@abstractmethod
|
|
338
|
+
def fillna(
|
|
339
|
+
self,
|
|
340
|
+
value: PyLegendUnion[
|
|
341
|
+
int, float, str, bool, date, datetime,
|
|
342
|
+
PyLegendDict[str, PyLegendUnion[int, float, str, bool, date, datetime]]
|
|
343
|
+
] = None, # type: ignore
|
|
344
|
+
axis: PyLegendOptional[PyLegendUnion[int, str]] = 0,
|
|
345
|
+
inplace: bool = False,
|
|
346
|
+
limit: PyLegendOptional[int] = None
|
|
347
|
+
) -> "PandasApiTdsFrame":
|
|
348
|
+
pass # pragma: no cover
|