pylegend 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/core/database/sql_to_string/db_extension.py +177 -1
- pylegend/core/language/pandas_api/pandas_api_groupby_series.py +357 -0
- pylegend/core/language/pandas_api/pandas_api_series.py +202 -8
- pylegend/core/language/shared/expression.py +5 -0
- pylegend/core/language/shared/literal_expressions.py +22 -1
- pylegend/core/language/shared/operations/boolean_operation_expressions.py +144 -0
- pylegend/core/language/shared/operations/date_operation_expressions.py +91 -0
- pylegend/core/language/shared/operations/integer_operation_expressions.py +183 -1
- pylegend/core/language/shared/operations/string_operation_expressions.py +31 -1
- pylegend/core/language/shared/primitives/boolean.py +40 -0
- pylegend/core/language/shared/primitives/date.py +39 -0
- pylegend/core/language/shared/primitives/datetime.py +18 -0
- pylegend/core/language/shared/primitives/integer.py +54 -1
- pylegend/core/language/shared/primitives/strictdate.py +25 -1
- pylegend/core/language/shared/primitives/string.py +16 -2
- pylegend/core/sql/metamodel.py +50 -1
- pylegend/core/sql/metamodel_extension.py +77 -1
- pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +21 -11
- pylegend/core/tds/pandas_api/frames/functions/iloc.py +99 -0
- pylegend/core/tds/pandas_api/frames/functions/loc.py +136 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +3 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +50 -2
- pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py +87 -27
- pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +12 -0
- {pylegend-0.12.0.dist-info → pylegend-0.14.0.dist-info}/METADATA +1 -1
- {pylegend-0.12.0.dist-info → pylegend-0.14.0.dist-info}/RECORD +30 -27
- {pylegend-0.12.0.dist-info → pylegend-0.14.0.dist-info}/WHEEL +1 -1
- {pylegend-0.12.0.dist-info → pylegend-0.14.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.12.0.dist-info → pylegend-0.14.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.12.0.dist-info → pylegend-0.14.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -20,6 +20,7 @@ from pylegend._typing import (
|
|
|
20
20
|
from pylegend.core.sql.metamodel import (
|
|
21
21
|
Expression,
|
|
22
22
|
Window,
|
|
23
|
+
StringLiteral,
|
|
23
24
|
)
|
|
24
25
|
|
|
25
26
|
__all__: PyLegendSequence[str] = [
|
|
@@ -81,7 +82,12 @@ __all__: PyLegendSequence[str] = [
|
|
|
81
82
|
"EpochExpression",
|
|
82
83
|
"WindowExpression",
|
|
83
84
|
"ConstantExpression",
|
|
84
|
-
"StringSubStringExpression"
|
|
85
|
+
"StringSubStringExpression",
|
|
86
|
+
"DateAdjustExpression",
|
|
87
|
+
"BitwiseNotExpression",
|
|
88
|
+
"DateDiffExpression",
|
|
89
|
+
"DateTimeBucketExpression",
|
|
90
|
+
"DateType"
|
|
85
91
|
]
|
|
86
92
|
|
|
87
93
|
|
|
@@ -763,3 +769,73 @@ class StringSubStringExpression(Expression):
|
|
|
763
769
|
self.value = value
|
|
764
770
|
self.start = start
|
|
765
771
|
self.end = end
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
class DateAdjustExpression(Expression):
    """SQL metamodel node tagged ``"dateAdjustExpression"``.

    The node only stores its three operands; it performs no validation or
    evaluation itself.

    NOTE(review): presumably models shifting ``date`` by ``number`` units of
    ``duration_unit`` -- confirm against the SQL generator that consumes it.
    """

    # Operand expressions, stored exactly as passed to the constructor.
    date: "Expression"
    number: "Expression"
    duration_unit: "StringLiteral"

    def __init__(self, date: "Expression", number: "Expression", duration_unit: "StringLiteral") -> None:
        # Register the node type with the base Expression before storing operands.
        super().__init__(_type="dateAdjustExpression")
        self.date, self.number, self.duration_unit = date, number, duration_unit
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
class DateDiffExpression(Expression):
    """SQL metamodel node tagged ``"dateDiffExpression"``.

    Holds two date expressions and a unit literal without interpreting them.

    NOTE(review): presumably models the distance between ``start_date`` and
    ``end_date`` measured in ``duration_unit`` -- confirm against the SQL
    generator that consumes it.
    """

    # Operand expressions, stored exactly as passed to the constructor.
    start_date: "Expression"
    end_date: "Expression"
    duration_unit: "StringLiteral"

    def __init__(self, start_date: "Expression", end_date: "Expression", duration_unit: "StringLiteral") -> None:
        # Register the node type with the base Expression before storing operands.
        super().__init__(_type="dateDiffExpression")
        self.start_date, self.end_date, self.duration_unit = start_date, end_date, duration_unit
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
class DateType(Enum):
    """Closed set of date kinds; used as the ``date_type`` of ``DateTimeBucketExpression``."""

    # Members keep their explicit integer values; DateTime is the default
    # chosen by DateTimeBucketExpression's constructor.
    DateTime = 1
    StrictDate = 2
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
class DateTimeBucketExpression(Expression):
    """SQL metamodel node tagged ``"dateTimeBucketExpression"``.

    Stores a date expression, a quantity expression, a unit literal and the
    ``DateType`` of the date operand. ``date_type`` defaults to
    ``DateType.DateTime`` when the caller does not supply one.

    NOTE(review): presumably models bucketing ``date`` into windows of
    ``quantity`` x ``duration_unit`` -- confirm against the SQL generator.
    """

    # Operands, stored exactly as passed to the constructor.
    date: "Expression"
    quantity: "Expression"
    duration_unit: "StringLiteral"
    date_type: DateType

    def __init__(
            self,
            date: "Expression",
            quantity: "Expression",
            duration_unit: "StringLiteral",
            date_type: DateType = DateType.DateTime,
    ) -> None:
        # Register the node type with the base Expression before storing operands.
        super().__init__(_type="dateTimeBucketExpression")
        self.date, self.quantity = date, quantity
        self.duration_unit, self.date_type = duration_unit, date_type
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
class BitwiseNotExpression(Expression):
    """SQL metamodel node tagged ``"bitwiseNotExpression"`` wrapping a single operand expression."""

    # The wrapped operand, stored exactly as passed to the constructor.
    value: "Expression"

    def __init__(self, value: "Expression") -> None:
        # Register the node type with the base Expression before storing the operand.
        super().__init__(_type="bitwiseNotExpression")
        self.value = value
|
|
@@ -100,7 +100,7 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
100
100
|
|
|
101
101
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
102
102
|
columns_to_retain: PyLegendList[str] = [
|
|
103
|
-
db_extension.quote_identifier(x) for x in self.__base_frame.
|
|
103
|
+
db_extension.quote_identifier(x.get_name()) for x in self.__base_frame.get_grouping_columns()
|
|
104
104
|
]
|
|
105
105
|
new_cols_with_index: PyLegendList[PyLegendTuple[int, "SelectItem"]] = []
|
|
106
106
|
for col in new_query.select.selectItems:
|
|
@@ -127,8 +127,8 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
127
127
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
128
128
|
tds_row = PandasApiTdsRow.from_tds_frame("r", self.base_frame())
|
|
129
129
|
new_query.groupBy = [
|
|
130
|
-
(lambda x: x[c])(tds_row).to_sql_expression({"r": new_query}, config)
|
|
131
|
-
for c in self.__base_frame.
|
|
130
|
+
(lambda x: x[c.get_name()])(tds_row).to_sql_expression({"r": new_query}, config)
|
|
131
|
+
for c in self.__base_frame.get_grouping_columns()
|
|
132
132
|
]
|
|
133
133
|
|
|
134
134
|
return new_query
|
|
@@ -149,8 +149,8 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
149
149
|
|
|
150
150
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
151
151
|
group_strings = []
|
|
152
|
-
for
|
|
153
|
-
group_strings.append(escape_column_name(
|
|
152
|
+
for col in self.__base_frame.get_grouping_columns():
|
|
153
|
+
group_strings.append(escape_column_name(col.get_name()))
|
|
154
154
|
|
|
155
155
|
pure_expression = (
|
|
156
156
|
f"{self.base_frame().to_pure(config)}{config.separator(1)}" + f"->groupBy({config.separator(2)}"
|
|
@@ -182,7 +182,8 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
182
182
|
|
|
183
183
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
184
184
|
base_cols_map = {c.get_name(): c for c in self.base_frame().columns()}
|
|
185
|
-
for
|
|
185
|
+
for group_col in self.__base_frame.get_grouping_columns():
|
|
186
|
+
group_col_name = group_col.get_name()
|
|
186
187
|
if group_col_name in base_cols_map:
|
|
187
188
|
new_columns.append(base_cols_map[group_col_name].copy())
|
|
188
189
|
|
|
@@ -231,6 +232,10 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
231
232
|
|
|
232
233
|
tds_row = PandasApiTdsRow.from_tds_frame("r", self.base_frame())
|
|
233
234
|
|
|
235
|
+
group_cols: set[str] = set()
|
|
236
|
+
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
237
|
+
group_cols = set([col.get_name() for col in self.__base_frame.get_grouping_columns()])
|
|
238
|
+
|
|
234
239
|
for column_name, agg_input in normalized_func.items():
|
|
235
240
|
mapper_function: PyLegendCallable[[PandasApiTdsRow], PyLegendPrimitiveOrPythonPrimitive] = eval(
|
|
236
241
|
f'lambda r: r["{column_name}"]'
|
|
@@ -259,7 +264,12 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
259
264
|
normalized_agg_func = self.__normalize_agg_func_to_lambda_function(agg_input)
|
|
260
265
|
agg_result = normalized_agg_func(collection)
|
|
261
266
|
|
|
262
|
-
|
|
267
|
+
if column_name in group_cols:
|
|
268
|
+
alias = self._generate_column_alias(column_name, agg_input, 0)
|
|
269
|
+
else:
|
|
270
|
+
alias = column_name
|
|
271
|
+
|
|
272
|
+
self.__aggregates_list.append((alias, map_result, agg_result))
|
|
263
273
|
|
|
264
274
|
return True
|
|
265
275
|
|
|
@@ -274,13 +284,13 @@ class AggregateFunction(PandasApiAppliedFunction):
|
|
|
274
284
|
all_cols = [col.get_name() for col in self.base_frame().columns()]
|
|
275
285
|
|
|
276
286
|
if isinstance(self.__base_frame, PandasApiGroupbyTdsFrame):
|
|
277
|
-
group_cols = set(self.__base_frame.
|
|
287
|
+
group_cols = set([col.get_name() for col in self.__base_frame.get_grouping_columns()])
|
|
278
288
|
|
|
279
|
-
selected_cols = self.__base_frame.
|
|
289
|
+
selected_cols = self.__base_frame.get_selected_columns()
|
|
280
290
|
|
|
281
291
|
if selected_cols is not None:
|
|
282
|
-
validation_columns = selected_cols
|
|
283
|
-
default_broadcast_columns = selected_cols
|
|
292
|
+
validation_columns = [col.get_name() for col in selected_cols]
|
|
293
|
+
default_broadcast_columns = [col.get_name() for col in selected_cols]
|
|
284
294
|
else:
|
|
285
295
|
validation_columns = all_cols
|
|
286
296
|
default_broadcast_columns = [c for c in all_cols if c not in group_cols]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Copyright 2026 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
from pylegend._typing import (
|
|
17
|
+
PyLegendUnion,
|
|
18
|
+
PyLegendTuple,
|
|
19
|
+
PyLegendSequence,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
24
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
__all__: PyLegendSequence[str] = [
|
|
28
|
+
"PandasApiIlocIndexer"
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class PandasApiIlocIndexer:
    """Positional (``iloc``-style) indexer bound to a pandas-API TDS frame.

    Rows are selected through ``frame.truncate(before=..., after=...)`` and
    columns through ``frame.filter(items=...)``. Only integers, slices with a
    step of 1 (or no step), and tuples of up to two such indexers are accepted.
    """

    _frame: "PandasApiBaseTdsFrame"

    def __init__(self, frame: "PandasApiBaseTdsFrame") -> None:
        self._frame = frame

    def __getitem__(  # type: ignore
            self,
            key: PyLegendUnion[int, slice, PyLegendTuple[PyLegendUnion[int, slice], ...]]
    ) -> "PandasApiTdsFrame":
        """Select rows, then columns, by position.

        :param key: a row indexer, or a tuple ``(rows,)`` / ``(rows, cols)``.
        :return: the frame produced by row truncation followed by column filtering.
        :raises IndexError: if the tuple has more than two indexers, or an
            integer column position is out of bounds.
        :raises NotImplementedError: for unsupported indexer types (including
            ``bool``) and for slices with a step other than 1.
        """
        if isinstance(key, tuple):
            if len(key) > 2:
                raise IndexError("Too many indexers")
            elif len(key) == 1:
                rows, cols = key[0], slice(None, None, None)
            else:
                rows, cols = key  # type: ignore
        else:
            rows, cols = key, slice(None, None, None)

        # Row selection
        row_frame = self._handle_row_selection(rows)

        # Column selection
        return self._handle_column_selection(row_frame, cols)

    @staticmethod
    def _is_position(value: object) -> bool:
        """Return True for a genuine integer position.

        ``bool`` is a subclass of ``int``; without this exclusion ``iloc[True]``
        would silently be read as position 1 instead of being rejected.
        """
        return isinstance(value, int) and not isinstance(value, bool)

    @staticmethod
    def _require_unit_step(indexer: slice) -> None:
        """Reject slices whose step is neither ``None`` nor 1."""
        if indexer.step is not None and indexer.step != 1:
            raise NotImplementedError("iloc with slice step other than 1 is not supported yet in Pandas Api")

    def _handle_row_selection(self, rows: PyLegendUnion[int, slice]) -> "PandasApiTdsFrame":  # type: ignore
        """Translate a positional row indexer into a ``truncate`` call on the bound frame."""
        if isinstance(rows, slice):
            self._require_unit_step(rows)

            start = rows.start
            stop = rows.stop
            # ``stop - 1`` is passed as ``after``: the slice stop is exclusive.
            # NOTE(review): assumes truncate's ``after`` bound is inclusive -- confirm.
            after = stop - 1 if stop is not None else None
            return self._frame.truncate(before=start, after=after)

        elif self._is_position(rows):
            # NOTE(review): negative positions are forwarded to truncate unchanged -- confirm it handles them.
            return self._frame.truncate(before=rows, after=rows)

        else:
            raise NotImplementedError(
                f"iloc supports integer, slice, or tuple of these, but got indexer of type: {type(rows)}"
            )

    def _handle_column_selection(  # type: ignore
            self,
            frame: "PandasApiTdsFrame",
            cols: PyLegendUnion[int, slice]
    ) -> "PandasApiTdsFrame":
        """Translate a positional column indexer into a ``filter(items=...)`` call on ``frame``."""
        if isinstance(cols, slice):
            self._require_unit_step(cols)

            all_columns = [c.get_name() for c in frame.columns()]
            selected_columns = all_columns[cols]
            return frame.filter(items=selected_columns)

        elif self._is_position(cols):
            all_columns = [c.get_name() for c in frame.columns()]
            # Negative positions count from the end, as with list indexing.
            if not -len(all_columns) <= cols < len(all_columns):
                raise IndexError("single positional indexer is out-of-bounds")
            selected_column = all_columns[cols]
            return frame.filter(items=[selected_column])

        else:
            raise NotImplementedError(
                f"iloc supports integer, slice, or tuple of these, but got indexer of type: {type(cols)}"
            )
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Copyright 2026 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from pylegend._typing import (
|
|
20
|
+
PyLegendUnion,
|
|
21
|
+
PyLegendSequence,
|
|
22
|
+
PyLegendTuple,
|
|
23
|
+
PyLegendCallable
|
|
24
|
+
)
|
|
25
|
+
from pylegend.core.language import PyLegendBoolean
|
|
26
|
+
from pylegend.core.tds.pandas_api.frames.functions.filtering import PandasApiFilteringFunction
|
|
27
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunctionTdsFrame
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
31
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
|
|
32
|
+
|
|
33
|
+
__all__: PyLegendSequence[str] = [
|
|
34
|
+
"PandasApiLocIndexer"
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PandasApiLocIndexer:
    """Label-based (``loc``-style) indexer bound to a pandas-API TDS frame.

    Rows accept only the full slice ``:``, a ``PyLegendBoolean`` filter
    expression, or a callable that receives the frame and returns one of those.
    Columns accept a single name, a list/tuple of names, a list/tuple of
    ``bool`` flags (one per column), or a label slice.
    """

    _frame: "PandasApiBaseTdsFrame"

    def __init__(self, frame: "PandasApiBaseTdsFrame") -> None:
        self._frame = frame

    def __getitem__(  # type: ignore
            self,
            key: PyLegendUnion[
                slice,
                PyLegendBoolean,
                PyLegendCallable[["PandasApiBaseTdsFrame"], PyLegendBoolean],
                PyLegendTuple[
                    PyLegendUnion[slice, PyLegendBoolean, PyLegendCallable[["PandasApiBaseTdsFrame"], PyLegendBoolean]],
                    PyLegendUnion[str, slice, PyLegendSequence[str], PyLegendSequence[bool]]
                ]
            ]
    ) -> "PandasApiTdsFrame":
        """Select rows, then columns, by label / boolean expression.

        :param key: a row indexer, or a tuple ``(rows,)`` / ``(rows, cols)``.
        :return: the frame after row selection followed by column selection.
        :raises IndexError: if the tuple does not hold one or two indexers, or a
            boolean column mask has the wrong length.
        :raises TypeError: for unsupported row or column indexer types.
        :raises KeyError: if a requested column name does not exist.
        """
        rows: PyLegendUnion[  # type: ignore
            slice,
            PyLegendBoolean,
            PyLegendCallable[["PandasApiBaseTdsFrame"], PyLegendBoolean]
        ]
        cols: PyLegendUnion[str, slice, PyLegendSequence[str], PyLegendSequence[bool]]  # type: ignore

        if isinstance(key, tuple):
            if len(key) == 1:
                rows, cols = key[0], slice(None, None, None)
            elif len(key) == 2:
                rows, cols = key[0], key[1]
            else:
                raise IndexError("Too many indexers")
        else:
            rows, cols = key, slice(None, None, None)

        row_frame = self._handle_row_selection(rows)
        return self._handle_column_selection(row_frame, cols)

    def _handle_row_selection(  # type: ignore
            self,
            rows: PyLegendUnion[slice, PyLegendBoolean, PyLegendCallable[["PandasApiBaseTdsFrame"], PyLegendBoolean]]
    ) -> "PandasApiTdsFrame":
        """Apply the row indexer to the bound frame."""
        if isinstance(rows, slice):
            # Only the bare ``:`` slice is accepted; it selects every row.
            if rows.start is None and rows.stop is None and rows.step is None:
                return self._frame
            else:
                raise TypeError(
                    "loc supports only ':' for row slicing. "
                    "Label-based slicing for rows is not supported."
                )

        if isinstance(rows, PyLegendBoolean):
            return PandasApiAppliedFunctionTdsFrame(
                PandasApiFilteringFunction(self._frame, filter_expr=rows)
            )

        if callable(rows):
            # Resolve the callable against the frame, then dispatch on what it returned.
            new_key = rows(self._frame)
            return self._handle_row_selection(new_key)

        raise TypeError(f"Unsupported key type for .loc row selection: {type(rows)}")

    def _handle_column_selection(  # type: ignore
            self,
            frame: "PandasApiTdsFrame",
            cols: PyLegendUnion[str, slice, PyLegendSequence[str], PyLegendSequence[bool]]
    ) -> "PandasApiTdsFrame":
        """Apply the column indexer to ``frame``."""
        if isinstance(cols, slice) and cols.start is None and cols.stop is None and cols.step is None:
            return frame

        if isinstance(cols, str):
            return frame.filter(items=[cols])

        if isinstance(cols, (list, tuple)):
            if not cols:
                # ``all()`` of an empty iterable is True, so an empty selector used to be
                # taken for a boolean mask and rejected with a misleading length error.
                # Treat it as an empty selection, handled the same way as an empty label slice below.
                return frame.head(0)

            all_columns = [c.get_name() for c in frame.columns()]
            is_boolean_list = all(isinstance(k, bool) for k in cols)

            if is_boolean_list:
                if len(cols) != len(all_columns):
                    raise IndexError(f"Boolean index has wrong length: {len(cols)} instead of {len(all_columns)}")
                selected_columns = [col for col, select in zip(all_columns, cols) if select]
                # NOTE(review): an all-False mask reaches filter(items=[]) whereas an empty
                # label slice uses head(0) -- confirm which behavior is intended.
                return frame.filter(items=selected_columns)
            else:
                missing_cols = [c for c in cols if c not in all_columns]
                if missing_cols:
                    raise KeyError(f"{missing_cols} not in index")
                return frame.filter(items=cols)  # type: ignore

        if isinstance(cols, slice):
            all_columns = [c.get_name() for c in frame.columns()]
            pd_index = pd.Index(all_columns)

            # Delegate label-slice resolution to pandas.
            slicer = pd_index.slice_indexer(start=cols.start, end=cols.stop, step=cols.step)
            selected_columns = pd_index[slicer].tolist()
            if not selected_columns:
                return frame.head(0)
            return frame.filter(items=selected_columns)

        raise TypeError(f"Unsupported key type for .loc column selection: {type(cols)}")
|
|
@@ -85,3 +85,6 @@ class PandasApiAppliedFunctionTdsFrame(PandasApiBaseTdsFrame):
|
|
|
85
85
|
for x in [self.__applied_function.base_frame()] + self.__applied_function.tds_frame_parameters()
|
|
86
86
|
for y in x.get_all_tds_frames()
|
|
87
87
|
] + [self]
|
|
88
|
+
|
|
89
|
+
def get_applied_function(self) -> PandasApiAppliedFunction:
    """Return the applied function stored on this frame."""
    applied_function = self.__applied_function
    return applied_function
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import copy
|
|
16
16
|
from abc import ABCMeta, abstractmethod
|
|
17
17
|
from datetime import date, datetime
|
|
18
|
-
from typing import TYPE_CHECKING
|
|
18
|
+
from typing import TYPE_CHECKING, overload
|
|
19
19
|
|
|
20
20
|
from typing_extensions import Concatenate
|
|
21
21
|
|
|
@@ -69,6 +69,8 @@ from pylegend.extensions.tds.result_handler import (
|
|
|
69
69
|
if TYPE_CHECKING:
|
|
70
70
|
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
71
71
|
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
|
|
72
|
+
from pylegend.core.tds.pandas_api.frames.functions.iloc import PandasApiIlocIndexer
|
|
73
|
+
from pylegend.core.tds.pandas_api.frames.functions.loc import PandasApiLocIndexer
|
|
72
74
|
|
|
73
75
|
__all__: PyLegendSequence[str] = [
|
|
74
76
|
"PandasApiBaseTdsFrame"
|
|
@@ -94,6 +96,14 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
94
96
|
return [c.copy() for c in self.__columns]
|
|
95
97
|
return self._transformed_frame.columns()
|
|
96
98
|
|
|
99
|
+
@overload # type: ignore[override]
|
|
100
|
+
def __getitem__(self, key: str) -> "Series":
|
|
101
|
+
...
|
|
102
|
+
|
|
103
|
+
@overload
|
|
104
|
+
def __getitem__(self, key: PyLegendList[str]) -> "PandasApiTdsFrame":
|
|
105
|
+
...
|
|
106
|
+
|
|
97
107
|
def __getitem__(
|
|
98
108
|
self,
|
|
99
109
|
key: PyLegendUnion[str, PyLegendList[str], PyLegendBoolean]
|
|
@@ -663,7 +673,7 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
663
673
|
# Compute row callable via func on the Series
|
|
664
674
|
def _row_callable(
|
|
665
675
|
_row: PandasApiTdsRow,
|
|
666
|
-
_s: Series = series,
|
|
676
|
+
_s: Series = series,
|
|
667
677
|
_a: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = args,
|
|
668
678
|
_k: PyLegendPrimitiveOrPythonPrimitive = kwargs # type: ignore
|
|
669
679
|
) -> PyLegendPrimitiveOrPythonPrimitive:
|
|
@@ -675,6 +685,44 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
|
|
|
675
685
|
AssignFunction(self, col_definitions=col_definitions) # type: ignore
|
|
676
686
|
)
|
|
677
687
|
|
|
688
|
+
@property
def iloc(self) -> "PandasApiIlocIndexer":
    """
    Integer-position indexer for this frame, used as ``frame.iloc[...]``.

    Positions run from 0 to length-1 along each axis. Accepted keys are:
    - a single integer, e.g. ``5``;
    - a slice of integers, e.g. ``1:7``;
    - a tuple of a row indexer and a column indexer, e.g. ``(slice(1, 5), slice(0, 2))``.

    Lists of integers, boolean arrays and callables, which pandas accepts, are
    not supported and raise a NotImplementedError.
    """
    # Function-scope import: at module level this name is only imported under TYPE_CHECKING.
    from pylegend.core.tds.pandas_api.frames.functions.iloc import PandasApiIlocIndexer
    indexer = PandasApiIlocIndexer(self)
    return indexer
|
|
704
|
+
|
|
705
|
+
@property
def loc(self) -> "PandasApiLocIndexer":
    """
    Label / boolean-expression indexer for this frame, used as ``frame.loc[...]``.

    The key is either a row indexer or a tuple ``(rows, columns)``.

    Row indexer:
    - the complete slice ``:`` (any other row slice is rejected);
    - a boolean expression over the frame;
    - a callable taking the frame and returning one of the above.

    Column indexer:
    - a single column name, e.g. ``'a'``;
    - a list of column names, e.g. ``['a', 'b', 'c']``;
    - a list of booleans with one entry per column;
    - a slice of column names, e.g. ``'a':'f'``.
    """
    # Function-scope import: at module level this name is only imported under TYPE_CHECKING.
    from pylegend.core.tds.pandas_api.frames.functions.loc import PandasApiLocIndexer
    indexer = PandasApiLocIndexer(self)
    return indexer
|
|
725
|
+
|
|
678
726
|
def head(self, n: int = 5) -> "PandasApiTdsFrame":
|
|
679
727
|
"""
|
|
680
728
|
Return the first `n` rows by calling truncate on rows.
|