pylegend 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/_typing.py +6 -0
- pylegend/core/database/sql_to_string/db_extension.py +35 -6
- pylegend/core/language/pandas_api/__init__.py +13 -0
- pylegend/core/language/pandas_api/pandas_api_aggregate_specification.py +54 -0
- pylegend/core/language/pandas_api/pandas_api_custom_expressions.py +85 -0
- pylegend/core/language/pandas_api/pandas_api_series.py +174 -0
- pylegend/core/language/pandas_api/pandas_api_tds_row.py +74 -0
- pylegend/core/language/shared/operations/integer_operation_expressions.py +35 -0
- pylegend/core/language/shared/operations/nary_expression.py +104 -0
- pylegend/core/language/shared/operations/primitive_operation_expressions.py +30 -0
- pylegend/core/language/shared/operations/string_operation_expressions.py +624 -1
- pylegend/core/language/shared/primitives/integer.py +6 -0
- pylegend/core/language/shared/primitives/primitive.py +6 -0
- pylegend/core/language/shared/primitives/string.py +129 -1
- pylegend/core/sql/metamodel.py +3 -1
- pylegend/core/sql/metamodel_extension.py +18 -0
- pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +316 -0
- pylegend/core/tds/pandas_api/frames/functions/assign_function.py +20 -15
- pylegend/core/tds/pandas_api/frames/functions/drop.py +171 -0
- pylegend/core/tds/pandas_api/frames/functions/filter.py +193 -0
- pylegend/core/tds/pandas_api/frames/functions/filtering.py +85 -0
- pylegend/core/tds/pandas_api/frames/functions/sort_values_function.py +189 -0
- pylegend/core/tds/pandas_api/frames/functions/truncate_function.py +120 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +5 -1
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +204 -7
- pylegend/core/tds/pandas_api/frames/pandas_api_input_tds_frame.py +5 -3
- pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +90 -3
- {pylegend-0.8.0.dist-info → pylegend-0.9.0.dist-info}/METADATA +1 -1
- {pylegend-0.8.0.dist-info → pylegend-0.9.0.dist-info}/RECORD +33 -21
- {pylegend-0.8.0.dist-info → pylegend-0.9.0.dist-info}/WHEEL +0 -0
- {pylegend-0.8.0.dist-info → pylegend-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.8.0.dist-info → pylegend-0.9.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.8.0.dist-info → pylegend-0.9.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from pylegend._typing import (
|
|
16
|
+
PyLegendList,
|
|
17
|
+
PyLegendSet,
|
|
18
|
+
PyLegendSequence,
|
|
19
|
+
PyLegendUnion,
|
|
20
|
+
PyLegendOptional
|
|
21
|
+
)
|
|
22
|
+
from pylegend.core.language.shared.primitives.boolean import PyLegendBoolean
|
|
23
|
+
from pylegend.core.language.shared.primitives.integer import PyLegendInteger
|
|
24
|
+
from pylegend.core.sql.metamodel import (
|
|
25
|
+
QuerySpecification
|
|
26
|
+
)
|
|
27
|
+
from pylegend.core.tds.pandas_api.frames.functions.filter import PandasApiFilterFunction
|
|
28
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunction
|
|
29
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
30
|
+
from pylegend.core.tds.tds_column import TdsColumn
|
|
31
|
+
from pylegend.core.tds.tds_frame import FrameToPureConfig, FrameToSqlConfig
|
|
32
|
+
|
|
33
|
+
# Names exported by a star-import of this module.
__all__: PyLegendSequence[str] = ["PandasApiDropFunction"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PandasApiDropFunction(PandasApiAppliedFunction):
    """Pandas-style ``drop`` that removes columns from a TDS frame.

    Only column removal is implemented (``axis=1`` / ``axis="columns"``). The
    surviving columns are projected by delegating to ``PandasApiFilterFunction``.
    """

    __base_frame: PandasApiBaseTdsFrame
    __labels: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]]
    __axis: PyLegendUnion[str, int, PyLegendInteger]
    __index: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]]
    __columns: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]]
    __level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]]
    __inplace: PyLegendUnion[bool, PyLegendBoolean]
    __errors: str

    @classmethod
    def name(cls) -> str:
        """Return the pandas API name of this function."""
        return "drop"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            labels: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]],
            axis: PyLegendUnion[str, int, PyLegendInteger],
            index: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]],
            columns: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]],
            level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]],
            inplace: PyLegendUnion[bool, PyLegendBoolean],
            errors: str
    ) -> None:
        """Store the raw ``drop`` arguments; they are checked by ``validate``."""
        self.__base_frame = base_frame
        self.__labels = labels
        self.__axis = axis
        self.__index = index
        self.__columns = columns
        self.__level = level
        self.__inplace = inplace
        self.__errors = errors

    def __columns_to_drop(self) -> PyLegendList[str]:
        """Return the requested column names as a fresh list, without mutating state.

        ``columns`` is used when given, otherwise ``labels``. A single string is
        wrapped in a list so it is never iterated character by character.

        Raises:
            TypeError: if the argument is neither a string, a sequence nor a set.
        """
        requested = self.__columns if self.__columns is not None else self.__labels
        if requested is None:
            return []
        if isinstance(requested, str):
            return [requested]
        if isinstance(requested, (PyLegendSequence, PyLegendSet)):
            return list(requested)
        raise TypeError(f"Unsupported type for columns: {type(requested)}")

    def __build_filter_function(self) -> PandasApiFilterFunction:
        """Build the column projection that keeps every column not being dropped.

        Raises:
            KeyError: if ``errors == "raise"`` and a requested column is absent
                from the base frame.
        """
        base_cols = [c.get_name() for c in self.__base_frame.columns()]
        to_drop = self.__columns_to_drop()

        if self.__errors == "raise":
            not_found = [col for col in to_drop if col not in base_cols]
            if not_found:
                raise KeyError(f"{not_found} not found in axis")

        columns_to_retain = [col for col in base_cols if col not in to_drop]
        return PandasApiFilterFunction(
            base_frame=self.__base_frame,
            items=columns_to_retain,
            like=None,
            regex=None,
            axis=1
        )

    def to_sql(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Return the SQL query object selecting only the retained columns."""
        return self.__build_filter_function().to_sql(config)

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the Pure expression selecting only the retained columns."""
        return self.__build_filter_function().to_pure(config)

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this function is applied to."""
        return self.__base_frame

    def tds_frame_parameters(self) -> PyLegendList["PandasApiBaseTdsFrame"]:
        """Return the additional frame parameters; ``drop`` has none."""
        return []

    def calculate_columns(self) -> PyLegendSequence["TdsColumn"]:
        """Return copies of the base frame columns that are not being dropped."""
        to_drop = self.__columns_to_drop()
        return [
            col.copy()
            for col in self.__base_frame.columns()
            if col.get_name() not in to_drop
        ]

    def validate(self) -> bool:
        """Check the stored arguments and return ``True`` when they are supported.

        The method does not modify the instance, so it can be called repeatedly.

        Raises:
            TypeError: for an axis, ``inplace`` or columns value of an unsupported type.
            NotImplementedError: for row-axis drops, ``level``, ``index`` or
                ``inplace=False``.
            ValueError: for an unknown axis, when both ``labels`` and ``columns``
                are given, or when neither is given.
        """
        if self.__axis is not None:
            if not isinstance(self.__axis, (str, int, PyLegendInteger)):
                raise TypeError(f"No axis named {self.__axis} for object type Tds DataFrame")  # pragma: no cover
            if self.__axis != 1 and self.__axis != "columns":
                if self.__axis == 0 or self.__axis == "index":
                    raise NotImplementedError(
                        f"Axis {self.__axis} is not supported for 'drop' function in PandasApi")
                raise ValueError(f"No axis named {self.__axis} for object type Tds DataFrame")

        if self.__level is not None:
            raise NotImplementedError("'level' parameter is not supported for 'drop' function in PandasApi")

        if self.__index is not None:
            raise NotImplementedError("'index' parameter is not supported for 'drop' function in PandasApi")

        if self.__labels is not None and self.__columns is not None:
            raise ValueError("Cannot specify both 'labels' and 'columns'")

        has_selection = self.__labels is not None or self.__columns is not None
        if has_selection:
            # Called only for its type check; the result is recomputed on demand.
            self.__columns_to_drop()

        if not isinstance(self.__inplace, (bool, PyLegendBoolean)):
            raise TypeError(f"Inplace must be True. Got inplace={self.__inplace!r}")  # pragma: no cover
        if self.__inplace is False:
            raise NotImplementedError(f"Only inplace=True is supported. Got inplace={self.__inplace!r}")

        # Kept last so the precedence of the errors above is unchanged.
        if not has_selection:
            raise ValueError("Need to specify at least one of 'labels' or 'columns'")

        return True
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
from pylegend._typing import (
|
|
18
|
+
PyLegendUnion,
|
|
19
|
+
PyLegendOptional,
|
|
20
|
+
PyLegendSequence,
|
|
21
|
+
PyLegendList,
|
|
22
|
+
PyLegendTuple
|
|
23
|
+
)
|
|
24
|
+
from pylegend.core.language import (
|
|
25
|
+
PyLegendInteger,
|
|
26
|
+
)
|
|
27
|
+
from pylegend.core.language.shared.helpers import escape_column_name
|
|
28
|
+
from pylegend.core.sql.metamodel import (
|
|
29
|
+
QuerySpecification,
|
|
30
|
+
SingleColumn,
|
|
31
|
+
SelectItem
|
|
32
|
+
)
|
|
33
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
34
|
+
PandasApiAppliedFunction,
|
|
35
|
+
)
|
|
36
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import (
|
|
37
|
+
PandasApiBaseTdsFrame,
|
|
38
|
+
)
|
|
39
|
+
from pylegend.core.tds.sql_query_helpers import copy_query, create_sub_query
|
|
40
|
+
from pylegend.core.tds.tds_column import TdsColumn
|
|
41
|
+
from pylegend.core.tds.tds_frame import FrameToSqlConfig, FrameToPureConfig
|
|
42
|
+
|
|
43
|
+
# Names exported by a star-import of this module.
__all__: PyLegendSequence[str] = ["PandasApiFilterFunction"]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class PandasApiFilterFunction(PandasApiAppliedFunction):
    """Pandas-style ``filter`` that keeps a subset of a TDS frame's columns.

    Columns are chosen by exactly one of ``items`` (explicit names), ``like``
    (substring match) or ``regex`` (``re.search`` match). Only the column axis
    (``1`` / ``"columns"``) is supported.
    """

    __base_frame: PandasApiBaseTdsFrame
    __items: PyLegendOptional[PyLegendList[str]]
    __like: PyLegendOptional[str]
    __regex: PyLegendOptional[str]
    __axis: PyLegendOptional[PyLegendUnion[str, int, PyLegendInteger]]

    @classmethod
    def name(cls) -> str:
        """Return the pandas API name of this function."""
        return "filter"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            items: PyLegendOptional[PyLegendList[str]],
            like: PyLegendOptional[str],
            regex: PyLegendOptional[str],
            axis: PyLegendOptional[PyLegendUnion[str, int, PyLegendInteger]],
    ) -> None:
        """Store the raw ``filter`` arguments; a missing ``axis`` defaults to ``1``."""
        self.__base_frame = base_frame
        self.__items = items
        self.__like = like
        self.__regex = regex
        self.__axis = 1 if axis is None else axis

    def __get_desired_columns(
            self, col_names: PyLegendSequence[str]
    ) -> PyLegendSequence[str]:
        """Return the column names to keep.

        ``items`` is returned as given (in its own order); ``like`` and ``regex``
        select from ``col_names`` and therefore keep the order of ``col_names``.
        """
        if self.__items is not None:
            return self.__items
        elif self.__like is not None:
            return [col for col in col_names if self.__like in col]
        elif self.__regex is not None:
            regex_pattern = re.compile(self.__regex)
            return [col for col in col_names if regex_pattern.search(col)]

        return []  # pragma: no cover

    def to_sql(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Return the SQL query object projecting only the desired columns.

        Raises:
            ValueError: if the base query selects something other than aliased
                ``SingleColumn`` items (only checked when no sub-query is needed).
        """
        base_query = self.__base_frame.to_sql_query_object(config)
        db_extension = config.sql_to_string_generator().get_db_extension()
        columns_to_retain = [db_extension.quote_identifier(x) for x in
                             self.__get_desired_columns([c.get_name() for c in self.__base_frame.columns()])]

        sub_query_required = (
            len(base_query.groupBy) > 0 or
            len(base_query.orderBy) > 0 or
            base_query.having is not None or
            base_query.select.distinct
        )

        if sub_query_required:
            # NOTE(review): column order on this path is decided by create_sub_query,
            # which is not visible here - confirm it matches the requested order.
            new_query = create_sub_query(base_query, config, "root", columns_to_retain=columns_to_retain)
            return new_query

        # First position of each alias in the requested order (same answer as
        # list.index, without rescanning the list for every select item).
        position_by_alias = {}
        for position, alias in enumerate(columns_to_retain):
            position_by_alias.setdefault(alias, position)

        new_cols_with_index: PyLegendList[PyLegendTuple[int, SelectItem]] = []
        for col in base_query.select.selectItems:
            if not isinstance(col, SingleColumn):
                raise ValueError("Select operation not supported for queries with columns other than SingleColumn")
            if col.alias is None:
                raise ValueError("Select operation not supported for queries with SingleColumns with missing alias")
            if col.alias in position_by_alias:
                new_cols_with_index.append((position_by_alias[col.alias], col))

        new_select_items = [y[1] for y in sorted(new_cols_with_index, key=lambda x: x[0])]
        new_query = copy_query(base_query)
        new_query.select.selectItems = new_select_items
        return new_query

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the Pure expression selecting the desired columns."""
        col_names = [c.get_name() for c in self.__base_frame.columns()]
        desired_columns = self.__get_desired_columns(col_names)
        escaped_columns = [escape_column_name(col_name) for col_name in desired_columns]
        return (
            f"{self.__base_frame.to_pure(config)}{config.separator(1)}"
            f"->select(~[{', '.join(escaped_columns)}])"
        )

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this function is applied to."""
        return self.__base_frame

    def tds_frame_parameters(self) -> PyLegendList["PandasApiBaseTdsFrame"]:
        """Return the additional frame parameters; ``filter`` has none."""
        return []

    def calculate_columns(self) -> PyLegendSequence["TdsColumn"]:
        """Return copies of the kept columns, in the order the query emits them.

        The columns follow the order of the desired names (the ``items`` order
        when ``items`` is used), mirroring the ordering applied in ``to_sql`` and
        the select list built in ``to_pure``.
        """
        base_cols = list(self.__base_frame.columns())
        desired_col_names = self.__get_desired_columns([c.get_name() for c in base_cols])

        position_by_name = {}
        for position, col_name in enumerate(desired_col_names):
            position_by_name.setdefault(col_name, position)

        kept_cols = [c for c in base_cols if c.get_name() in position_by_name]
        # Stable sort: columns sharing a position keep their base-frame order.
        kept_cols.sort(key=lambda c: position_by_name[c.get_name()])
        return [c.copy() for c in kept_cols]

    def validate(self) -> bool:
        """Check the stored arguments and return ``True`` when they are usable.

        Raises:
            TypeError: if none or more than one of ``items``/``like``/``regex`` is
                given, or if one of them has the wrong type.
            ValueError: if ``items`` names unknown columns, ``like``/``regex``
                match no column, ``regex`` does not compile, or the axis is
                unsupported.
        """
        mutual_exclusion = sum(
            [
                self.__items is not None,
                self.__like is not None,
                self.__regex is not None,
            ]
        )
        if mutual_exclusion > 1:
            raise TypeError(
                "Keyword arguments `items`, `like`, or `regex` are mutually exclusive"
            )
        if mutual_exclusion == 0:
            raise TypeError("Must pass either `items`, `like`, or `regex`")

        base_cols = [c.get_name() for c in self.__base_frame.columns()]
        if self.__items is not None:
            if not isinstance(self.__items, (list, PyLegendList)):
                raise TypeError(
                    f"Index(...) must be called with a collection, got '{self.__items}'"
                )
            invalid_cols = [item for item in self.__items if item not in base_cols]
            if invalid_cols:
                raise ValueError(
                    f"Columns {invalid_cols} in `filter` items list do not exist. Available: {base_cols}"
                )

        if self.__like is not None:
            if not isinstance(self.__like, str):
                raise TypeError(f"'like' must be a string, got {type(self.__like)}")
            if not any(self.__like in col for col in base_cols):
                raise ValueError(
                    f"No columns match the pattern '{self.__like}'. Available: {base_cols}"
                )

        if self.__regex is not None:
            if not isinstance(self.__regex, str):
                raise TypeError(f"'regex' must be a string, got {type(self.__regex)}")
            try:
                regex_pattern = re.compile(self.__regex)
            except re.error as e:
                # Chain the cause so the original regex error stays visible.
                raise ValueError(f"Invalid regex pattern '{self.__regex}': {e}") from e
            if not any(regex_pattern.search(col) for col in base_cols):
                raise ValueError(
                    f"No columns match the regex '{self.__regex}'. Available: {base_cols}"
                )

        if not isinstance(
                self.__axis, (str, int, PyLegendInteger)
        ) or self.__axis not in [1, "columns"]:
            raise ValueError(
                f"Unsupported axis value: {self.__axis}. Expected 1 or 'columns'"
            )

        return True
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from pylegend._typing import (
|
|
16
|
+
PyLegendSequence,
|
|
17
|
+
PyLegendList
|
|
18
|
+
)
|
|
19
|
+
from pylegend.core.language import PyLegendBoolean
|
|
20
|
+
from pylegend.core.sql.metamodel import (
|
|
21
|
+
QuerySpecification,
|
|
22
|
+
LogicalBinaryExpression,
|
|
23
|
+
LogicalBinaryType,
|
|
24
|
+
)
|
|
25
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunction
|
|
26
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
27
|
+
from pylegend.core.tds.sql_query_helpers import copy_query, create_sub_query
|
|
28
|
+
from pylegend.core.tds.tds_column import TdsColumn
|
|
29
|
+
from pylegend.core.tds.tds_frame import FrameToSqlConfig, FrameToPureConfig
|
|
30
|
+
|
|
31
|
+
# Names exported by a star-import of this module.
__all__: PyLegendSequence[str] = ["PandasApiFilteringFunction"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class PandasApiFilteringFunction(PandasApiAppliedFunction):
    """Row filter that applies a boolean expression to a TDS frame."""

    __base_frame: PandasApiBaseTdsFrame
    __filter_expr: PyLegendBoolean

    @classmethod
    def name(cls) -> str:
        """Return the name of this function."""
        return "boolean_filter"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            filter_expr: PyLegendBoolean
    ) -> None:
        """Remember the frame to filter and the boolean expression to apply."""
        self.__base_frame = base_frame
        self.__filter_expr = filter_expr

    def to_sql(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Return the SQL query object with the expression added to its WHERE clause.

        The base query is wrapped in a sub-query when it already has a GROUP BY,
        an OFFSET or a LIMIT; otherwise a copy of it is extended. An existing
        WHERE condition is combined with the new one using AND.
        """
        source_query = self.__base_frame.to_sql_query_object(config)
        needs_sub_query = (
            (len(source_query.groupBy) > 0)
            or (source_query.offset is not None)
            or (source_query.limit is not None)
        )
        if needs_sub_query:
            filtered_query = create_sub_query(source_query, config, "root")
        else:
            filtered_query = copy_query(source_query)

        condition = self.__filter_expr.to_sql_expression({"c": filtered_query}, config)

        existing_condition = filtered_query.where
        if existing_condition is not None:
            condition = LogicalBinaryExpression(
                type_=LogicalBinaryType.AND,
                left=existing_condition,
                right=condition
            )
        filtered_query.where = condition
        return filtered_query

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the Pure expression that filters the base frame with the expression."""
        predicate = self.__filter_expr.to_pure_expression(config)
        upstream = self.__base_frame.to_pure(config)
        return f"{upstream}{config.separator(1)}->filter(c|{predicate})"

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this function is applied to."""
        return self.__base_frame

    def tds_frame_parameters(self) -> PyLegendList["PandasApiBaseTdsFrame"]:
        """Return the additional frame parameters; there are none."""
        return []

    def calculate_columns(self) -> PyLegendSequence["TdsColumn"]:
        """Return copies of the base frame columns; filtering rows keeps them all."""
        copied_columns = []
        for column in self.__base_frame.columns():
            copied_columns.append(column.copy())
        return copied_columns

    def validate(self) -> bool:
        """Return ``True``; no checks are performed here."""
        return True
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from pylegend._typing import (
|
|
16
|
+
PyLegendUnion,
|
|
17
|
+
PyLegendOptional,
|
|
18
|
+
PyLegendCallable,
|
|
19
|
+
PyLegendList,
|
|
20
|
+
PyLegendSequence,
|
|
21
|
+
)
|
|
22
|
+
from pylegend.core.language.shared.tds_row import AbstractTdsRow
|
|
23
|
+
from pylegend.core.sql.metamodel import (
|
|
24
|
+
QuerySpecification,
|
|
25
|
+
SortItemOrdering,
|
|
26
|
+
SortItem,
|
|
27
|
+
SortItemNullOrdering, SingleColumn, Expression,
|
|
28
|
+
)
|
|
29
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
|
|
30
|
+
PandasApiAppliedFunction,
|
|
31
|
+
)
|
|
32
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import (
|
|
33
|
+
PandasApiBaseTdsFrame,
|
|
34
|
+
)
|
|
35
|
+
from pylegend.core.tds.sql_query_helpers import create_sub_query, copy_query
|
|
36
|
+
from pylegend.core.tds.tds_frame import FrameToSqlConfig, FrameToPureConfig
|
|
37
|
+
from pylegend.core.tds.tds_column import TdsColumn
|
|
38
|
+
from pylegend.core.language.shared.helpers import escape_column_name
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SortValuesFunction(PandasApiAppliedFunction):
    """Pandas-style ``sort_values`` that orders the rows of a TDS frame by columns.

    Only row sorting is supported (``axis`` of ``0`` / ``"index"``), with
    ``inplace=False``, ``ignore_index=True`` and no ``kind`` or ``key``.
    """

    __base_frame: PandasApiBaseTdsFrame
    __by_input: PyLegendUnion[str, PyLegendList[str]]
    __axis: PyLegendUnion[str, int]
    __ascending_input: PyLegendUnion[bool, PyLegendList[bool]]
    __inplace: bool
    __kind: PyLegendOptional[str]
    __na_position: str
    __ignore_index: bool
    __key: PyLegendOptional[PyLegendCallable[[AbstractTdsRow], AbstractTdsRow]]

    @classmethod
    def name(cls) -> str:
        """Return the pandas API name of this function."""
        return "sort_values"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            by: PyLegendUnion[str, PyLegendList[str]],
            axis: PyLegendUnion[str, int],
            ascending: PyLegendUnion[bool, PyLegendList[bool]],
            inplace: bool,
            kind: PyLegendOptional[str],
            na_position: str,
            ignore_index: bool,
            key: PyLegendOptional[PyLegendCallable[[AbstractTdsRow], AbstractTdsRow]] = None
    ) -> None:
        """Store the raw ``sort_values`` arguments; they are checked by ``validate``."""
        self.__base_frame = base_frame
        self.__by_input = by
        self.__axis = axis
        self.__ascending_input = ascending
        self.__inplace = inplace
        self.__kind = kind
        self.__na_position = na_position
        self.__ignore_index = ignore_index
        self.__key = key

    def to_sql(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Return the SQL query object with an ORDER BY for the requested columns.

        The base query is wrapped in a sub-query when it already has an OFFSET or
        a LIMIT; otherwise a copy of it is modified.
        """
        base_query: QuerySpecification = self.__base_frame.to_sql_query_object(config)
        should_create_sub_query = (base_query.offset is not None) or (base_query.limit is not None)
        new_query = (
            create_sub_query(base_query, config, "root") if should_create_sub_query
            else copy_query(base_query)
        )
        new_query.orderBy = [
            SortItem(
                sortKey=self.get_expression_from_column_name(new_query, column_name, config),
                ordering=(
                    SortItemOrdering.ASCENDING if ascending
                    else SortItemOrdering.DESCENDING
                ),
                # NOTE(review): na_position is stored but never applied here -
                # confirm whether it should drive the null ordering.
                nullOrdering=SortItemNullOrdering.UNDEFINED,
            )
            for column_name, ascending in zip(self._build_by_list(), self._build_ascending_list())
        ]
        return new_query

    def get_expression_from_column_name(self, query: QuerySpecification, column_name: str,
                                        config: FrameToSqlConfig) -> Expression:
        """Return the expression of the select item whose alias is ``column_name``.

        Raises:
            RuntimeError: if no ``SingleColumn`` select item has that (quoted) alias.
        """
        db_extension = config.sql_to_string_generator().get_db_extension()
        quoted_name = db_extension.quote_identifier(column_name)
        filtered = [
            s for s in query.select.selectItems
            if isinstance(s, SingleColumn) and s.alias == quoted_name
        ]
        if len(filtered) == 0:
            raise RuntimeError("Cannot find column: " + column_name)  # pragma: no cover
        return filtered[0].expression

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the Pure expression sorting the base frame by the requested columns."""
        escaped_columns = [escape_column_name(col_name) for col_name in self._build_by_list()]
        sort_items = [
            f"~{column_name}->ascending()" if ascending else f"~{column_name}->descending()"
            for column_name, ascending in zip(escaped_columns, self._build_ascending_list())
        ]
        return (
            f"{self.__base_frame.to_pure(config)}{config.separator(1)}"
            + f"->sort([{', '.join(sort_items)}])"
        )

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this function is applied to."""
        return self.__base_frame

    def tds_frame_parameters(self) -> PyLegendList["PandasApiBaseTdsFrame"]:
        """Return the additional frame parameters; ``sort_values`` has none."""
        return []

    def calculate_columns(self) -> PyLegendSequence["TdsColumn"]:
        """Return copies of the base frame columns; sorting does not change them."""
        return [c.copy() for c in self.__base_frame.columns()]

    def validate(self) -> bool:
        """Check the stored arguments and return ``True`` when they are supported.

        Raises:
            ValueError: for an unsupported ``axis``, ``inplace`` or ``ignore_index``
                value, for ``by``/``ascending`` of different lengths, or for a
                ``by`` column missing from the base frame.
            NotImplementedError: if ``kind`` or ``key`` is given.
        """
        if self.__axis not in [0, "index"]:
            raise ValueError(
                "Axis parameter of sort_values function must be 0 or 'index'"
            )

        if self.__inplace is not False:
            raise ValueError("Inplace parameter of sort_values function must be False")

        if self.__kind is not None:
            raise NotImplementedError(
                "Kind parameter of sort_values function is not supported"
            )

        if self.__ignore_index is not True:
            raise ValueError(
                "Ignore_index parameter of sort_values function must be True"
            )

        if self.__key is not None:
            raise NotImplementedError(
                "Key parameter of sort_values function is not supported"
            )

        base_frame_columns = [
            column.get_name() for column in self.__base_frame.columns()
        ]

        by_columns = self._build_by_list()
        ascending_flags = self._build_ascending_list()

        if len(by_columns) != len(ascending_flags):
            raise ValueError(
                "The number of columns in 'by' must equal the number of values in 'ascending' for sort_values function."
            )

        for column in by_columns:
            if column not in base_frame_columns:
                raise ValueError(
                    f"Column - '{column}' in sort_values columns list doesn't exist in the current frame. "
                    f"Current frame columns: {base_frame_columns}"
                )

        return True

    def _build_by_list(self) -> PyLegendList[str]:
        """Return the ``by`` argument as a new list; a single name becomes a one-item list."""
        if isinstance(self.__by_input, str):
            return [self.__by_input]
        # Copy so later changes to the caller's list cannot alter this function.
        return list(self.__by_input)

    def _build_ascending_list(self) -> PyLegendList[bool]:
        """Return one ascending flag per ``by`` column as a new list.

        A single boolean is repeated for every ``by`` column; any other value is
        copied as given.
        """
        if isinstance(self.__ascending_input, bool):
            return [self.__ascending_input] * len(self._build_by_list())
        return list(self.__ascending_input)
|