pylegend 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/_typing.py +6 -0
- pylegend/core/database/sql_to_string/db_extension.py +35 -6
- pylegend/core/language/pandas_api/__init__.py +13 -0
- pylegend/core/language/pandas_api/pandas_api_aggregate_specification.py +54 -0
- pylegend/core/language/pandas_api/pandas_api_custom_expressions.py +85 -0
- pylegend/core/language/pandas_api/pandas_api_series.py +174 -0
- pylegend/core/language/pandas_api/pandas_api_tds_row.py +74 -0
- pylegend/core/language/shared/literal_expressions.py +2 -2
- pylegend/core/language/shared/operations/integer_operation_expressions.py +35 -0
- pylegend/core/language/shared/operations/nary_expression.py +104 -0
- pylegend/core/language/shared/operations/primitive_operation_expressions.py +30 -0
- pylegend/core/language/shared/operations/string_operation_expressions.py +624 -1
- pylegend/core/language/shared/pct_helpers.py +56 -0
- pylegend/core/language/shared/primitives/integer.py +6 -0
- pylegend/core/language/shared/primitives/primitive.py +6 -0
- pylegend/core/language/shared/primitives/string.py +129 -1
- pylegend/core/sql/metamodel.py +3 -1
- pylegend/core/sql/metamodel_extension.py +18 -0
- pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +316 -0
- pylegend/core/tds/pandas_api/frames/functions/assign_function.py +20 -15
- pylegend/core/tds/pandas_api/frames/functions/drop.py +171 -0
- pylegend/core/tds/pandas_api/frames/functions/filter.py +193 -0
- pylegend/core/tds/pandas_api/frames/functions/filtering.py +85 -0
- pylegend/core/tds/pandas_api/frames/functions/sort_values_function.py +189 -0
- pylegend/core/tds/pandas_api/frames/functions/truncate_function.py +120 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +5 -1
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +204 -7
- pylegend/core/tds/pandas_api/frames/pandas_api_input_tds_frame.py +5 -3
- pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +90 -3
- {pylegend-0.7.0.dist-info → pylegend-0.9.0.dist-info}/METADATA +1 -1
- {pylegend-0.7.0.dist-info → pylegend-0.9.0.dist-info}/RECORD +35 -22
- {pylegend-0.7.0.dist-info → pylegend-0.9.0.dist-info}/WHEEL +0 -0
- {pylegend-0.7.0.dist-info → pylegend-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.7.0.dist-info → pylegend-0.9.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.7.0.dist-info → pylegend-0.9.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from datetime import datetime, date
|
|
16
|
+
from pylegend._typing import (
|
|
17
|
+
PyLegendSequence,
|
|
18
|
+
PyLegendUnion,
|
|
19
|
+
)
|
|
20
|
+
from pylegend.core.language import (
|
|
21
|
+
convert_literal_to_literal_expression,
|
|
22
|
+
PyLegendBooleanLiteralExpression,
|
|
23
|
+
PyLegendStringLiteralExpression,
|
|
24
|
+
PyLegendIntegerLiteralExpression,
|
|
25
|
+
PyLegendFloatLiteralExpression,
|
|
26
|
+
PyLegendStrictDateLiteralExpression,
|
|
27
|
+
PyLegendDateTimeLiteralExpression,
|
|
28
|
+
PyLegendFloat,
|
|
29
|
+
PyLegendDateTime,
|
|
30
|
+
PyLegendStrictDate,
|
|
31
|
+
PyLegendString,
|
|
32
|
+
PyLegendInteger,
|
|
33
|
+
PyLegendBoolean,
|
|
34
|
+
PyLegendPrimitive
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
__all__: PyLegendSequence[str] = [
|
|
38
|
+
"c",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def c(constant: PyLegendUnion[int, float, bool, str, datetime, date]) -> PyLegendPrimitive:
    """Wrap a Python constant in the PyLegend primitive matching its literal expression.

    The constant is converted with ``convert_literal_to_literal_expression`` and the
    class of the resulting literal expression selects the primitive wrapper.

    :param constant: Python value to wrap.
    :return: The PyLegend primitive constructed around the literal expression.
    :raises RuntimeError: If the literal expression is none of the handled classes.
    """
    literal = convert_literal_to_literal_expression(constant)

    # The checks run in this fixed order; the first matching literal class wins.
    if isinstance(literal, PyLegendBooleanLiteralExpression):
        return PyLegendBoolean(literal)
    elif isinstance(literal, PyLegendStringLiteralExpression):
        return PyLegendString(literal)
    elif isinstance(literal, PyLegendIntegerLiteralExpression):
        return PyLegendInteger(literal)
    elif isinstance(literal, PyLegendFloatLiteralExpression):
        return PyLegendFloat(literal)
    elif isinstance(literal, PyLegendDateTimeLiteralExpression):
        return PyLegendDateTime(literal)
    elif isinstance(literal, PyLegendStrictDateLiteralExpression):
        return PyLegendStrictDate(literal)

    raise RuntimeError(f"Unsupported constant type: {type(constant)}")
|
|
@@ -33,9 +33,11 @@ from pylegend.core.language.shared.operations.integer_operation_expressions impo
|
|
|
33
33
|
PyLegendIntegerSubtractExpression,
|
|
34
34
|
PyLegendIntegerMultiplyExpression,
|
|
35
35
|
PyLegendIntegerModuloExpression,
|
|
36
|
+
PyLegendIntegerCharExpression
|
|
36
37
|
)
|
|
37
38
|
if TYPE_CHECKING:
|
|
38
39
|
from pylegend.core.language.shared.primitives import PyLegendFloat
|
|
40
|
+
from pylegend.core.language.shared.primitives.string import PyLegendString
|
|
39
41
|
|
|
40
42
|
__all__: PyLegendSequence[str] = [
|
|
41
43
|
"PyLegendInteger"
|
|
@@ -52,6 +54,10 @@ class PyLegendInteger(PyLegendNumber):
|
|
|
52
54
|
self.__value_copy = value
|
|
53
55
|
super().__init__(value)
|
|
54
56
|
|
|
57
|
+
def char(self) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendIntegerCharExpression`` over this integer's value."""
    # Imported locally; at module level the string module is only imported under TYPE_CHECKING.
    from pylegend.core.language.shared.primitives.string import PyLegendString

    char_expr = PyLegendIntegerCharExpression(self.__value_copy)
    return PyLegendString(char_expr)
|
|
60
|
+
|
|
55
61
|
def to_sql_expression(
|
|
56
62
|
self,
|
|
57
63
|
frame_name_to_base_query_map: PyLegendDict[str, QuerySpecification],
|
|
@@ -32,11 +32,13 @@ from pylegend.core.language.shared.operations.primitive_operation_expressions im
|
|
|
32
32
|
PyLegendPrimitiveNotEqualsExpression,
|
|
33
33
|
PyLegendIsEmptyExpression,
|
|
34
34
|
PyLegendIsNotEmptyExpression,
|
|
35
|
+
PyLegendPrimitiveToStringExpression
|
|
35
36
|
)
|
|
36
37
|
from pylegend.core.tds.tds_frame import FrameToSqlConfig
|
|
37
38
|
from pylegend.core.tds.tds_frame import FrameToPureConfig
|
|
38
39
|
if TYPE_CHECKING:
|
|
39
40
|
from pylegend.core.language.shared.primitives.boolean import PyLegendBoolean
|
|
41
|
+
from pylegend.core.language.shared.primitives.string import PyLegendString
|
|
40
42
|
|
|
41
43
|
__all__: PyLegendSequence[str] = [
|
|
42
44
|
"PyLegendPrimitive",
|
|
@@ -100,6 +102,10 @@ class PyLegendPrimitive(metaclass=ABCMeta):
|
|
|
100
102
|
def is_not_null(self) -> "PyLegendBoolean":
|
|
101
103
|
return self.is_not_empty()
|
|
102
104
|
|
|
105
|
+
def to_string(self) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendPrimitiveToStringExpression`` over ``self.value()``."""
    # Imported locally; at module level the string module is only imported under TYPE_CHECKING.
    from pylegend.core.language.shared.primitives.string import PyLegendString

    to_string_expr = PyLegendPrimitiveToStringExpression(self.value())
    return PyLegendString(to_string_expr)
|
|
108
|
+
|
|
103
109
|
@staticmethod
|
|
104
110
|
def __validate_param_to_be_primitive(
|
|
105
111
|
param: "PyLegendUnion[int, float, bool, str, date, datetime, PyLegendPrimitive]",
|
|
@@ -16,11 +16,14 @@ from pylegend._typing import (
|
|
|
16
16
|
PyLegendSequence,
|
|
17
17
|
PyLegendDict,
|
|
18
18
|
PyLegendUnion,
|
|
19
|
+
PyLegendOptional
|
|
19
20
|
)
|
|
21
|
+
from pylegend.core.language.shared.literal_expressions import PyLegendIntegerLiteralExpression
|
|
20
22
|
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitive
|
|
21
23
|
from pylegend.core.language.shared.primitives.integer import PyLegendInteger
|
|
22
24
|
from pylegend.core.language.shared.primitives.float import PyLegendFloat
|
|
23
25
|
from pylegend.core.language.shared.primitives.boolean import PyLegendBoolean
|
|
26
|
+
from pylegend.core.language.shared.primitives.datetime import PyLegendDateTime
|
|
24
27
|
from pylegend.core.language.shared.expression import PyLegendExpressionStringReturn
|
|
25
28
|
from pylegend.core.language.shared.literal_expressions import PyLegendStringLiteralExpression
|
|
26
29
|
from pylegend.core.sql.metamodel import (
|
|
@@ -47,9 +50,26 @@ from pylegend.core.language.shared.operations.string_operation_expressions impor
|
|
|
47
50
|
PyLegendStringLessThanEqualExpression,
|
|
48
51
|
PyLegendStringGreaterThanExpression,
|
|
49
52
|
PyLegendStringGreaterThanEqualExpression,
|
|
53
|
+
PyLegendStringParseBooleanExpression,
|
|
54
|
+
PyLegendStringParseDateTimeExpression,
|
|
55
|
+
PyLegendStringAsciiExpression,
|
|
56
|
+
PyLegendStringDecodeBase64Expression,
|
|
57
|
+
PyLegendStringEncodeBase64Expression,
|
|
58
|
+
PyLegendStringReverseExpression,
|
|
59
|
+
PyLegendStringToLowerFirstCharacterExpression,
|
|
60
|
+
PyLegendStringToUpperFirstCharacterExpression,
|
|
61
|
+
PyLegendStringLeftExpression,
|
|
62
|
+
PyLegendStringRightExpression,
|
|
63
|
+
PyLegendStringSubStringExpression,
|
|
64
|
+
PyLegendStringReplaceExpression,
|
|
65
|
+
PyLegendStringLpadExpression,
|
|
66
|
+
PyLegendStringRpadExpression,
|
|
67
|
+
PyLegendStringSplitPartExpression,
|
|
68
|
+
PyLegendStringFullMatchExpression,
|
|
69
|
+
PyLegendStringRepeatStringExpression,
|
|
70
|
+
PyLegendStringMatchExpression
|
|
50
71
|
)
|
|
51
72
|
|
|
52
|
-
|
|
53
73
|
__all__: PyLegendSequence[str] = [
|
|
54
74
|
"PyLegendString"
|
|
55
75
|
]
|
|
@@ -120,6 +140,108 @@ class PyLegendString(PyLegendPrimitive):
|
|
|
120
140
|
def parse_float(self) -> "PyLegendFloat":
|
|
121
141
|
return PyLegendFloat(PyLegendStringParseFloatExpression(self.__value))
|
|
122
142
|
|
|
143
|
+
def parse_boolean(self) -> "PyLegendBoolean":
    """Return a boolean primitive wrapping a ``PyLegendStringParseBooleanExpression`` over this string."""
    parse_expr = PyLegendStringParseBooleanExpression(self.__value)
    return PyLegendBoolean(parse_expr)
|
|
145
|
+
|
|
146
|
+
def parse_datetime(self) -> "PyLegendDateTime":
    """Return a datetime primitive wrapping a ``PyLegendStringParseDateTimeExpression`` over this string."""
    parse_expr = PyLegendStringParseDateTimeExpression(self.__value)
    return PyLegendDateTime(parse_expr)
|
|
148
|
+
|
|
149
|
+
def ascii(self) -> "PyLegendInteger":
    """Return an integer primitive wrapping a ``PyLegendStringAsciiExpression`` over this string."""
    ascii_expr = PyLegendStringAsciiExpression(self.__value)
    return PyLegendInteger(ascii_expr)
|
|
151
|
+
|
|
152
|
+
def b64decode(self) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringDecodeBase64Expression`` over this string."""
    decode_expr = PyLegendStringDecodeBase64Expression(self.__value)
    return PyLegendString(decode_expr)
|
|
154
|
+
|
|
155
|
+
def b64encode(self) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringEncodeBase64Expression`` over this string."""
    encode_expr = PyLegendStringEncodeBase64Expression(self.__value)
    return PyLegendString(encode_expr)
|
|
157
|
+
|
|
158
|
+
def reverse(self) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringReverseExpression`` over this string."""
    reverse_expr = PyLegendStringReverseExpression(self.__value)
    return PyLegendString(reverse_expr)
|
|
160
|
+
|
|
161
|
+
def to_lower_first_character(self) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringToLowerFirstCharacterExpression`` over this string."""
    lower_first_expr = PyLegendStringToLowerFirstCharacterExpression(self.__value)
    return PyLegendString(lower_first_expr)
|
|
163
|
+
|
|
164
|
+
def to_upper_first_character(self) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringToUpperFirstCharacterExpression`` over this string."""
    upper_first_expr = PyLegendStringToUpperFirstCharacterExpression(self.__value)
    return PyLegendString(upper_first_expr)
|
|
166
|
+
|
|
167
|
+
def left(self, count: PyLegendUnion[int, "PyLegendInteger"]) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringLeftExpression`` of this string and ``count``.

    :param count: A Python ``int`` (wrapped as an integer literal expression) or a
        ``PyLegendInteger`` (its ``value()`` expression is used).
    :raises TypeError: If ``count`` is neither an ``int`` nor a ``PyLegendInteger``.
    """
    PyLegendString.__validate_param_to_be_int_or_int_expr(count, "left parameter")
    count_expr = (
        PyLegendIntegerLiteralExpression(count)
        if isinstance(count, int)
        else count.value()
    )
    left_expr = PyLegendStringLeftExpression(self.__value, count_expr)
    return PyLegendString(left_expr)
|
|
171
|
+
|
|
172
|
+
def right(self, count: PyLegendUnion[int, "PyLegendInteger"]) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringRightExpression`` of this string and ``count``.

    :param count: A Python ``int`` (wrapped as an integer literal expression) or a
        ``PyLegendInteger`` (its ``value()`` expression is used).
    :raises TypeError: If ``count`` is neither an ``int`` nor a ``PyLegendInteger``.
    """
    PyLegendString.__validate_param_to_be_int_or_int_expr(count, "right parameter")
    count_expr = (
        PyLegendIntegerLiteralExpression(count)
        if isinstance(count, int)
        else count.value()
    )
    right_expr = PyLegendStringRightExpression(self.__value, count_expr)
    return PyLegendString(right_expr)
|
|
176
|
+
|
|
177
|
+
def substring(
        self,
        start: PyLegendUnion[int, "PyLegendInteger"],
        end: PyLegendOptional[PyLegendUnion[int, "PyLegendInteger"]] = None
) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringSubStringExpression``.

    The expression receives ``[value, start]`` when ``end`` is ``None`` and
    ``[value, start, end]`` otherwise. Python ``int`` arguments are wrapped as
    integer literal expressions; ``PyLegendInteger`` arguments contribute ``value()``.

    :param start: Start operand.
    :param end: Optional end operand.
    :raises TypeError: If ``start`` or a supplied ``end`` is neither an ``int`` nor
        a ``PyLegendInteger``.
    """
    PyLegendString.__validate_param_to_be_int_or_int_expr(start, "substring start parameter")
    start_expr = (
        PyLegendIntegerLiteralExpression(start)
        if isinstance(start, int)
        else start.value()
    )

    # Two-operand form: no end bound supplied.
    if end is None:
        return PyLegendString(PyLegendStringSubStringExpression([self.__value, start_expr]))

    PyLegendString.__validate_param_to_be_int_or_int_expr(end, "substring end parameter")
    end_expr = (
        PyLegendIntegerLiteralExpression(end)
        if isinstance(end, int)
        else end.value()
    )
    return PyLegendString(PyLegendStringSubStringExpression([self.__value, start_expr, end_expr]))
|
|
188
|
+
|
|
189
|
+
def replace(
        self,
        to_replace: PyLegendUnion[str, "PyLegendString"],
        replacement: PyLegendUnion[str, "PyLegendString"]
) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringReplaceExpression``.

    The expression receives ``[value, to_replace, replacement]``. Python ``str``
    arguments are wrapped as string literal expressions; ``PyLegendString``
    arguments contribute their underlying value expression.

    :param to_replace: Operand passed as the second element.
    :param replacement: Operand passed as the third element.
    :raises TypeError: Raised by the str-or-string-expression validator for invalid arguments.
    """
    PyLegendString.__validate_param_to_be_str_or_str_expr(to_replace, "replace to_replace parameter")
    search_expr = (
        PyLegendStringLiteralExpression(to_replace)
        if isinstance(to_replace, str)
        else to_replace.__value
    )

    PyLegendString.__validate_param_to_be_str_or_str_expr(replacement, "replace replacement parameter")
    substitute_expr = (
        PyLegendStringLiteralExpression(replacement)
        if isinstance(replacement, str)
        else replacement.__value
    )

    operands = [self.__value, search_expr, substitute_expr]
    return PyLegendString(PyLegendStringReplaceExpression(operands))
|
|
200
|
+
|
|
201
|
+
def rjust(
        self,
        length: PyLegendUnion[int, "PyLegendInteger"],
        fill_char: PyLegendUnion[str, "PyLegendString"] = ' '
) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringLpadExpression``.

    The expression receives ``[value, length, fill_char]``. A Python ``int`` length
    is wrapped as an integer literal expression and a Python ``str`` fill character
    as a string literal expression; PyLegend primitives contribute their underlying
    expressions.

    :param length: Length operand.
    :param fill_char: Fill operand; defaults to a single space.
    :raises TypeError: If ``length`` is neither an ``int`` nor a ``PyLegendInteger``,
        or if ``fill_char`` is rejected by the str-or-string-expression validator.
    """
    PyLegendString.__validate_param_to_be_int_or_int_expr(length, "rjust length parameter")
    # Validate fill_char like every other str-or-expression argument in this class;
    # without this an invalid value fails later with an AttributeError on the
    # name-mangled ``__value`` lookup instead of a descriptive TypeError.
    PyLegendString.__validate_param_to_be_str_or_str_expr(fill_char, "rjust fill_char parameter")

    length_op = PyLegendIntegerLiteralExpression(length) if isinstance(length, int) else length.value()
    fill_char_op = PyLegendStringLiteralExpression(fill_char) if isinstance(fill_char, str) else fill_char.__value
    return PyLegendString(PyLegendStringLpadExpression([self.__value, length_op, fill_char_op]))
|
|
209
|
+
|
|
210
|
+
def ljust(
        self,
        length: PyLegendUnion[int, "PyLegendInteger"],
        fill_char: PyLegendUnion[str, "PyLegendString"] = ' '
) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringRpadExpression``.

    The expression receives ``[value, length, fill_char]``. A Python ``int`` length
    is wrapped as an integer literal expression and a Python ``str`` fill character
    as a string literal expression; PyLegend primitives contribute their underlying
    expressions.

    :param length: Length operand.
    :param fill_char: Fill operand; defaults to a single space.
    :raises TypeError: If ``length`` is neither an ``int`` nor a ``PyLegendInteger``,
        or if ``fill_char`` is rejected by the str-or-string-expression validator.
    """
    PyLegendString.__validate_param_to_be_int_or_int_expr(length, "ljust length parameter")
    # Validate fill_char like every other str-or-expression argument in this class;
    # without this an invalid value fails later with an AttributeError on the
    # name-mangled ``__value`` lookup instead of a descriptive TypeError.
    PyLegendString.__validate_param_to_be_str_or_str_expr(fill_char, "ljust fill_char parameter")

    length_op = PyLegendIntegerLiteralExpression(length) if isinstance(length, int) else length.value()
    fill_char_op = PyLegendStringLiteralExpression(fill_char) if isinstance(fill_char, str) else fill_char.__value
    return PyLegendString(PyLegendStringRpadExpression([self.__value, length_op, fill_char_op]))
|
|
218
|
+
|
|
219
|
+
def split_part(
        self,
        sep: PyLegendUnion[str, "PyLegendString"],
        part: PyLegendUnion[int, "PyLegendInteger"]
) -> "PyLegendString | None":
    """Return a string primitive wrapping a ``PyLegendStringSplitPartExpression``.

    The expression receives ``[value, sep, part]``. A Python ``str`` separator is
    wrapped as a string literal expression and a Python ``int`` part as an integer
    literal expression; PyLegend primitives contribute ``value()``.

    :param sep: Separator operand.
    :param part: Part operand.
    :raises TypeError: If ``part`` is neither an ``int`` nor a ``PyLegendInteger``,
        or if ``sep`` is rejected by the str-or-string-expression validator.
    """
    PyLegendString.__validate_param_to_be_str_or_str_expr(sep, "split_part sep parameter")
    separator_expr = (
        PyLegendStringLiteralExpression(sep)
        if isinstance(sep, str)
        else sep.value()
    )

    PyLegendString.__validate_param_to_be_int_or_int_expr(part, "split_part part parameter")
    part_expr = (
        PyLegendIntegerLiteralExpression(part)
        if isinstance(part, int)
        else part.value()
    )

    operands = [self.__value, separator_expr, part_expr]
    return PyLegendString(PyLegendStringSplitPartExpression(operands))
|
|
228
|
+
|
|
229
|
+
def full_match(self, pattern: PyLegendUnion[str, "PyLegendString"]) -> PyLegendBoolean:
    """Return a boolean primitive wrapping a ``PyLegendStringFullMatchExpression`` of this string and ``pattern``.

    :param pattern: A Python ``str`` (wrapped as a string literal expression) or a
        ``PyLegendString`` (its underlying value expression is used).
    :raises TypeError: Raised by the str-or-string-expression validator for invalid arguments.
    """
    PyLegendString.__validate_param_to_be_str_or_str_expr(pattern, "full_match parameter")
    pattern_expr = (
        PyLegendStringLiteralExpression(pattern)
        if isinstance(pattern, str)
        else pattern.__value
    )
    full_match_expr = PyLegendStringFullMatchExpression(self.__value, pattern_expr)
    return PyLegendBoolean(full_match_expr)
|
|
233
|
+
|
|
234
|
+
def match(self, pattern: PyLegendUnion[str, "PyLegendString"]) -> PyLegendBoolean:
    """Return a boolean primitive wrapping a ``PyLegendStringMatchExpression`` of this string and ``pattern``.

    :param pattern: A Python ``str`` (wrapped as a string literal expression) or a
        ``PyLegendString`` (its underlying value expression is used).
    :raises TypeError: Raised by the str-or-string-expression validator for invalid arguments.
    """
    PyLegendString.__validate_param_to_be_str_or_str_expr(pattern, "match parameter")  # pragma: no cover
    pattern_expr = (  # pragma: no cover
        PyLegendStringLiteralExpression(pattern)
        if isinstance(pattern, str)
        else pattern.__value
    )
    match_expr = PyLegendStringMatchExpression(self.__value, pattern_expr)  # pragma: no cover
    return PyLegendBoolean(match_expr)  # pragma: no cover
|
|
239
|
+
|
|
240
|
+
def repeat_string(self, times: PyLegendUnion[int, "PyLegendInteger"]) -> "PyLegendString":
    """Return a string primitive wrapping a ``PyLegendStringRepeatStringExpression`` of this string and ``times``.

    :param times: A Python ``int`` (wrapped as an integer literal expression) or a
        ``PyLegendInteger`` (its ``value()`` expression is used).
    :raises TypeError: If ``times`` is neither an ``int`` nor a ``PyLegendInteger``.
    """
    PyLegendString.__validate_param_to_be_int_or_int_expr(times, "repeatString parameter")
    times_expr = (
        PyLegendIntegerLiteralExpression(times)
        if isinstance(times, int)
        else times.value()
    )
    repeat_expr = PyLegendStringRepeatStringExpression(self.__value, times_expr)
    return PyLegendString(repeat_expr)
|
|
244
|
+
|
|
123
245
|
def __add__(self, other: PyLegendUnion[str, "PyLegendString"]) -> "PyLegendString":
|
|
124
246
|
PyLegendString.__validate_param_to_be_str_or_str_expr(other, "String plus (+) parameter")
|
|
125
247
|
other_op = PyLegendStringLiteralExpression(other) if isinstance(other, str) else other.__value
|
|
@@ -174,3 +296,9 @@ class PyLegendString(PyLegendPrimitive):
|
|
|
174
296
|
if not isinstance(param, str):
|
|
175
297
|
raise TypeError(desc + " should be a str."
|
|
176
298
|
" Got value " + str(param) + " of type: " + str(type(param)))
|
|
299
|
+
|
|
300
|
+
@staticmethod
def __validate_param_to_be_int_or_int_expr(param: PyLegendUnion[int, "PyLegendInteger"], desc: str) -> None:
    """Raise ``TypeError`` unless ``param`` is an ``int`` or a ``PyLegendInteger``.

    :param param: Value to check.
    :param desc: Description of the parameter, used as the prefix of the error message.
    """
    if isinstance(param, (int, PyLegendInteger)):
        return
    message = (
        desc + " should be a int or a int expression (PyLegendInteger)."
               " Got value " + str(param) + " of type: " + str(type(param))
    )
    raise TypeError(message)
|
pylegend/core/sql/metamodel.py
CHANGED
|
@@ -81,6 +81,7 @@ __all__: PyLegendSequence[str] = [
|
|
|
81
81
|
"EpochExpression",
|
|
82
82
|
"WindowExpression",
|
|
83
83
|
"ConstantExpression",
|
|
84
|
+
"StringSubStringExpression"
|
|
84
85
|
]
|
|
85
86
|
|
|
86
87
|
|
|
@@ -745,3 +746,20 @@ class ConstantExpression(Expression):
|
|
|
745
746
|
) -> None:
|
|
746
747
|
super().__init__(_type="constantExpression")
|
|
747
748
|
self.name = name
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
class StringSubStringExpression(Expression):
    """SQL metamodel node holding a ``value``, a ``start`` and an optional ``end`` expression.

    The node registers itself with the base ``Expression`` under the type tag
    ``"stringSubStringExpression"``.
    """

    value: Expression
    start: Expression
    end: PyLegendOptional[Expression]

    def __init__(
        self,
        value: Expression,
        start: Expression,
        end: PyLegendOptional[Expression] = None
    ) -> None:
        """Store the three operands; ``end`` defaults to ``None`` when omitted."""
        super().__init__(_type="stringSubStringExpression")
        self.value, self.start, self.end = value, start, end
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import collections.abc
|
|
17
|
+
from pylegend._typing import (
|
|
18
|
+
PyLegendCallable,
|
|
19
|
+
PyLegendSequence,
|
|
20
|
+
PyLegendTuple,
|
|
21
|
+
PyLegendUnion,
|
|
22
|
+
PyLegendList,
|
|
23
|
+
PyLegendMapping,
|
|
24
|
+
)
|
|
25
|
+
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import (
|
|
26
|
+
PyLegendAggFunc,
|
|
27
|
+
PyLegendAggInput
|
|
28
|
+
)
|
|
29
|
+
from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
|
|
30
|
+
from pylegend.core.language.shared.helpers import escape_column_name, generate_pure_lambda
|
|
31
|
+
from pylegend.core.language.shared.literal_expressions import convert_literal_to_literal_expression
|
|
32
|
+
from pylegend.core.language.shared.primitive_collection import PyLegendPrimitiveCollection, create_primitive_collection
|
|
33
|
+
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitive, PyLegendPrimitiveOrPythonPrimitive
|
|
34
|
+
from pylegend.core.sql.metamodel import QuerySpecification, SelectItem, SingleColumn
|
|
35
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunction
|
|
36
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
37
|
+
from pylegend.core.tds.sql_query_helpers import copy_query, create_sub_query
|
|
38
|
+
from pylegend.core.tds.tds_column import TdsColumn
|
|
39
|
+
from pylegend.core.tds.tds_frame import FrameToPureConfig, FrameToSqlConfig
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AggregateFunction(PandasApiAppliedFunction):
    """Applied function backing the pandas-API ``aggregate`` call on a TDS frame.

    ``validate`` resolves the user-supplied ``func`` into one
    ``(column name, mapped column expression, aggregated expression)`` triple per
    column and stores the triples on the instance. ``to_sql`` and ``to_pure`` render
    those triples, so ``validate`` must have run before either of them is called.
    """

    __base_frame: PandasApiBaseTdsFrame
    __func: PyLegendAggInput
    __axis: PyLegendUnion[int, str]
    __args: PyLegendSequence[PyLegendPrimitiveOrPythonPrimitive]
    __kwargs: PyLegendMapping[str, PyLegendPrimitiveOrPythonPrimitive]
    # Populated by validate(): (column name, mapped expression, aggregated expression).
    __aggregates_list: PyLegendList[PyLegendTuple[str, PyLegendPrimitiveOrPythonPrimitive, PyLegendPrimitive]]

    # Collection method name -> accepted pandas/NumPy spellings of that aggregation.
    _LEGEND_METHOD_TO_ALIASES: PyLegendMapping[str, PyLegendList[str]] = {
        "average": ["mean", "average", "nanmean"],
        "sum": ["sum", "nansum"],
        "min": ["min", "amin", "minimum", "nanmin"],
        "max": ["max", "amax", "maximum", "nanmax"],
        "std_dev_sample": ["std", "std_dev", "nanstd"],
        "variance_sample": ["var", "variance", "nanvar"],
        "median": ["median", "nanmedian"],
        "count": ["count", "size", "len", "length"],
    }

    # Inverse lookup (alias -> collection method name), built once instead of per call.
    _ALIAS_TO_LEGEND_METHOD: dict[str, str] = {
        alias: legend_method
        for legend_method, aliases in _LEGEND_METHOD_TO_ALIASES.items()
        for alias in aliases
    }

    @classmethod
    def name(cls) -> str:
        """Return the name of this applied function."""
        return "aggregate"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str],
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive
    ) -> None:
        """Store the inputs; no validation happens until ``validate`` is called.

        :param base_frame: Frame the aggregation is applied to.
        :param func: Aggregation specification (callable, str, np.ufunc, a one-element
            list of those, or a mapping from column name to one of those).
        :param axis: Aggregation axis; ``validate`` accepts only ``0`` or ``"index"``.
        :param args: Extra positional arguments; ``validate`` rejects any.
        :param kwargs: Extra keyword arguments; ``validate`` rejects any.
        """
        self.__base_frame = base_frame
        self.__func = func
        self.__axis = axis
        self.__args = args
        self.__kwargs = kwargs

    def to_sql(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Build the SQL query object whose select items are the aggregated expressions.

        :param config: Frame-to-SQL configuration.
        :return: A query derived from the base frame's query with its select items replaced.
        """
        db_extension = config.sql_to_string_generator().get_db_extension()
        base_query: QuerySpecification = self.__base_frame.to_sql_query_object(config)

        # A base query that already groups, de-duplicates, offsets or limits is wrapped
        # in a sub-query; otherwise a copy of the base query is modified in place.
        should_create_sub_query = (
            len(base_query.groupBy) > 0 or
            base_query.select.distinct or
            base_query.offset is not None or
            base_query.limit is not None
        )

        new_query: QuerySpecification
        if should_create_sub_query:
            new_query = create_sub_query(base_query, config, "root")
        else:
            new_query = copy_query(base_query)

        new_select_items: PyLegendList[SelectItem] = [
            SingleColumn(
                alias=db_extension.quote_identifier(column_name),
                expression=agg_result.to_sql_expression({"r": new_query}, config),
            )
            for column_name, _, agg_result in self.__aggregates_list
        ]

        new_query.select.selectItems = new_select_items
        return new_query

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Render the aggregation as a Pure ``->aggregate(~[...])`` call on the base frame.

        :param config: Frame-to-Pure configuration.
        :return: The Pure expression string.
        """
        agg_strings = []
        for column_name, map_result, agg_result in self.__aggregates_list:
            if isinstance(map_result, PyLegendPrimitive):
                map_expr_string = map_result.to_pure_expression(config)
            else:
                map_expr_string = convert_literal_to_literal_expression(map_result).to_pure_expression(config)
            # The aggregate lambda takes the mapped values as ``$c``, so the textual
            # occurrence of the map expression is swapped for that parameter.
            agg_expr_string = agg_result.to_pure_expression(config).replace(map_expr_string, "$c")
            agg_strings.append(f"{escape_column_name(column_name)}:{generate_pure_lambda('r', map_expr_string)}:"
                               f"{generate_pure_lambda('c', agg_expr_string)}")

        return (f"{self.__base_frame.to_pure(config)}{config.separator(1)}"
                f"->aggregate({config.separator(2)}"
                f"~[{', '.join(agg_strings)}]{config.separator(1)}"
                f")")

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this aggregation is applied to."""
        return self.__base_frame

    def tds_frame_parameters(self) -> PyLegendList["PandasApiBaseTdsFrame"]:
        """Return the additional frame parameters of this function (none)."""
        return []

    def calculate_columns(self) -> PyLegendSequence["TdsColumn"]:
        """Return copies of the base frame's columns.

        NOTE(review): the columns are returned unchanged even when ``func`` is a mapping
        that aggregates only some columns; confirm this is the intended result schema.
        """
        return [c.copy() for c in self.__base_frame.columns()]

    def validate(self) -> bool:
        """Validate the inputs and compute the per-column aggregation triples.

        :return: ``True`` when validation succeeds.
        :raises NotImplementedError: If ``axis`` is not ``0``/``"index"``, if extra
            ``*args``/``**kwargs`` were supplied, or if a named aggregation is unsupported.
        :raises TypeError: If ``func`` (or one of its elements) has an unsupported type.
        :raises ValueError: If a mapping key is not a column name or a list does not
            contain exactly one function.
        """
        if self.__axis not in [0, "index"]:
            raise NotImplementedError(
                f"The 'axis' parameter of the aggregate function must be 0 or 'index', but got: {self.__axis}"
            )

        if len(self.__args) > 0 or len(self.__kwargs) > 0:
            raise NotImplementedError(
                "AggregateFunction currently does not support additional positional "
                "or keyword arguments. Please remove extra *args/**kwargs."
            )

        self.__aggregates_list = []

        normalized_func: dict[str, PyLegendAggFunc] = self.__normalize_input_func_to_standard_dict(self.__func)
        tds_row = PandasApiTdsRow.from_tds_frame("r", self.__base_frame)

        for column_name, aggregate_function in normalized_func.items():
            # Index the row directly. Building ``lambda r: r["<name>"]`` source and
            # eval-ing it fails for column names containing quotes or backslashes and
            # needlessly evaluates column-name text as Python code.
            map_result: PyLegendPrimitiveOrPythonPrimitive = tds_row[column_name]
            collection: PyLegendPrimitiveCollection = create_primitive_collection(map_result)

            normalized_aggregate_function = self.__normalize_agg_func_to_lambda_function(aggregate_function)
            agg_result: PyLegendPrimitive = normalized_aggregate_function(collection)

            self.__aggregates_list.append((column_name, map_result, agg_result))

        return True

    def __normalize_input_func_to_standard_dict(
            self,
            func_input: PyLegendAggInput
    ) -> dict[str, PyLegendAggFunc]:
        """Normalize every accepted ``func`` shape into ``{column name: single function}``.

        A mapping is validated key by key; a one-element list or a single
        callable/str/np.ufunc is applied to every column.
        """
        column_names = [col.get_name() for col in self.calculate_columns()]

        if isinstance(func_input, collections.abc.Mapping):
            normalized: dict[str, PyLegendAggFunc] = {}

            for key, value in func_input.items():
                if not isinstance(key, str):
                    raise TypeError(
                        f"Invalid `func` argument for the aggregate function.\n"
                        f"When a dictionary is provided, all keys must be strings.\n"
                        f"But got key: {key!r} (type: {type(key).__name__})\n"
                    )
                if key not in column_names:
                    raise ValueError(
                        f"Invalid `func` argument for the aggregate function.\n"
                        f"When a dictionary is provided, all keys must be column names.\n"
                        f"Available columns are: {sorted(column_names)}\n"
                        f"But got key: {key!r} (type: {type(key).__name__})\n"
                    )

                if isinstance(value, collections.abc.Sequence) and not isinstance(value, str):
                    if len(value) != 1:
                        raise ValueError(
                            f"Invalid `func` argument for the aggregate function.\n"
                            f"When providing a list of functions for a specific column, "
                            f"the list must contain exactly one element (single aggregation only).\n"
                            f"Column: {key!r}\n"
                            f"List Length: {len(value)}\n"
                            f"Value: {value!r}\n"
                        )

                    single_func = value[0]

                    if not (callable(single_func) or isinstance(single_func, str) or isinstance(single_func, np.ufunc)):
                        raise TypeError(
                            f"Invalid `func` argument for the aggregate function.\n"
                            f"The single element in the list for key {key!r} must be a callable, str, or np.ufunc.\n"
                            f"But got element: {single_func!r} (type: {type(single_func).__name__})\n"
                        )

                    normalized[key] = single_func

                else:
                    if not (callable(value) or isinstance(value, str) or isinstance(value, np.ufunc)):
                        raise TypeError(
                            f"Invalid `func` argument for the aggregate function.\n"
                            f"When a dictionary is provided, the value must be a callable, str, or np.ufunc "
                            f"(or a list containing exactly one of these).\n"
                            f"But got value for key {key!r}: {value!r} (type: {type(value).__name__})\n"
                        )
                    normalized[key] = value

            return normalized

        elif isinstance(func_input, collections.abc.Sequence) and not isinstance(func_input, str):

            if len(func_input) != 1:
                raise ValueError(
                    f"Invalid `func` argument for the aggregate function.\n"
                    f"When providing a list as the func argument, it must contain exactly one element "
                    f"(which will be applied to all columns).\n"
                    f"Multiple functions are not supported.\n"
                    f"List Length: {len(func_input)}\n"
                    f"Input: {func_input!r}\n"
                )

            single_func = func_input[0]

            if not (callable(single_func) or isinstance(single_func, str) or isinstance(single_func, np.ufunc)):
                raise TypeError(
                    f"Invalid `func` argument for the aggregate function.\n"
                    f"The single element in the top-level list must be a callable, str, or np.ufunc.\n"
                    f"But got element: {single_func!r} (type: {type(single_func).__name__})\n"
                )

            return {col: single_func for col in column_names}

        elif callable(func_input) or isinstance(func_input, str) or isinstance(func_input, np.ufunc):
            return {col: func_input for col in column_names}

        else:
            raise TypeError(
                "Invalid `func` argument for aggregate function. "
                "Expected a callable, str, np.ufunc, a list containing exactly one of these, "
                "or a mapping[str -> callable/str/ufunc/a list containing exactly one of these]. "
                f"But got: {func_input!r} (type: {type(func_input).__name__})"
            )

    def __normalize_agg_func_to_lambda_function(
            self,
            func: PyLegendAggFunc
    ) -> PyLegendCallable[[PyLegendPrimitiveCollection], PyLegendPrimitive]:
        """Turn one aggregation spec into a callable over a primitive collection.

        Strings (case-insensitive), np.ufuncs and named callables are resolved through
        the alias table to a collection method. Any other callable is wrapped so that
        its result is checked to be a ``PyLegendPrimitive``.
        """
        alias_to_method = self._ALIAS_TO_LEGEND_METHOD

        if isinstance(func, str):
            func_lower = func.lower()
            if func_lower not in alias_to_method:
                raise NotImplementedError(
                    f"Invalid `func` argument for the aggregate function.\n"
                    f"The string {func!r} does not correspond to any supported aggregation.\n"
                    f"Available string functions are: {sorted(alias_to_method.keys())}"
                )  # pragma: no cover
            return self.__collection_method_caller(alias_to_method[func_lower])

        if isinstance(func, np.ufunc):
            func_name = func.__name__
            if func_name not in alias_to_method:
                raise NotImplementedError(
                    f"Invalid `func` argument for the aggregate function.\n"
                    f"The NumPy function {func_name!r} is not supported.\n"
                    f"Supported aggregate functions are: {sorted(alias_to_method.keys())}"
                )  # pragma: no cover
            return self.__collection_method_caller(alias_to_method[func_name])

        # Named callables (e.g. builtins or NumPy functions) are matched by name;
        # "<lambda>" is never an alias, so lambdas fall through to the wrapper below.
        func_name = getattr(func, "__name__", "").lower()
        if func_name in alias_to_method:
            return self.__collection_method_caller(alias_to_method[func_name])

        def validation_wrapper(x: PyLegendPrimitiveCollection) -> PyLegendPrimitive:
            result = func(x)
            if not isinstance(result, PyLegendPrimitive):
                raise TypeError(
                    f"Custom aggregation function must return a PyLegendPrimitive (Expression).\n"
                    f"But got type: {type(result).__name__}\n"
                    f"Value: {result!r}"
                )  # pragma: no cover
            return result

        return validation_wrapper

    @staticmethod
    def __collection_method_caller(
            method_name: str
    ) -> PyLegendCallable[[PyLegendPrimitiveCollection], PyLegendPrimitive]:
        """Return a callable invoking ``method_name`` (no arguments) on a collection.

        Equivalent to eval-ing ``lambda x: x.<method_name>()`` without compiling source.
        """
        def call_method(collection: PyLegendPrimitiveCollection) -> PyLegendPrimitive:
            result: PyLegendPrimitive = getattr(collection, method_name)()
            return result

        return call_method

    def _generate_lambda_source(self, internal_method_name: str) -> str:
        """Return Python source for a lambda calling ``internal_method_name`` on its argument.

        Kept for backward compatibility; this class no longer evaluates the source.
        """
        return f"lambda x: x.{internal_method_name}()"
|