pylegend 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/core/database/sql_to_string/db_extension.py +68 -6
- pylegend/core/language/legendql_api/legendql_api_custom_expressions.py +190 -5
- pylegend/core/language/pandas_api/pandas_api_series.py +3 -0
- pylegend/core/sql/metamodel.py +4 -1
- pylegend/core/tds/legendql_api/frames/functions/legendql_api_distinct_function.py +53 -7
- pylegend/core/tds/legendql_api/frames/legendql_api_base_tds_frame.py +146 -4
- pylegend/core/tds/legendql_api/frames/legendql_api_tds_frame.py +33 -2
- pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +221 -96
- pylegend/core/tds/pandas_api/frames/functions/assign_function.py +65 -23
- pylegend/core/tds/pandas_api/frames/functions/drop.py +3 -3
- pylegend/core/tds/pandas_api/frames/functions/dropna.py +167 -0
- pylegend/core/tds/pandas_api/frames/functions/fillna.py +162 -0
- pylegend/core/tds/pandas_api/frames/functions/filter.py +10 -5
- pylegend/core/tds/pandas_api/frames/functions/merge.py +513 -0
- pylegend/core/tds/pandas_api/frames/functions/rename.py +214 -0
- pylegend/core/tds/pandas_api/frames/functions/truncate_function.py +151 -120
- pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +7 -3
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +559 -18
- pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py +325 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +218 -12
- pylegend/extensions/tds/abstract/csv_tds_frame.py +95 -0
- pylegend/extensions/tds/legendql_api/frames/legendql_api_csv_input_frame.py +36 -0
- pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_function_input_frame.py +9 -4
- pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_service_input_frame.py +12 -5
- pylegend/extensions/tds/pandas_api/frames/pandas_api_table_spec_input_frame.py +12 -4
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/METADATA +1 -1
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/RECORD +31 -24
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/WHEEL +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from pylegend._typing import (
|
|
17
|
+
PyLegendOptional,
|
|
18
|
+
PyLegendUnion,
|
|
19
|
+
PyLegendList,
|
|
20
|
+
PyLegendDict,
|
|
21
|
+
TYPE_CHECKING,
|
|
22
|
+
)
|
|
23
|
+
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
24
|
+
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
|
|
25
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PandasApiGroupbyTdsFrame:
    """Deferred ``groupby`` over a pandas-API TDS frame.

    The object only records the base frame, the grouping columns and an
    optional column selection made with ``grouped[...]``. A new frame is built
    when one of the aggregation methods (``aggregate``, ``agg``, ``sum`` ...)
    is called.

    Only ``by`` and ``sort`` are configurable. ``level`` must be ``None`` and
    ``as_index``, ``group_keys``, ``observed`` and ``dropna`` must be ``False``;
    anything else raises ``NotImplementedError`` at construction time.
    """

    __base_frame: PandasApiBaseTdsFrame
    __by: PyLegendUnion[str, PyLegendList[str]]
    __level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]]
    __as_index: bool
    __sort: bool
    __group_keys: bool
    __observed: bool
    __dropna: bool

    __grouping_column_name_list: PyLegendList[str]
    __selected_columns: PyLegendOptional[PyLegendList[str]]

    @classmethod
    def name(cls) -> str:
        return "groupby"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            by: PyLegendUnion[str, PyLegendList[str]],
            level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
            as_index: bool = False,
            sort: bool = True,
            group_keys: bool = False,
            observed: bool = False,
            dropna: bool = False,
    ) -> None:
        """Store the groupby arguments and validate them immediately.

        Raises:
            NotImplementedError: if an unsupported option is not at its default.
            TypeError: if ``by`` is neither a string nor a list.
            ValueError: if ``by`` is an empty list.
            KeyError: if a name in ``by`` is not a column of ``base_frame``.
        """
        self.__base_frame = base_frame
        self.__by = by
        self.__level = level
        self.__as_index = as_index
        self.__sort = sort
        self.__group_keys = group_keys
        self.__observed = observed
        self.__dropna = dropna

        self.__selected_columns = None

        self.__validate()

    def __validate(self) -> None:
        """Check the stored arguments and resolve the list of grouping columns."""
        if self.__level is not None:
            raise NotImplementedError(
                "The 'level' parameter of the groupby function is not supported yet. "
                "Please specify groupby column names using the 'by' parameter."
            )

        # These options exist only to mirror the pandas signature; each one must be False.
        unsupported_flags = (
            ("as_index", self.__as_index),
            ("group_keys", self.__group_keys),
            ("observed", self.__observed),
            ("dropna", self.__dropna),
        )
        for flag_name, flag_value in unsupported_flags:
            if flag_value is not False:
                raise NotImplementedError(
                    f"The '{flag_name}' parameter of the groupby function must be False, "
                    f"but got: {flag_value} (type: {type(flag_value).__name__})"
                )

        input_cols: PyLegendList[str]
        if isinstance(self.__by, str):
            input_cols = [self.__by]
        elif isinstance(self.__by, list):
            # Copy so later mutation of the caller's list cannot change the grouping.
            input_cols = list(self.__by)
        else:
            raise TypeError(
                "The 'by' parameter in groupby function must be a string or a list of strings, "
                f"but got: {self.__by} (type: {type(self.__by).__name__})"
            )

        if not input_cols:
            raise ValueError("The 'by' parameter in groupby function must contain at least one column name.")

        available_columns = {c.get_name() for c in self.__base_frame.columns()}
        missing_cols = [col for col in input_cols if col not in available_columns]

        if missing_cols:
            raise KeyError(
                f"Column(s) {missing_cols} in groupby function's provided columns list "
                f"do not exist in the current frame. "
                f"Current frame columns: {sorted(available_columns)}"
            )

        self.__grouping_column_name_list = input_cols

    def __getitem__(self, item: PyLegendUnion[str, PyLegendList[str]]) -> "PandasApiGroupbyTdsFrame":
        """Return a new groupby frame restricted to the column(s) named by ``item``.

        The receiver is left unchanged; the copy carries the same groupby
        arguments plus the selection.

        Raises:
            TypeError: if ``item`` is neither a string nor a list.
            ValueError: if ``item`` is an empty list.
            KeyError: if a selected name is not a column of the base frame.
        """
        columns_to_select: PyLegendList[str]
        if isinstance(item, str):
            columns_to_select = [item]
        elif isinstance(item, list):
            columns_to_select = list(item)
        else:
            raise TypeError(
                f"Column selection after groupby function must be a string or a list of strings, "
                f"but got: {item} (type: {type(item).__name__})"
            )

        if not columns_to_select:
            raise ValueError("When performing column selection after groupby, at least one column must be selected.")

        available_columns = {c.get_name() for c in self.__base_frame.columns()}
        missing_cols = [col for col in columns_to_select if col not in available_columns]

        if missing_cols:
            raise KeyError(
                f"Column(s) {missing_cols} selected after groupby do not exist in the current frame. "
                f"Current frame columns: {sorted(available_columns)}"
            )

        new_frame = PandasApiGroupbyTdsFrame(
            base_frame=self.__base_frame,
            by=self.__by,
            level=self.__level,
            as_index=self.__as_index,
            sort=self.__sort,
            group_keys=self.__group_keys,
            observed=self.__observed,
            dropna=self.__dropna,
        )

        new_frame.__selected_columns = columns_to_select
        return new_frame

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this groupby was created from."""
        return self.__base_frame

    def grouping_column_name_list(self) -> PyLegendList[str]:
        """Return a copy of the validated grouping column names."""
        return self.__grouping_column_name_list.copy()

    def selected_columns(self) -> PyLegendOptional[PyLegendList[str]]:
        """Return a copy of the selected column names, or ``None`` if none were selected."""
        if self.__selected_columns is None:
            return None
        return self.__selected_columns.copy()

    def aggregate(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> "PandasApiTdsFrame":
        """Build the frame that applies ``func`` to this groupby.

        When the groupby was created with a truthy ``sort``, the aggregated
        frame is additionally wrapped in a sort on the grouping columns
        (ascending, nulls last).
        """
        # Imported lazily, as in the rest of this class, to avoid import cycles at module load.
        from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
            PandasApiAppliedFunctionTdsFrame
        )
        from pylegend.core.tds.pandas_api.frames.functions.aggregate_function import AggregateFunction

        aggregated_result: PandasApiAppliedFunctionTdsFrame = PandasApiAppliedFunctionTdsFrame(
            AggregateFunction(self, func, axis, *args, **kwargs)
        )

        if self.__sort:
            from pylegend.core.tds.pandas_api.frames.functions.sort_values_function import SortValuesFunction

            aggregated_result = PandasApiAppliedFunctionTdsFrame(
                SortValuesFunction(
                    base_frame=aggregated_result,
                    by=self.grouping_column_name_list(),
                    axis=0,
                    ascending=True,
                    inplace=False,
                    kind=None,
                    na_position="last",
                    ignore_index=True,
                    key=None,
                )
            )

        return aggregated_result

    def agg(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> "PandasApiTdsFrame":
        """Alias of ``aggregate``.

        Delegates so that both spellings honour the ``sort`` option identically.
        """
        return self.aggregate(func, axis, *args, **kwargs)

    @staticmethod
    def __reject_engine_arguments(
            function_name: str,
            engine: PyLegendOptional[str],
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]],
    ) -> None:
        """Raise ``NotImplementedError`` if ``engine`` or ``engine_kwargs`` was supplied."""
        if engine is not None:
            raise NotImplementedError(f"engine parameter is not supported in {function_name} function.")
        if engine_kwargs is not None:
            raise NotImplementedError(f"engine_kwargs parameter is not supported in {function_name} function.")

    def sum(
            self,
            numeric_only: bool = False,
            min_count: int = 0,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("sum", 0)``; all parameters must keep their defaults."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in sum function.")
        if min_count != 0:
            raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")
        self.__reject_engine_arguments("sum", engine, engine_kwargs)
        return self.aggregate("sum", 0)

    def mean(
            self,
            numeric_only: bool = False,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("mean", 0)``; all parameters must keep their defaults."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in mean function.")
        self.__reject_engine_arguments("mean", engine, engine_kwargs)
        return self.aggregate("mean", 0)

    def min(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("min", 0)``; all parameters must keep their defaults."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in min function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in min function, but got: {min_count}")
        self.__reject_engine_arguments("min", engine, engine_kwargs)
        return self.aggregate("min", 0)

    def max(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("max", 0)``; all parameters must keep their defaults."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in max function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in max function, but got: {min_count}")
        self.__reject_engine_arguments("max", engine, engine_kwargs)
        return self.aggregate("max", 0)

    def std(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("std", 0)``; all parameters must keep their defaults."""
        if ddof != 1:
            raise NotImplementedError(
                f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}"
            )
        self.__reject_engine_arguments("std", engine, engine_kwargs)
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in std function.")
        return self.aggregate("std", 0)

    def var(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("var", 0)``; all parameters must keep their defaults."""
        if ddof != 1:
            raise NotImplementedError(f"Only ddof=1 (Sample Variance) is supported in var function, but got: {ddof}")
        self.__reject_engine_arguments("var", engine, engine_kwargs)
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in var function.")
        return self.aggregate("var", 0)

    def count(self) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("count", 0)``."""
        return self.aggregate("count", 0)
|
|
@@ -16,6 +16,13 @@ from abc import abstractmethod
|
|
|
16
16
|
from datetime import date, datetime
|
|
17
17
|
from typing import TYPE_CHECKING
|
|
18
18
|
|
|
19
|
+
from typing_extensions import Concatenate
|
|
20
|
+
|
|
21
|
+
try:
    from typing import ParamSpec
except ImportError:
    # typing.ParamSpec is missing on older interpreters; use the backport there.
    # Only a failed import is expected here, so nothing broader is caught.
    from typing_extensions import ParamSpec  # type: ignore
|
|
25
|
+
|
|
19
26
|
from pylegend._typing import (
|
|
20
27
|
PyLegendCallable,
|
|
21
28
|
PyLegendSequence,
|
|
@@ -23,11 +30,13 @@ from pylegend._typing import (
|
|
|
23
30
|
PyLegendOptional,
|
|
24
31
|
PyLegendList,
|
|
25
32
|
PyLegendSet,
|
|
33
|
+
PyLegendTuple,
|
|
34
|
+
PyLegendDict
|
|
26
35
|
)
|
|
27
|
-
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
28
36
|
from pylegend.core.language import (
|
|
29
37
|
PyLegendPrimitive,
|
|
30
38
|
)
|
|
39
|
+
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
31
40
|
from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
|
|
32
41
|
from pylegend.core.language.shared.primitives.boolean import PyLegendBoolean
|
|
33
42
|
from pylegend.core.language.shared.primitives.integer import PyLegendInteger
|
|
@@ -37,11 +46,14 @@ from pylegend.core.tds.tds_frame import PyLegendTdsFrame
|
|
|
37
46
|
|
|
38
47
|
if TYPE_CHECKING:
|
|
39
48
|
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
49
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
|
|
40
50
|
|
|
41
51
|
__all__: PyLegendSequence[str] = [
|
|
42
52
|
"PandasApiTdsFrame"
|
|
43
53
|
]
|
|
44
54
|
|
|
55
|
+
P = ParamSpec("P")
|
|
56
|
+
|
|
45
57
|
|
|
46
58
|
class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
47
59
|
|
|
@@ -52,6 +64,14 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
|
52
64
|
) -> PyLegendUnion["PandasApiTdsFrame", "Series"]:
|
|
53
65
|
pass # pragma: no cover
|
|
54
66
|
|
|
67
|
+
@abstractmethod
|
|
68
|
+
def __setitem__(
|
|
69
|
+
self,
|
|
70
|
+
key: str,
|
|
71
|
+
value: PyLegendUnion["Series", PyLegendPrimitiveOrPythonPrimitive]
|
|
72
|
+
) -> None:
|
|
73
|
+
pass # pragma: no cover
|
|
74
|
+
|
|
55
75
|
@abstractmethod
|
|
56
76
|
def assign(
|
|
57
77
|
self,
|
|
@@ -104,27 +124,213 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
|
104
124
|
index: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
105
125
|
columns: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
|
|
106
126
|
level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]] = None,
|
|
107
|
-
inplace: PyLegendUnion[bool, PyLegendBoolean] =
|
|
127
|
+
inplace: PyLegendUnion[bool, PyLegendBoolean] = False,
|
|
108
128
|
errors: str = "raise",
|
|
109
129
|
) -> "PandasApiTdsFrame":
|
|
110
130
|
pass # pragma: no cover
|
|
111
131
|
|
|
112
132
|
@abstractmethod
|
|
113
133
|
def aggregate(
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
134
|
+
self,
|
|
135
|
+
func: PyLegendAggInput,
|
|
136
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
137
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
138
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
119
139
|
) -> "PandasApiTdsFrame":
|
|
120
140
|
pass # pragma: no cover
|
|
121
141
|
|
|
122
142
|
@abstractmethod
|
|
123
143
|
def agg(
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
144
|
+
self,
|
|
145
|
+
func: PyLegendAggInput,
|
|
146
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
147
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
148
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
149
|
+
) -> "PandasApiTdsFrame":
|
|
150
|
+
pass # pragma: no cover
|
|
151
|
+
|
|
152
|
+
@abstractmethod
|
|
153
|
+
def merge(
|
|
154
|
+
self,
|
|
155
|
+
other: "PandasApiTdsFrame",
|
|
156
|
+
how: PyLegendOptional[str] = "inner",
|
|
157
|
+
on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
158
|
+
left_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
159
|
+
right_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
160
|
+
left_index: PyLegendOptional[bool] = False,
|
|
161
|
+
right_index: PyLegendOptional[bool] = False,
|
|
162
|
+
sort: PyLegendOptional[bool] = False,
|
|
163
|
+
suffixes: PyLegendOptional[
|
|
164
|
+
PyLegendUnion[
|
|
165
|
+
PyLegendTuple[PyLegendUnion[str, None], PyLegendUnion[str, None]],
|
|
166
|
+
PyLegendList[PyLegendUnion[str, None]],
|
|
167
|
+
]
|
|
168
|
+
] = ("_x", "_y"),
|
|
169
|
+
indicator: PyLegendOptional[PyLegendUnion[bool, str]] = False,
|
|
170
|
+
validate: PyLegendOptional[str] = None
|
|
171
|
+
) -> "PandasApiTdsFrame":
|
|
172
|
+
pass # pragma: no cover
|
|
173
|
+
|
|
174
|
+
@abstractmethod
|
|
175
|
+
def join(
|
|
176
|
+
self,
|
|
177
|
+
other: "PandasApiTdsFrame",
|
|
178
|
+
on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
179
|
+
how: PyLegendOptional[str] = "left",
|
|
180
|
+
lsuffix: str = "",
|
|
181
|
+
rsuffix: str = "",
|
|
182
|
+
sort: PyLegendOptional[bool] = False,
|
|
183
|
+
validate: PyLegendOptional[str] = None
|
|
184
|
+
) -> "PandasApiTdsFrame":
|
|
185
|
+
pass # pragma: no cover
|
|
186
|
+
|
|
187
|
+
@abstractmethod
|
|
188
|
+
def rename(
|
|
189
|
+
self,
|
|
190
|
+
mapper: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
191
|
+
index: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
192
|
+
columns: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
193
|
+
axis: PyLegendUnion[str, int] = 1,
|
|
194
|
+
inplace: PyLegendUnion[bool] = False,
|
|
195
|
+
copy: PyLegendUnion[bool] = True,
|
|
196
|
+
level: PyLegendOptional[PyLegendUnion[int, str]] = None,
|
|
197
|
+
errors: str = "ignore",
|
|
198
|
+
) -> "PandasApiTdsFrame":
|
|
199
|
+
pass # pragma: no cover
|
|
200
|
+
|
|
201
|
+
@abstractmethod
|
|
202
|
+
def groupby(
|
|
203
|
+
self,
|
|
204
|
+
by: PyLegendUnion[str, PyLegendList[str]],
|
|
205
|
+
level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
|
|
206
|
+
as_index: bool = False,
|
|
207
|
+
sort: bool = True,
|
|
208
|
+
group_keys: bool = False,
|
|
209
|
+
observed: bool = False,
|
|
210
|
+
dropna: bool = False,
|
|
211
|
+
) -> "PandasApiGroupbyTdsFrame":
|
|
212
|
+
pass # pragma: no cover
|
|
213
|
+
|
|
214
|
+
@abstractmethod
|
|
215
|
+
def sum(
|
|
216
|
+
self,
|
|
217
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
218
|
+
skipna: bool = True,
|
|
219
|
+
numeric_only: bool = False,
|
|
220
|
+
min_count: int = 0,
|
|
221
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
222
|
+
) -> "PandasApiTdsFrame":
|
|
223
|
+
pass # pragma: no cover
|
|
224
|
+
|
|
225
|
+
@abstractmethod
|
|
226
|
+
def mean(
|
|
227
|
+
self,
|
|
228
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
229
|
+
skipna: bool = True,
|
|
230
|
+
numeric_only: bool = False,
|
|
231
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
232
|
+
) -> "PandasApiTdsFrame":
|
|
233
|
+
pass # pragma: no cover
|
|
234
|
+
|
|
235
|
+
@abstractmethod
|
|
236
|
+
def min(
|
|
237
|
+
self,
|
|
238
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
239
|
+
skipna: bool = True,
|
|
240
|
+
numeric_only: bool = False,
|
|
241
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
242
|
+
) -> "PandasApiTdsFrame":
|
|
243
|
+
pass # pragma: no cover
|
|
244
|
+
|
|
245
|
+
@abstractmethod
|
|
246
|
+
def max(
|
|
247
|
+
self,
|
|
248
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
249
|
+
skipna: bool = True,
|
|
250
|
+
numeric_only: bool = False,
|
|
251
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
252
|
+
) -> "PandasApiTdsFrame":
|
|
253
|
+
pass # pragma: no cover
|
|
254
|
+
|
|
255
|
+
@abstractmethod
|
|
256
|
+
def std(
|
|
257
|
+
self,
|
|
258
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
259
|
+
skipna: bool = True,
|
|
260
|
+
ddof: int = 1,
|
|
261
|
+
numeric_only: bool = False,
|
|
262
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
263
|
+
) -> "PandasApiTdsFrame":
|
|
264
|
+
pass # pragma: no cover
|
|
265
|
+
|
|
266
|
+
@abstractmethod
|
|
267
|
+
def var(
|
|
268
|
+
self,
|
|
269
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
270
|
+
skipna: bool = True,
|
|
271
|
+
ddof: int = 1,
|
|
272
|
+
numeric_only: bool = False,
|
|
273
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
274
|
+
) -> "PandasApiTdsFrame":
|
|
275
|
+
pass # pragma: no cover
|
|
276
|
+
|
|
277
|
+
@abstractmethod
|
|
278
|
+
def count(
|
|
279
|
+
self,
|
|
280
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
281
|
+
numeric_only: bool = False,
|
|
282
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
283
|
+
) -> "PandasApiTdsFrame":
|
|
284
|
+
pass # pragma: no cover
|
|
285
|
+
|
|
286
|
+
@abstractmethod
|
|
287
|
+
def apply(
|
|
288
|
+
self,
|
|
289
|
+
func: PyLegendUnion[
|
|
290
|
+
PyLegendCallable[Concatenate["Series", P], PyLegendPrimitiveOrPythonPrimitive],
|
|
291
|
+
str
|
|
292
|
+
],
|
|
293
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
294
|
+
raw: bool = False,
|
|
295
|
+
result_type: PyLegendOptional[str] = None,
|
|
296
|
+
args: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = (),
|
|
297
|
+
by_row: PyLegendUnion[bool, str] = "compat",
|
|
298
|
+
engine: str = "python",
|
|
299
|
+
engine_kwargs: PyLegendOptional[PyLegendDict[str, PyLegendPrimitiveOrPythonPrimitive]] = None,
|
|
300
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
301
|
+
) -> "PandasApiTdsFrame":
|
|
302
|
+
pass # pragma: no cover
|
|
303
|
+
|
|
304
|
+
@abstractmethod
|
|
305
|
+
def head(self, n: int = 5) -> "PandasApiTdsFrame":
|
|
306
|
+
pass # pragma: no cover
|
|
307
|
+
|
|
308
|
+
@property
|
|
309
|
+
@abstractmethod
|
|
310
|
+
def shape(self) -> PyLegendTuple[int, int]:
|
|
311
|
+
pass # pragma: no cover
|
|
312
|
+
|
|
313
|
+
@abstractmethod
|
|
314
|
+
def dropna(
|
|
315
|
+
self,
|
|
316
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
317
|
+
how: str = "any",
|
|
318
|
+
thresh: PyLegendOptional[int] = None,
|
|
319
|
+
subset: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
320
|
+
inplace: bool = False,
|
|
321
|
+
ignore_index: bool = False
|
|
322
|
+
) -> "PandasApiTdsFrame":
|
|
323
|
+
pass # pragma: no cover
|
|
324
|
+
|
|
325
|
+
@abstractmethod
|
|
326
|
+
def fillna(
|
|
327
|
+
self,
|
|
328
|
+
value: PyLegendUnion[
|
|
329
|
+
int, float, str, bool, date, datetime,
|
|
330
|
+
PyLegendDict[str, PyLegendUnion[int, float, str, bool, date, datetime]]
|
|
331
|
+
] = None, # type: ignore
|
|
332
|
+
axis: PyLegendOptional[PyLegendUnion[int, str]] = 0,
|
|
333
|
+
inplace: bool = False,
|
|
334
|
+
limit: PyLegendOptional[int] = None
|
|
129
335
|
) -> "PandasApiTdsFrame":
|
|
130
336
|
pass # pragma: no cover
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
from abc import ABCMeta
|
|
15
|
+
from pylegend._typing import (
|
|
16
|
+
PyLegendSequence,
|
|
17
|
+
PyLegendList,
|
|
18
|
+
)
|
|
19
|
+
from io import StringIO
|
|
20
|
+
from pylegend.core.tds.tds_column import (
|
|
21
|
+
PrimitiveType,
|
|
22
|
+
PrimitiveTdsColumn)
|
|
23
|
+
from pylegend.core.tds.tds_frame import FrameToPureConfig, FrameToSqlConfig, PyLegendTdsFrame
|
|
24
|
+
from pylegend.core.sql.metamodel import (
|
|
25
|
+
QuerySpecification,
|
|
26
|
+
)
|
|
27
|
+
import pandas as pd
|
|
28
|
+
|
|
29
|
+
__all__: PyLegendSequence[str] = [
|
|
30
|
+
"CsvInputFrameAbstract",
|
|
31
|
+
"tds_columns_from_csv_string"
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CsvInputFrameAbstract(PyLegendTdsFrame, metaclass=ABCMeta):
    """Abstract TDS frame whose data is supplied inline as CSV text.

    Column definitions are inferred from the CSV text at construction time via
    ``tds_columns_from_csv_string``; the raw text itself is kept for rendering.
    """

    __csv_string: str

    def __init__(
            self,
            csv_string: str,
    ) -> None:
        inferred_columns = tds_columns_from_csv_string(csv_string)
        super().__init__(columns=inferred_columns)  # type: ignore[call-arg]
        self.__csv_string = csv_string

    def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Always raises ``RuntimeError``: SQL generation is not available for CSV frames."""
        raise RuntimeError("SQL generation for csv tds frames is not supported yet.")

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the raw CSV text placed between a ``#TDS`` line and a closing ``#``."""
        return "#TDS\n" + self.__csv_string + "#"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def tds_columns_from_csv_string(
        csv_string: str
) -> PyLegendList[PrimitiveTdsColumn]:
    """Infer one ``PrimitiveTdsColumn`` per CSV column.

    The text is parsed with ``pandas.read_csv`` and each column's pandas dtype
    is mapped to a primitive type: bool -> Boolean, integer -> Integer,
    float -> Float. Any other column is Date when
    ``is_strict_date_or_datetime`` accepts it, and String otherwise.
    """
    parsed = pd.read_csv(StringIO(csv_string))
    type_checks = pd.api.types

    # Checked in order; the first matching dtype predicate decides the type.
    dtype_rules = (
        (type_checks.is_bool_dtype, PrimitiveType.Boolean),
        (type_checks.is_integer_dtype, PrimitiveType.Integer),
        (type_checks.is_float_dtype, PrimitiveType.Float),
    )

    result = []
    for label in parsed.columns:
        series = parsed[label]
        inferred = next(
            (candidate for matches, candidate in dtype_rules if matches(series.dtype)),
            None,
        )
        if inferred is None:
            if is_strict_date_or_datetime(series):
                inferred = PrimitiveType.Date
            else:
                inferred = PrimitiveType.String
        result.append(PrimitiveTdsColumn(name=label, _type=inferred))

    return result
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def is_strict_date_or_datetime(col: pd.Series) -> bool:  # type: ignore[explicit-any]
    """Report whether pandas can parse the whole column with one exact layout.

    The datetime layout ``%Y-%m-%d %H:%M:%S`` is tried first, then the
    date-only layout ``%Y-%m-%d``. The column qualifies as soon as one layout
    parses without error; if both raise ``ValueError`` or ``TypeError`` the
    result is False.
    """
    for layout in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
        try:
            pd.to_datetime(col, format=layout, exact=True, errors="raise")
        except (ValueError, TypeError):
            # This layout does not fit; fall through to the next one.
            continue
        return True
    return False
|