pylegend 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +221 -96
- pylegend/core/tds/pandas_api/frames/functions/merge.py +513 -0
- pylegend/core/tds/pandas_api/frames/functions/rename.py +214 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +275 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py +325 -0
- pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +147 -10
- pylegend/extensions/tds/abstract/csv_tds_frame.py +95 -0
- pylegend/extensions/tds/legendql_api/frames/legendql_api_csv_input_frame.py +36 -0
- {pylegend-0.10.0.dist-info → pylegend-0.11.0.dist-info}/METADATA +1 -1
- {pylegend-0.10.0.dist-info → pylegend-0.11.0.dist-info}/RECORD +14 -9
- {pylegend-0.10.0.dist-info → pylegend-0.11.0.dist-info}/WHEEL +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.11.0.dist-info}/licenses/LICENSE +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.11.0.dist-info}/licenses/LICENSE.spdx +0 -0
- {pylegend-0.10.0.dist-info → pylegend-0.11.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from pylegend._typing import (
|
|
17
|
+
PyLegendOptional,
|
|
18
|
+
PyLegendUnion,
|
|
19
|
+
PyLegendList,
|
|
20
|
+
PyLegendDict,
|
|
21
|
+
TYPE_CHECKING,
|
|
22
|
+
)
|
|
23
|
+
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
24
|
+
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
|
|
25
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PandasApiGroupbyTdsFrame:
    """Grouping specification over a pandas-API TDS frame.

    Stores the base frame, the validated grouping column names and an optional
    column selection, and exposes the aggregation entry points (``aggregate``,
    ``agg``, ``sum``, ``mean``, ``min``, ``max``, ``std``, ``var``, ``count``).
    """

    # Frame the grouping is applied to.
    __base_frame: PandasApiBaseTdsFrame
    # Raw 'by' argument exactly as supplied by the caller.
    __by: PyLegendUnion[str, PyLegendList[str]]
    # The remaining pandas-style options are stored but only their default
    # values are accepted (see __validate).
    __level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]]
    __as_index: bool
    __sort: bool
    __group_keys: bool
    __observed: bool
    __dropna: bool

    # Normalised list form of 'by', set by __validate.
    __grouping_column_name_list: PyLegendList[str]
    # Columns chosen through __getitem__; None when no selection was made.
    __selected_columns: PyLegendOptional[PyLegendList[str]]

    @classmethod
    def name(cls) -> str:
        """Return the name of this operation."""
        return "groupby"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            by: PyLegendUnion[str, PyLegendList[str]],
            level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
            as_index: bool = False,
            sort: bool = True,
            group_keys: bool = False,
            observed: bool = False,
            dropna: bool = False,
    ) -> None:
        """Store the grouping options and validate them.

        Raises whatever ``__validate`` raises: NotImplementedError for
        unsupported option values, TypeError / ValueError / KeyError for an
        invalid 'by' argument.
        """
        self.__base_frame = base_frame
        self.__by = by
        self.__level = level
        self.__as_index = as_index
        self.__sort = sort
        self.__group_keys = group_keys
        self.__observed = observed
        self.__dropna = dropna

        self.__selected_columns = None

        self.__validate()

    def __validate(self) -> None:
        """Check the stored options and derive the grouping column list.

        Raises:
            NotImplementedError: 'level' is not None, or any of 'as_index',
                'group_keys', 'observed', 'dropna' is not exactly False.
            TypeError: 'by' is neither a string nor a list.
            ValueError: 'by' is an empty list.
            KeyError: a name in 'by' is not a column of the base frame.
        """
        if self.__level is not None:
            raise NotImplementedError(
                "The 'level' parameter of the groupby function is not supported yet. "
                "Please specify groupby column names using the 'by' parameter."
            )

        # These flags are only accepted with the exact value False; the identity
        # check also rejects falsy non-bool values such as 0 or None.
        for flag_name, flag_value in (
                ("as_index", self.__as_index),
                ("group_keys", self.__group_keys),
                ("observed", self.__observed),
                ("dropna", self.__dropna),
        ):
            if flag_value is not False:
                raise NotImplementedError(
                    f"The '{flag_name}' parameter of the groupby function must be False, "
                    f"but got: {flag_value} (type: {type(flag_value).__name__})"
                )

        input_cols: PyLegendList[str] = []
        if isinstance(self.__by, str):
            input_cols = [self.__by]
        elif isinstance(self.__by, list):
            input_cols = self.__by
        else:
            # The two message fragments previously ran together without a separator.
            raise TypeError(
                f"The 'by' parameter in groupby function must be a string or a list of strings, "
                f"but got: {self.__by} (type: {type(self.__by).__name__})"
            )

        if not input_cols:
            raise ValueError("The 'by' parameter in groupby function must contain at least one column name.")

        available_columns = {c.get_name() for c in self.__base_frame.columns()}
        missing_cols = [col for col in input_cols if col not in available_columns]

        if missing_cols:
            raise KeyError(
                f"Column(s) {missing_cols} in groupby function's provided columns list "
                f"do not exist in the current frame. "
                f"Current frame columns: {sorted(available_columns)}"
            )

        # Copy so later mutation of the caller's list does not affect this object.
        self.__grouping_column_name_list = input_cols.copy()

    def __getitem__(self, item: PyLegendUnion[str, PyLegendList[str]]) -> "PandasApiGroupbyTdsFrame":
        """Return a new groupby object restricted to the given column(s).

        The receiver is left unchanged; the returned object carries the same
        grouping options plus the selection.

        Raises:
            TypeError: 'item' is neither a string nor a list.
            ValueError: 'item' is an empty list.
            KeyError: a selected name is not a column of the base frame.
        """
        columns_to_select: PyLegendList[str] = []

        if isinstance(item, str):
            columns_to_select = [item]
        elif isinstance(item, list):
            columns_to_select = item
        else:
            raise TypeError(
                f"Column selection after groupby function must be a string or a list of strings, "
                f"but got: {item} (type: {type(item).__name__})"
            )

        if not columns_to_select:
            raise ValueError("When performing column selection after groupby, at least one column must be selected.")

        available_columns = {c.get_name() for c in self.__base_frame.columns()}
        missing_cols = [col for col in columns_to_select if col not in available_columns]

        if missing_cols:
            raise KeyError(
                f"Column(s) {missing_cols} selected after groupby do not exist in the current frame. "
                f"Current frame columns: {sorted(available_columns)}"
            )

        new_frame = PandasApiGroupbyTdsFrame(
            base_frame=self.__base_frame,
            by=self.__by,
            level=self.__level,
            as_index=self.__as_index,
            sort=self.__sort,
            group_keys=self.__group_keys,
            observed=self.__observed,
            dropna=self.__dropna,
        )

        new_frame.__selected_columns = columns_to_select.copy()
        return new_frame

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this grouping is applied to."""
        return self.__base_frame

    def grouping_column_name_list(self) -> PyLegendList[str]:
        """Return a copy of the grouping column names."""
        return self.__grouping_column_name_list.copy()

    def selected_columns(self) -> PyLegendOptional[PyLegendList[str]]:
        """Return a copy of the selected column names, or None when nothing was selected."""
        if self.__selected_columns is None:
            return None
        return self.__selected_columns.copy()

    def aggregate(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> "PandasApiTdsFrame":
        """Aggregate the groups with ``func``.

        Wraps an ``AggregateFunction`` built from this groupby in an applied-function
        frame. When the groupby was created with a truthy ``sort``, the result is
        additionally wrapped in a ``SortValuesFunction`` ordering by the grouping
        columns, ascending.
        """
        # Imported inside the method, presumably to avoid a circular import - confirm.
        from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunctionTdsFrame
        from pylegend.core.tds.pandas_api.frames.functions.aggregate_function import AggregateFunction

        aggregated_result: PandasApiAppliedFunctionTdsFrame = PandasApiAppliedFunctionTdsFrame(
            AggregateFunction(self, func, axis, *args, **kwargs)
        )

        if self.__sort:
            from pylegend.core.tds.pandas_api.frames.functions.sort_values_function import SortValuesFunction

            aggregated_result = PandasApiAppliedFunctionTdsFrame(
                SortValuesFunction(
                    base_frame=aggregated_result,
                    by=self.grouping_column_name_list(),
                    axis=0,
                    ascending=True,
                    inplace=False,
                    kind=None,
                    na_position="last",
                    ignore_index=True,
                    key=None,
                )
            )

        return aggregated_result

    def agg(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> "PandasApiTdsFrame":
        """Alias of ``aggregate``.

        Delegates to ``aggregate`` so that the ``sort`` option is honoured the same
        way through both entry points.
        """
        return self.aggregate(func, axis, *args, **kwargs)

    def sum(
            self,
            numeric_only: bool = False,
            min_count: int = 0,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with "sum"; only the default argument values are accepted.

        Raises:
            NotImplementedError: any argument differs from its default.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in sum function.")
        if min_count != 0:
            raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in sum function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in sum function.")
        return self.aggregate("sum", 0)

    def mean(
            self,
            numeric_only: bool = False,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with "mean"; only the default argument values are accepted.

        Raises:
            NotImplementedError: any argument differs from its default.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in mean function.")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in mean function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in mean function.")
        return self.aggregate("mean", 0)

    def min(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with "min"; only the default argument values are accepted.

        Raises:
            NotImplementedError: any argument differs from its default.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in min function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in min function, but got: {min_count}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in min function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in min function.")
        return self.aggregate("min", 0)

    def max(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Aggregate with "max"; only the default argument values are accepted.

        Raises:
            NotImplementedError: any argument differs from its default.
        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in max function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in max function, but got: {min_count}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in max function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in max function.")
        return self.aggregate("max", 0)

    def std(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Aggregate with "std"; only the default argument values are accepted.

        Raises:
            NotImplementedError: any argument differs from its default.
        """
        if ddof != 1:
            raise NotImplementedError(f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in std function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in std function.")
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in std function.")
        return self.aggregate("std", 0)

    def var(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Aggregate with "var"; only the default argument values are accepted.

        Raises:
            NotImplementedError: any argument differs from its default.
        """
        if ddof != 1:
            raise NotImplementedError(f"Only ddof=1 (Sample Variance) is supported in var function, but got: {ddof}")
        if engine is not None:
            raise NotImplementedError("engine parameter is not supported in var function.")
        if engine_kwargs is not None:
            raise NotImplementedError("engine_kwargs parameter is not supported in var function.")
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in var function.")
        return self.aggregate("var", 0)

    def count(self) -> "PandasApiTdsFrame":
        """Aggregate with "count"."""
        return self.aggregate("count", 0)
|
|
@@ -23,11 +23,13 @@ from pylegend._typing import (
|
|
|
23
23
|
PyLegendOptional,
|
|
24
24
|
PyLegendList,
|
|
25
25
|
PyLegendSet,
|
|
26
|
+
PyLegendTuple,
|
|
27
|
+
PyLegendDict
|
|
26
28
|
)
|
|
27
|
-
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
28
29
|
from pylegend.core.language import (
|
|
29
30
|
PyLegendPrimitive,
|
|
30
31
|
)
|
|
32
|
+
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
|
|
31
33
|
from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
|
|
32
34
|
from pylegend.core.language.shared.primitives.boolean import PyLegendBoolean
|
|
33
35
|
from pylegend.core.language.shared.primitives.integer import PyLegendInteger
|
|
@@ -37,6 +39,7 @@ from pylegend.core.tds.tds_frame import PyLegendTdsFrame
|
|
|
37
39
|
|
|
38
40
|
if TYPE_CHECKING:
|
|
39
41
|
from pylegend.core.language.pandas_api.pandas_api_series import Series
|
|
42
|
+
from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
|
|
40
43
|
|
|
41
44
|
__all__: PyLegendSequence[str] = [
|
|
42
45
|
"PandasApiTdsFrame"
|
|
@@ -111,20 +114,154 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
|
|
|
111
114
|
|
|
112
115
|
@abstractmethod
|
|
113
116
|
def aggregate(
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
117
|
+
self,
|
|
118
|
+
func: PyLegendAggInput,
|
|
119
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
120
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
121
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
119
122
|
) -> "PandasApiTdsFrame":
|
|
120
123
|
pass # pragma: no cover
|
|
121
124
|
|
|
122
125
|
@abstractmethod
|
|
123
126
|
def agg(
|
|
127
|
+
self,
|
|
128
|
+
func: PyLegendAggInput,
|
|
129
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
130
|
+
*args: PyLegendPrimitiveOrPythonPrimitive,
|
|
131
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
132
|
+
) -> "PandasApiTdsFrame":
|
|
133
|
+
pass # pragma: no cover
|
|
134
|
+
|
|
135
|
+
@abstractmethod
|
|
136
|
+
def merge(
|
|
137
|
+
self,
|
|
138
|
+
other: "PandasApiTdsFrame",
|
|
139
|
+
how: PyLegendOptional[str] = "inner",
|
|
140
|
+
on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
141
|
+
left_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
142
|
+
right_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
143
|
+
left_index: PyLegendOptional[bool] = False,
|
|
144
|
+
right_index: PyLegendOptional[bool] = False,
|
|
145
|
+
sort: PyLegendOptional[bool] = False,
|
|
146
|
+
suffixes: PyLegendOptional[
|
|
147
|
+
PyLegendUnion[
|
|
148
|
+
PyLegendTuple[PyLegendUnion[str, None], PyLegendUnion[str, None]],
|
|
149
|
+
PyLegendList[PyLegendUnion[str, None]],
|
|
150
|
+
]
|
|
151
|
+
] = ("_x", "_y"),
|
|
152
|
+
indicator: PyLegendOptional[PyLegendUnion[bool, str]] = False,
|
|
153
|
+
validate: PyLegendOptional[str] = None
|
|
154
|
+
) -> "PandasApiTdsFrame":
|
|
155
|
+
pass # pragma: no cover
|
|
156
|
+
|
|
157
|
+
@abstractmethod
|
|
158
|
+
def join(
|
|
159
|
+
self,
|
|
160
|
+
other: "PandasApiTdsFrame",
|
|
161
|
+
on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
|
|
162
|
+
how: PyLegendOptional[str] = "left",
|
|
163
|
+
lsuffix: str = "",
|
|
164
|
+
rsuffix: str = "",
|
|
165
|
+
sort: PyLegendOptional[bool] = False,
|
|
166
|
+
validate: PyLegendOptional[str] = None
|
|
167
|
+
) -> "PandasApiTdsFrame":
|
|
168
|
+
pass # pragma: no cover
|
|
169
|
+
|
|
170
|
+
@abstractmethod
|
|
171
|
+
def rename(
|
|
172
|
+
self,
|
|
173
|
+
mapper: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
174
|
+
index: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
175
|
+
columns: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
|
|
176
|
+
axis: PyLegendUnion[str, int] = 1,
|
|
177
|
+
inplace: PyLegendUnion[bool] = False,
|
|
178
|
+
copy: PyLegendUnion[bool] = True,
|
|
179
|
+
level: PyLegendOptional[PyLegendUnion[int, str]] = None,
|
|
180
|
+
errors: str = "ignore",
|
|
181
|
+
) -> "PandasApiTdsFrame":
|
|
182
|
+
pass # pragma: no cover
|
|
183
|
+
|
|
184
|
+
@abstractmethod
|
|
185
|
+
def groupby(
|
|
124
186
|
self,
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
187
|
+
by: PyLegendUnion[str, PyLegendList[str]],
|
|
188
|
+
level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
|
|
189
|
+
as_index: bool = False,
|
|
190
|
+
sort: bool = True,
|
|
191
|
+
group_keys: bool = False,
|
|
192
|
+
observed: bool = False,
|
|
193
|
+
dropna: bool = False,
|
|
194
|
+
) -> "PandasApiGroupbyTdsFrame":
|
|
195
|
+
pass # pragma: no cover
|
|
196
|
+
|
|
197
|
+
@abstractmethod
|
|
198
|
+
def sum(
|
|
199
|
+
self,
|
|
200
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
201
|
+
skipna: bool = True,
|
|
202
|
+
numeric_only: bool = False,
|
|
203
|
+
min_count: int = 0,
|
|
204
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
205
|
+
) -> "PandasApiTdsFrame":
|
|
206
|
+
pass # pragma: no cover
|
|
207
|
+
|
|
208
|
+
@abstractmethod
|
|
209
|
+
def mean(
|
|
210
|
+
self,
|
|
211
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
212
|
+
skipna: bool = True,
|
|
213
|
+
numeric_only: bool = False,
|
|
214
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
215
|
+
) -> "PandasApiTdsFrame":
|
|
216
|
+
pass # pragma: no cover
|
|
217
|
+
|
|
218
|
+
@abstractmethod
|
|
219
|
+
def min(
|
|
220
|
+
self,
|
|
221
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
222
|
+
skipna: bool = True,
|
|
223
|
+
numeric_only: bool = False,
|
|
224
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
225
|
+
) -> "PandasApiTdsFrame":
|
|
226
|
+
pass # pragma: no cover
|
|
227
|
+
|
|
228
|
+
@abstractmethod
|
|
229
|
+
def max(
|
|
230
|
+
self,
|
|
231
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
232
|
+
skipna: bool = True,
|
|
233
|
+
numeric_only: bool = False,
|
|
234
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
235
|
+
) -> "PandasApiTdsFrame":
|
|
236
|
+
pass # pragma: no cover
|
|
237
|
+
|
|
238
|
+
@abstractmethod
|
|
239
|
+
def std(
|
|
240
|
+
self,
|
|
241
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
242
|
+
skipna: bool = True,
|
|
243
|
+
ddof: int = 1,
|
|
244
|
+
numeric_only: bool = False,
|
|
245
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
246
|
+
) -> "PandasApiTdsFrame":
|
|
247
|
+
pass # pragma: no cover
|
|
248
|
+
|
|
249
|
+
@abstractmethod
|
|
250
|
+
def var(
|
|
251
|
+
self,
|
|
252
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
253
|
+
skipna: bool = True,
|
|
254
|
+
ddof: int = 1,
|
|
255
|
+
numeric_only: bool = False,
|
|
256
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
257
|
+
) -> "PandasApiTdsFrame":
|
|
258
|
+
pass # pragma: no cover
|
|
259
|
+
|
|
260
|
+
@abstractmethod
|
|
261
|
+
def count(
|
|
262
|
+
self,
|
|
263
|
+
axis: PyLegendUnion[int, str] = 0,
|
|
264
|
+
numeric_only: bool = False,
|
|
265
|
+
**kwargs: PyLegendPrimitiveOrPythonPrimitive
|
|
129
266
|
) -> "PandasApiTdsFrame":
|
|
130
267
|
pass # pragma: no cover
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
from abc import ABCMeta
|
|
15
|
+
from pylegend._typing import (
|
|
16
|
+
PyLegendSequence,
|
|
17
|
+
PyLegendList,
|
|
18
|
+
)
|
|
19
|
+
from io import StringIO
|
|
20
|
+
from pylegend.core.tds.tds_column import (
|
|
21
|
+
PrimitiveType,
|
|
22
|
+
PrimitiveTdsColumn)
|
|
23
|
+
from pylegend.core.tds.tds_frame import FrameToPureConfig, FrameToSqlConfig, PyLegendTdsFrame
|
|
24
|
+
from pylegend.core.sql.metamodel import (
|
|
25
|
+
QuerySpecification,
|
|
26
|
+
)
|
|
27
|
+
import pandas as pd
|
|
28
|
+
|
|
29
|
+
__all__: PyLegendSequence[str] = [
|
|
30
|
+
"CsvInputFrameAbstract",
|
|
31
|
+
"tds_columns_from_csv_string"
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CsvInputFrameAbstract(PyLegendTdsFrame, metaclass=ABCMeta):
    """Abstract TDS frame whose content is supplied as CSV text.

    Column definitions are inferred from the CSV text at construction time and
    the text itself is kept for Pure generation.
    """

    # CSV text as passed to the constructor; embedded unchanged by to_pure().
    __csv_string: str

    def __init__(self, csv_string: str) -> None:
        """Infer the columns from ``csv_string`` and hand them to the next initialiser."""
        inferred_columns = tds_columns_from_csv_string(csv_string)
        super().__init__(columns=inferred_columns)  # type: ignore[call-arg]
        self.__csv_string = csv_string

    def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Always raise RuntimeError: SQL generation is not available for CSV frames."""
        raise RuntimeError("SQL generation for csv tds frames is not supported yet.")

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the CSV text wrapped between a ``#TDS`` header line and a closing ``#``."""
        csv_text = self.__csv_string
        return f"#TDS\n{csv_text}#"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def tds_columns_from_csv_string(
        csv_string: str
) -> PyLegendList[PrimitiveTdsColumn]:
    """Infer TDS column definitions from CSV text.

    The text is parsed with ``pandas.read_csv`` and every resulting column is
    mapped to a primitive type from its pandas dtype: bool -> Boolean,
    integer -> Integer, float -> Float. Any other column becomes Date when
    ``is_strict_date_or_datetime`` accepts it, and String otherwise.
    """
    frame = pd.read_csv(StringIO(csv_string))
    type_api = pd.api.types

    # dtype predicates are tried in this order; the first match wins.
    dtype_rules = (
        (type_api.is_bool_dtype, PrimitiveType.Boolean),
        (type_api.is_integer_dtype, PrimitiveType.Integer),
        (type_api.is_float_dtype, PrimitiveType.Float),
    )

    inferred_columns: PyLegendList[PrimitiveTdsColumn] = []
    for column_name in frame.columns:
        series = frame[column_name]
        column_dtype = series.dtype

        primitive_type = next(
            (candidate for matches, candidate in dtype_rules if matches(column_dtype)),
            None,
        )
        if primitive_type is None:
            # Not a bool/integer/float dtype: decide between Date and String by content.
            if is_strict_date_or_datetime(series):
                primitive_type = PrimitiveType.Date
            else:
                primitive_type = PrimitiveType.String

        inferred_columns.append(PrimitiveTdsColumn(name=column_name, _type=primitive_type))

    return inferred_columns
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def is_strict_date_or_datetime(col: pd.Series) -> bool:  # type: ignore[explicit-any]
    """Return True when every value in ``col`` matches one strict date(time) format.

    The column is accepted when all of its values parse with
    ``%Y-%m-%d %H:%M:%S`` or, failing that, when all of them parse with
    ``%Y-%m-%d``. A column with no non-null values returns False.
    """
    # pd.to_datetime succeeds vacuously on an empty or all-null column, which would
    # otherwise report such a column (e.g. from a header-only CSV) as a date column.
    if col.dropna().empty:
        return False

    for date_format in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
        try:
            pd.to_datetime(col, format=date_format, exact=True, errors="raise")
        except (ValueError, TypeError):
            # This format does not fit the whole column; try the next one.
            continue
        return True

    return False
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Copyright 2025 Goldman Sachs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
from pylegend._typing import (
|
|
15
|
+
PyLegendSequence,
|
|
16
|
+
)
|
|
17
|
+
from pylegend.core.tds.legendql_api.frames.legendql_api_input_tds_frame import (
|
|
18
|
+
LegendQLApiNonExecutableInputTdsFrame,
|
|
19
|
+
)
|
|
20
|
+
from pylegend.extensions.tds.abstract.csv_tds_frame import CsvInputFrameAbstract
|
|
21
|
+
|
|
22
|
+
__all__: PyLegendSequence[str] = [
|
|
23
|
+
"LegendQLApiCsvNonExecutableInputTdsFrame",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class LegendQLApiCsvNonExecutableInputTdsFrame(
    CsvInputFrameAbstract,
    LegendQLApiNonExecutableInputTdsFrame,
):
    """LegendQL-API non-executable input frame built from CSV text."""

    def __init__(self, csv_string: str) -> None:
        """Initialise both base classes explicitly from ``csv_string``.

        The CSV base is initialised first; the columns it exposes through
        ``columns()`` are then passed to the LegendQL input-frame base.
        """
        CsvInputFrameAbstract.__init__(self, csv_string=csv_string)
        inferred_columns = self.columns()
        LegendQLApiNonExecutableInputTdsFrame.__init__(self, columns=inferred_columns)
|