pylegend 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. pylegend/core/database/sql_to_string/db_extension.py +68 -6
  2. pylegend/core/language/legendql_api/legendql_api_custom_expressions.py +190 -5
  3. pylegend/core/language/pandas_api/pandas_api_series.py +3 -0
  4. pylegend/core/sql/metamodel.py +4 -1
  5. pylegend/core/tds/legendql_api/frames/functions/legendql_api_distinct_function.py +53 -7
  6. pylegend/core/tds/legendql_api/frames/legendql_api_base_tds_frame.py +146 -4
  7. pylegend/core/tds/legendql_api/frames/legendql_api_tds_frame.py +33 -2
  8. pylegend/core/tds/pandas_api/frames/functions/aggregate_function.py +221 -96
  9. pylegend/core/tds/pandas_api/frames/functions/assign_function.py +65 -23
  10. pylegend/core/tds/pandas_api/frames/functions/drop.py +3 -3
  11. pylegend/core/tds/pandas_api/frames/functions/dropna.py +167 -0
  12. pylegend/core/tds/pandas_api/frames/functions/fillna.py +162 -0
  13. pylegend/core/tds/pandas_api/frames/functions/filter.py +10 -5
  14. pylegend/core/tds/pandas_api/frames/functions/merge.py +513 -0
  15. pylegend/core/tds/pandas_api/frames/functions/rename.py +214 -0
  16. pylegend/core/tds/pandas_api/frames/functions/truncate_function.py +151 -120
  17. pylegend/core/tds/pandas_api/frames/pandas_api_applied_function_tds_frame.py +7 -3
  18. pylegend/core/tds/pandas_api/frames/pandas_api_base_tds_frame.py +559 -18
  19. pylegend/core/tds/pandas_api/frames/pandas_api_groupby_tds_frame.py +325 -0
  20. pylegend/core/tds/pandas_api/frames/pandas_api_tds_frame.py +218 -12
  21. pylegend/extensions/tds/abstract/csv_tds_frame.py +95 -0
  22. pylegend/extensions/tds/legendql_api/frames/legendql_api_csv_input_frame.py +36 -0
  23. pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_function_input_frame.py +9 -4
  24. pylegend/extensions/tds/pandas_api/frames/pandas_api_legend_service_input_frame.py +12 -5
  25. pylegend/extensions/tds/pandas_api/frames/pandas_api_table_spec_input_frame.py +12 -4
  26. {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/METADATA +1 -1
  27. {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/RECORD +31 -24
  28. {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/WHEEL +0 -0
  29. {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/LICENSE +0 -0
  30. {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/LICENSE.spdx +0 -0
  31. {pylegend-0.10.0.dist-info → pylegend-0.12.0.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,325 @@
1
+ # Copyright 2025 Goldman Sachs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from pylegend._typing import (
17
+ PyLegendOptional,
18
+ PyLegendUnion,
19
+ PyLegendList,
20
+ PyLegendDict,
21
+ TYPE_CHECKING,
22
+ )
23
+ from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
24
+ from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
25
+ from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
26
+
27
+ if TYPE_CHECKING:
28
+ from pylegend.core.tds.pandas_api.frames.pandas_api_tds_frame import PandasApiTdsFrame
29
+
30
+
31
class PandasApiGroupbyTdsFrame:
    """Pandas-style ``groupby`` handle over a TDS frame.

    The object stores the base frame, the validated grouping column names and an
    optional column selection made through ``obj[...]``. The aggregation methods
    (``aggregate``/``agg`` and the ``sum``/``mean``/... shortcuts) turn that state
    into a new applied-function frame.

    Only a subset of the pandas ``groupby`` parameters is supported: ``level``
    must be ``None`` and ``as_index``, ``group_keys``, ``observed`` and ``dropna``
    must all be ``False``; anything else raises ``NotImplementedError``.
    """

    __base_frame: PandasApiBaseTdsFrame
    __by: PyLegendUnion[str, PyLegendList[str]]
    __level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]]
    __as_index: bool
    __sort: bool
    __group_keys: bool
    __observed: bool
    __dropna: bool

    __grouping_column_name_list: PyLegendList[str]
    __selected_columns: PyLegendOptional[PyLegendList[str]]

    @classmethod
    def name(cls) -> str:
        return "groupby"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            by: PyLegendUnion[str, PyLegendList[str]],
            level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
            as_index: bool = False,
            sort: bool = True,
            group_keys: bool = False,
            observed: bool = False,
            dropna: bool = False,
    ) -> None:
        """Store the groupby arguments and validate them against ``base_frame``.

        Raises:
            NotImplementedError: if an unsupported parameter value is supplied.
            TypeError: if ``by`` is neither a string nor a list.
            ValueError: if ``by`` is an empty list.
            KeyError: if a name in ``by`` is not a column of ``base_frame``.
        """
        self.__base_frame = base_frame
        self.__by = by
        self.__level = level
        self.__as_index = as_index
        self.__sort = sort
        self.__group_keys = group_keys
        self.__observed = observed
        self.__dropna = dropna

        self.__selected_columns = None

        self.__validate()

    def __validate(self) -> None:
        """Check the constructor arguments and derive the grouping column list."""
        if self.__level is not None:
            raise NotImplementedError(
                "The 'level' parameter of the groupby function is not supported yet. "
                "Please specify groupby column names using the 'by' parameter."
            )

        # These flags are accepted for pandas signature compatibility only; the
        # identity check against False deliberately rejects truthy/falsy look-alikes.
        for param_name, param_value in (
                ("as_index", self.__as_index),
                ("group_keys", self.__group_keys),
                ("observed", self.__observed),
                ("dropna", self.__dropna),
        ):
            if param_value is not False:
                raise NotImplementedError(
                    f"The '{param_name}' parameter of the groupby function must be False, "
                    f"but got: {param_value} (type: {type(param_value).__name__})"
                )

        input_cols: PyLegendList[str]
        if isinstance(self.__by, str):
            input_cols = [self.__by]
        elif isinstance(self.__by, list):
            input_cols = self.__by
        else:
            raise TypeError(
                f"The 'by' parameter in groupby function must be a string or a list of strings, "
                f"but got: {self.__by} (type: {type(self.__by).__name__})"
            )

        if len(input_cols) == 0:
            raise ValueError("The 'by' parameter in groupby function must contain at least one column name.")

        available_columns = {c.get_name() for c in self.__base_frame.columns()}
        missing_cols = [col for col in input_cols if col not in available_columns]

        if len(missing_cols) > 0:
            raise KeyError(
                f"Column(s) {missing_cols} in groupby function's provided columns list "
                f"do not exist in the current frame. "
                f"Current frame columns: {sorted(available_columns)}"
            )

        # Copy so later mutation of the caller's list cannot change the grouping.
        self.__grouping_column_name_list = input_cols.copy()

    def __getitem__(self, item: PyLegendUnion[str, PyLegendList[str]]) -> "PandasApiGroupbyTdsFrame":
        """Return a new groupby handle restricted to the selected column(s).

        Raises:
            TypeError: if ``item`` is neither a string nor a list.
            ValueError: if ``item`` is an empty list.
            KeyError: if a selected name is not a column of the base frame.
        """
        columns_to_select: PyLegendList[str]

        if isinstance(item, str):
            columns_to_select = [item]
        elif isinstance(item, list):
            columns_to_select = item
        else:
            raise TypeError(
                f"Column selection after groupby function must be a string or a list of strings, "
                f"but got: {item} (type: {type(item).__name__})"
            )

        if len(columns_to_select) == 0:
            raise ValueError("When performing column selection after groupby, at least one column must be selected.")

        available_columns = {c.get_name() for c in self.__base_frame.columns()}
        missing_cols = [col for col in columns_to_select if col not in available_columns]

        if len(missing_cols) > 0:
            raise KeyError(
                f"Column(s) {missing_cols} selected after groupby do not exist in the current frame. "
                f"Current frame columns: {sorted(available_columns)}"
            )

        new_frame = PandasApiGroupbyTdsFrame(
            base_frame=self.__base_frame,
            by=self.__by,
            level=self.__level,
            as_index=self.__as_index,
            sort=self.__sort,
            group_keys=self.__group_keys,
            observed=self.__observed,
            dropna=self.__dropna,
        )

        new_frame.__selected_columns = columns_to_select.copy()
        return new_frame

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this groupby was created from."""
        return self.__base_frame

    def grouping_column_name_list(self) -> PyLegendList[str]:
        """Return a copy of the validated grouping column names."""
        return self.__grouping_column_name_list.copy()

    def selected_columns(self) -> PyLegendOptional[PyLegendList[str]]:
        """Return a copy of the selected column names, or ``None`` if none were selected."""
        if self.__selected_columns is None:
            return None
        return self.__selected_columns.copy()

    @staticmethod
    def __reject_engine_params(
            function_name: str,
            engine: PyLegendOptional[str],
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]],
    ) -> None:
        """Raise ``NotImplementedError`` if ``engine`` or ``engine_kwargs`` is supplied."""
        if engine is not None:
            raise NotImplementedError(f"engine parameter is not supported in {function_name} function.")
        if engine_kwargs is not None:
            raise NotImplementedError(f"engine_kwargs parameter is not supported in {function_name} function.")

    def aggregate(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> "PandasApiTdsFrame":
        """Aggregate the groups with ``func`` and return the resulting frame.

        When the groupby was created with ``sort=True`` the aggregated frame is
        additionally ordered by the grouping columns (ascending, nulls last).
        """
        # Imported lazily, as in the rest of this module, to avoid import cycles.
        from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunctionTdsFrame
        from pylegend.core.tds.pandas_api.frames.functions.aggregate_function import AggregateFunction

        aggregated_result: PandasApiAppliedFunctionTdsFrame = PandasApiAppliedFunctionTdsFrame(
            AggregateFunction(self, func, axis, *args, **kwargs)
        )

        if self.__sort:
            from pylegend.core.tds.pandas_api.frames.functions.sort_values_function import SortValuesFunction

            aggregated_result = PandasApiAppliedFunctionTdsFrame(
                SortValuesFunction(
                    base_frame=aggregated_result,
                    by=self.grouping_column_name_list(),
                    axis=0,
                    ascending=True,
                    inplace=False,
                    kind=None,
                    na_position="last",
                    ignore_index=True,
                    key=None,
                )
            )

        return aggregated_result

    def agg(
            self,
            func: PyLegendAggInput,
            axis: PyLegendUnion[int, str] = 0,
            *args: PyLegendPrimitiveOrPythonPrimitive,
            **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> "PandasApiTdsFrame":
        """Alias of :meth:`aggregate`.

        Delegates so that both spellings honour the ``sort`` flag identically.
        """
        return self.aggregate(func, axis, *args, **kwargs)

    def sum(
            self,
            numeric_only: bool = False,
            min_count: int = 0,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("sum")``; non-default arguments are not supported."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in sum function.")
        if min_count != 0:
            raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")
        self.__reject_engine_params("sum", engine, engine_kwargs)
        return self.aggregate("sum", 0)

    def mean(
            self,
            numeric_only: bool = False,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("mean")``; non-default arguments are not supported."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in mean function.")
        self.__reject_engine_params("mean", engine, engine_kwargs)
        return self.aggregate("mean", 0)

    def min(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("min")``; non-default arguments are not supported."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in min function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in min function, but got: {min_count}")
        self.__reject_engine_params("min", engine, engine_kwargs)
        return self.aggregate("min", 0)

    def max(
            self,
            numeric_only: bool = False,
            min_count: int = -1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("max")``; non-default arguments are not supported."""
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in max function.")
        if min_count != -1:
            raise NotImplementedError(f"min_count must be -1 (default) in max function, but got: {min_count}")
        self.__reject_engine_params("max", engine, engine_kwargs)
        return self.aggregate("max", 0)

    def std(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("std")``; only ``ddof=1`` and default arguments are supported."""
        if ddof != 1:
            raise NotImplementedError(f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}")
        self.__reject_engine_params("std", engine, engine_kwargs)
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in std function.")
        return self.aggregate("std", 0)

    def var(
            self,
            ddof: int = 1,
            engine: PyLegendOptional[str] = None,
            engine_kwargs: PyLegendOptional[PyLegendDict[str, bool]] = None,
            numeric_only: bool = False,
    ) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("var")``; only ``ddof=1`` and default arguments are supported."""
        if ddof != 1:
            raise NotImplementedError(f"Only ddof=1 (Sample Variance) is supported in var function, but got: {ddof}")
        self.__reject_engine_params("var", engine, engine_kwargs)
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in var function.")
        return self.aggregate("var", 0)

    def count(self) -> "PandasApiTdsFrame":
        """Shortcut for ``aggregate("count")``."""
        return self.aggregate("count", 0)
@@ -16,6 +16,13 @@ from abc import abstractmethod
16
16
  from datetime import date, datetime
17
17
  from typing import TYPE_CHECKING
18
18
 
19
+ from typing_extensions import Concatenate
20
+
21
+ try:
22
+ from typing import ParamSpec
23
+ except Exception:
24
+ from typing_extensions import ParamSpec # type: ignore
25
+
19
26
  from pylegend._typing import (
20
27
  PyLegendCallable,
21
28
  PyLegendSequence,
@@ -23,11 +30,13 @@ from pylegend._typing import (
23
30
  PyLegendOptional,
24
31
  PyLegendList,
25
32
  PyLegendSet,
33
+ PyLegendTuple,
34
+ PyLegendDict
26
35
  )
27
- from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
28
36
  from pylegend.core.language import (
29
37
  PyLegendPrimitive,
30
38
  )
39
+ from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
31
40
  from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow
32
41
  from pylegend.core.language.shared.primitives.boolean import PyLegendBoolean
33
42
  from pylegend.core.language.shared.primitives.integer import PyLegendInteger
@@ -37,11 +46,14 @@ from pylegend.core.tds.tds_frame import PyLegendTdsFrame
37
46
 
38
47
  if TYPE_CHECKING:
39
48
  from pylegend.core.language.pandas_api.pandas_api_series import Series
49
+ from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
40
50
 
41
51
  __all__: PyLegendSequence[str] = [
42
52
  "PandasApiTdsFrame"
43
53
  ]
44
54
 
55
+ P = ParamSpec("P")
56
+
45
57
 
46
58
  class PandasApiTdsFrame(PyLegendTdsFrame):
47
59
 
@@ -52,6 +64,14 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
52
64
  ) -> PyLegendUnion["PandasApiTdsFrame", "Series"]:
53
65
  pass # pragma: no cover
54
66
 
67
+ @abstractmethod
68
+ def __setitem__(
69
+ self,
70
+ key: str,
71
+ value: PyLegendUnion["Series", PyLegendPrimitiveOrPythonPrimitive]
72
+ ) -> None:
73
+ pass # pragma: no cover
74
+
55
75
  @abstractmethod
56
76
  def assign(
57
77
  self,
@@ -104,27 +124,213 @@ class PandasApiTdsFrame(PyLegendTdsFrame):
104
124
  index: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
105
125
  columns: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str], PyLegendSet[str]]] = None,
106
126
  level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]] = None,
107
- inplace: PyLegendUnion[bool, PyLegendBoolean] = True,
127
+ inplace: PyLegendUnion[bool, PyLegendBoolean] = False,
108
128
  errors: str = "raise",
109
129
  ) -> "PandasApiTdsFrame":
110
130
  pass # pragma: no cover
111
131
 
112
132
  @abstractmethod
113
133
  def aggregate(
114
- self,
115
- func: PyLegendAggInput,
116
- axis: PyLegendUnion[int, str] = 0,
117
- *args: PyLegendPrimitiveOrPythonPrimitive,
118
- **kwargs: PyLegendPrimitiveOrPythonPrimitive
134
+ self,
135
+ func: PyLegendAggInput,
136
+ axis: PyLegendUnion[int, str] = 0,
137
+ *args: PyLegendPrimitiveOrPythonPrimitive,
138
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
119
139
  ) -> "PandasApiTdsFrame":
120
140
  pass # pragma: no cover
121
141
 
122
142
  @abstractmethod
123
143
  def agg(
124
- self,
125
- func: PyLegendAggInput,
126
- axis: PyLegendUnion[int, str] = 0,
127
- *args: PyLegendPrimitiveOrPythonPrimitive,
128
- **kwargs: PyLegendPrimitiveOrPythonPrimitive
144
+ self,
145
+ func: PyLegendAggInput,
146
+ axis: PyLegendUnion[int, str] = 0,
147
+ *args: PyLegendPrimitiveOrPythonPrimitive,
148
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
149
+ ) -> "PandasApiTdsFrame":
150
+ pass # pragma: no cover
151
+
152
+ @abstractmethod
153
+ def merge(
154
+ self,
155
+ other: "PandasApiTdsFrame",
156
+ how: PyLegendOptional[str] = "inner",
157
+ on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
158
+ left_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
159
+ right_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
160
+ left_index: PyLegendOptional[bool] = False,
161
+ right_index: PyLegendOptional[bool] = False,
162
+ sort: PyLegendOptional[bool] = False,
163
+ suffixes: PyLegendOptional[
164
+ PyLegendUnion[
165
+ PyLegendTuple[PyLegendUnion[str, None], PyLegendUnion[str, None]],
166
+ PyLegendList[PyLegendUnion[str, None]],
167
+ ]
168
+ ] = ("_x", "_y"),
169
+ indicator: PyLegendOptional[PyLegendUnion[bool, str]] = False,
170
+ validate: PyLegendOptional[str] = None
171
+ ) -> "PandasApiTdsFrame":
172
+ pass # pragma: no cover
173
+
174
+ @abstractmethod
175
+ def join(
176
+ self,
177
+ other: "PandasApiTdsFrame",
178
+ on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
179
+ how: PyLegendOptional[str] = "left",
180
+ lsuffix: str = "",
181
+ rsuffix: str = "",
182
+ sort: PyLegendOptional[bool] = False,
183
+ validate: PyLegendOptional[str] = None
184
+ ) -> "PandasApiTdsFrame":
185
+ pass # pragma: no cover
186
+
187
+ @abstractmethod
188
+ def rename(
189
+ self,
190
+ mapper: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
191
+ index: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
192
+ columns: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
193
+ axis: PyLegendUnion[str, int] = 1,
194
+ inplace: PyLegendUnion[bool] = False,
195
+ copy: PyLegendUnion[bool] = True,
196
+ level: PyLegendOptional[PyLegendUnion[int, str]] = None,
197
+ errors: str = "ignore",
198
+ ) -> "PandasApiTdsFrame":
199
+ pass # pragma: no cover
200
+
201
+ @abstractmethod
202
+ def groupby(
203
+ self,
204
+ by: PyLegendUnion[str, PyLegendList[str]],
205
+ level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
206
+ as_index: bool = False,
207
+ sort: bool = True,
208
+ group_keys: bool = False,
209
+ observed: bool = False,
210
+ dropna: bool = False,
211
+ ) -> "PandasApiGroupbyTdsFrame":
212
+ pass # pragma: no cover
213
+
214
+ @abstractmethod
215
+ def sum(
216
+ self,
217
+ axis: PyLegendUnion[int, str] = 0,
218
+ skipna: bool = True,
219
+ numeric_only: bool = False,
220
+ min_count: int = 0,
221
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
222
+ ) -> "PandasApiTdsFrame":
223
+ pass # pragma: no cover
224
+
225
+ @abstractmethod
226
+ def mean(
227
+ self,
228
+ axis: PyLegendUnion[int, str] = 0,
229
+ skipna: bool = True,
230
+ numeric_only: bool = False,
231
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
232
+ ) -> "PandasApiTdsFrame":
233
+ pass # pragma: no cover
234
+
235
+ @abstractmethod
236
+ def min(
237
+ self,
238
+ axis: PyLegendUnion[int, str] = 0,
239
+ skipna: bool = True,
240
+ numeric_only: bool = False,
241
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
242
+ ) -> "PandasApiTdsFrame":
243
+ pass # pragma: no cover
244
+
245
+ @abstractmethod
246
+ def max(
247
+ self,
248
+ axis: PyLegendUnion[int, str] = 0,
249
+ skipna: bool = True,
250
+ numeric_only: bool = False,
251
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
252
+ ) -> "PandasApiTdsFrame":
253
+ pass # pragma: no cover
254
+
255
+ @abstractmethod
256
+ def std(
257
+ self,
258
+ axis: PyLegendUnion[int, str] = 0,
259
+ skipna: bool = True,
260
+ ddof: int = 1,
261
+ numeric_only: bool = False,
262
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
263
+ ) -> "PandasApiTdsFrame":
264
+ pass # pragma: no cover
265
+
266
+ @abstractmethod
267
+ def var(
268
+ self,
269
+ axis: PyLegendUnion[int, str] = 0,
270
+ skipna: bool = True,
271
+ ddof: int = 1,
272
+ numeric_only: bool = False,
273
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
274
+ ) -> "PandasApiTdsFrame":
275
+ pass # pragma: no cover
276
+
277
+ @abstractmethod
278
+ def count(
279
+ self,
280
+ axis: PyLegendUnion[int, str] = 0,
281
+ numeric_only: bool = False,
282
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
283
+ ) -> "PandasApiTdsFrame":
284
+ pass # pragma: no cover
285
+
286
+ @abstractmethod
287
+ def apply(
288
+ self,
289
+ func: PyLegendUnion[
290
+ PyLegendCallable[Concatenate["Series", P], PyLegendPrimitiveOrPythonPrimitive],
291
+ str
292
+ ],
293
+ axis: PyLegendUnion[int, str] = 0,
294
+ raw: bool = False,
295
+ result_type: PyLegendOptional[str] = None,
296
+ args: PyLegendTuple[PyLegendPrimitiveOrPythonPrimitive, ...] = (),
297
+ by_row: PyLegendUnion[bool, str] = "compat",
298
+ engine: str = "python",
299
+ engine_kwargs: PyLegendOptional[PyLegendDict[str, PyLegendPrimitiveOrPythonPrimitive]] = None,
300
+ **kwargs: PyLegendPrimitiveOrPythonPrimitive
301
+ ) -> "PandasApiTdsFrame":
302
+ pass # pragma: no cover
303
+
304
+ @abstractmethod
305
+ def head(self, n: int = 5) -> "PandasApiTdsFrame":
306
+ pass # pragma: no cover
307
+
308
+ @property
309
+ @abstractmethod
310
+ def shape(self) -> PyLegendTuple[int, int]:
311
+ pass # pragma: no cover
312
+
313
+ @abstractmethod
314
+ def dropna(
315
+ self,
316
+ axis: PyLegendUnion[int, str] = 0,
317
+ how: str = "any",
318
+ thresh: PyLegendOptional[int] = None,
319
+ subset: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
320
+ inplace: bool = False,
321
+ ignore_index: bool = False
322
+ ) -> "PandasApiTdsFrame":
323
+ pass # pragma: no cover
324
+
325
+ @abstractmethod
326
+ def fillna(
327
+ self,
328
+ value: PyLegendUnion[
329
+ int, float, str, bool, date, datetime,
330
+ PyLegendDict[str, PyLegendUnion[int, float, str, bool, date, datetime]]
331
+ ] = None, # type: ignore
332
+ axis: PyLegendOptional[PyLegendUnion[int, str]] = 0,
333
+ inplace: bool = False,
334
+ limit: PyLegendOptional[int] = None
129
335
  ) -> "PandasApiTdsFrame":
130
336
  pass # pragma: no cover
@@ -0,0 +1,95 @@
1
+ # Copyright 2025 Goldman Sachs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from abc import ABCMeta
15
+ from pylegend._typing import (
16
+ PyLegendSequence,
17
+ PyLegendList,
18
+ )
19
+ from io import StringIO
20
+ from pylegend.core.tds.tds_column import (
21
+ PrimitiveType,
22
+ PrimitiveTdsColumn)
23
+ from pylegend.core.tds.tds_frame import FrameToPureConfig, FrameToSqlConfig, PyLegendTdsFrame
24
+ from pylegend.core.sql.metamodel import (
25
+ QuerySpecification,
26
+ )
27
+ import pandas as pd
28
+
29
+ __all__: PyLegendSequence[str] = [
30
+ "CsvInputFrameAbstract",
31
+ "tds_columns_from_csv_string"
32
+ ]
33
+
34
+
35
class CsvInputFrameAbstract(PyLegendTdsFrame, metaclass=ABCMeta):
    """Abstract TDS frame whose data is supplied inline as a CSV string.

    The column definitions are inferred from the CSV text by
    ``tds_columns_from_csv_string``; the raw text itself is kept so it can be
    embedded in the generated Pure expression.
    """

    __csv_string: str

    def __init__(
            self,
            csv_string: str,
    ) -> None:
        """Infer the columns from ``csv_string`` and remember the raw text."""
        inferred_columns = tds_columns_from_csv_string(csv_string)
        super().__init__(columns=inferred_columns)  # type: ignore[call-arg]
        self.__csv_string = csv_string

    def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Always raises: SQL generation is not available for CSV frames."""
        raise RuntimeError("SQL generation for csv tds frames is not supported yet.")

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the CSV text wrapped as a ``#TDS ... #`` literal."""
        return "".join(("#TDS\n", self.__csv_string, "#"))
50
+
51
+
52
def tds_columns_from_csv_string(
        csv_string: str
) -> PyLegendList[PrimitiveTdsColumn]:
    """Build TDS column definitions from the header and data of a CSV string.

    The text is parsed with ``pandas.read_csv`` and each column's type is chosen
    from the parsed dtype, checked in this order: boolean, integer, float, then
    strict date/datetime strings; anything else is treated as a string column.
    """
    frame = pd.read_csv(StringIO(csv_string))
    dtype_checks = pd.api.types

    def infer_primitive_type(column_name: str) -> PrimitiveType:
        series = frame[column_name]
        kind = series.dtype
        if dtype_checks.is_bool_dtype(kind):
            return PrimitiveType.Boolean
        if dtype_checks.is_integer_dtype(kind):
            return PrimitiveType.Integer
        if dtype_checks.is_float_dtype(kind):
            return PrimitiveType.Float
        # Only probe the values as dates once the cheap dtype checks have failed.
        if is_strict_date_or_datetime(series):
            return PrimitiveType.Date
        return PrimitiveType.String

    return [
        PrimitiveTdsColumn(name=column_name, _type=infer_primitive_type(column_name))
        for column_name in frame.columns
    ]
82
+
83
+
84
def is_strict_date_or_datetime(col: pd.Series) -> bool:  # type: ignore[explicit-any]
    """Return True if every value in ``col`` parses with one fixed date format.

    The whole column is tried first as ``%Y-%m-%d %H:%M:%S`` and then as
    ``%Y-%m-%d``; the column qualifies only if one single format fits all of it.
    """
    for candidate_format in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
        try:
            pd.to_datetime(col, format=candidate_format, exact=True, errors="raise")
        except (ValueError, TypeError):
            # This format does not fit the column; fall through to the next one.
            continue
        return True
    return False