pylegend 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,214 @@
1
+ # Copyright 2025 Goldman Sachs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from pylegend._typing import (
16
+ PyLegendList,
17
+ PyLegendSequence,
18
+ PyLegendUnion,
19
+ PyLegendOptional,
20
+ PyLegendCallable,
21
+ PyLegendDict
22
+ )
23
+ from pylegend.core.language import (
24
+ PyLegendInteger,
25
+ PyLegendBoolean
26
+ )
27
+ from pylegend.core.sql.metamodel import (
28
+ QuerySpecification,
29
+ SelectItem,
30
+ SingleColumn
31
+ )
32
+ from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import PandasApiAppliedFunction
33
+ from pylegend.core.tds.pandas_api.frames.pandas_api_base_tds_frame import PandasApiBaseTdsFrame
34
+ from pylegend.core.tds.sql_query_helpers import copy_query
35
+ from pylegend.core.tds.tds_column import TdsColumn
36
+ from pylegend.core.tds.tds_frame import FrameToPureConfig, FrameToSqlConfig
37
+
38
+
39
class PandasApiRenameFunction(PandasApiAppliedFunction):
    """Applied function backing a pandas-style ``rename`` on a TDS frame.

    Only column renames are implemented. ``validate`` rejects ``level``,
    ``inplace=True``, ``copy=False``, an ``index`` mapper and any ``axis``
    that targets the index. The column mapping may be a dict
    (``{old_name: new_name}``) or a callable applied to every column name.
    """

    __base_frame: PandasApiBaseTdsFrame
    __mapper: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]]
    __axis: PyLegendUnion[str, int, PyLegendInteger]
    __index: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]]
    __columns: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]]
    __level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]]
    __inplace: PyLegendUnion[bool, PyLegendBoolean]
    __copy: PyLegendUnion[bool, PyLegendBoolean]
    __errors: str

    @classmethod
    def name(cls) -> str:
        """Return the name of this applied function."""
        return "rename"  # pragma: no cover

    def __init__(
            self,
            base_frame: PandasApiBaseTdsFrame,
            mapper: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]],
            axis: PyLegendUnion[str, int, PyLegendInteger],
            index: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]],
            columns: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]],
            level: PyLegendOptional[PyLegendUnion[int, PyLegendInteger, str]],
            inplace: PyLegendUnion[bool, PyLegendBoolean],
            errors: str,
            copy: PyLegendUnion[bool, PyLegendBoolean]
    ) -> None:
        """Store the frame and the raw ``rename`` arguments; checking happens in ``validate``."""
        self.__base_frame = base_frame
        self.__mapper = mapper
        self.__axis = axis
        self.__index = index
        self.__columns = columns
        self.__level = level
        self.__inplace = inplace
        self.__errors = errors
        self.__copy = copy

    def __resolve_columns_mapping(self) -> PyLegendDict[str, str]:
        """Return the effective ``{old_name: new_name}`` mapping for the base frame.

        ``columns`` takes priority; ``mapper`` is used only when ``axis`` is
        ``1`` or ``"columns"``. Identity renames are dropped, and dict keys
        that are not columns of the base frame are ignored.

        Raises:
            TypeError: the mapping is neither a dict nor a callable, or a new
                name that would be applied is not a ``str``.
            KeyError: ``errors == "raise"`` and a dict key is not a column of
                the base frame.
        """
        base_cols = [c.get_name() for c in self.__base_frame.columns()]
        mapping_source = None

        axis_is_columns = (self.__axis == 1 or self.__axis == "columns")

        # Priority: explicit columns=, else mapper when axis targets columns
        if self.__columns is not None:
            mapping_source = self.__columns
        elif self.__mapper is not None and axis_is_columns:
            mapping_source = self.__mapper

        if mapping_source is None:
            return {}

        if not callable(mapping_source) and not isinstance(mapping_source, dict):
            raise TypeError(
                f"Rename mapping must be a dict or a callable, got {type(mapping_source)}"
            )

        out: PyLegendDict[str, str] = {}
        if callable(mapping_source):
            func = mapping_source
            for col in base_cols:
                new = func(col)
                if not isinstance(new, str):
                    raise TypeError(
                        f"Rename function must return str, got {type(new)} for column {col}")  # pragma: no cover
                if new != col:
                    out[col] = new
        else:
            # dict-like
            dict_map: PyLegendDict[str, str] = mapping_source
            # Set membership instead of repeated scans of the column list.
            known_cols = set(base_cols)
            if self.__errors == "raise":
                missing = [k for k in dict_map if k not in known_cols]
                if missing:
                    raise KeyError(f"{missing} not found in axis")

            for old, new in dict_map.items():
                if old not in known_cols:
                    continue
                # Mirror the callable branch: only str names may be applied.
                if not isinstance(new, str):
                    raise TypeError(
                        f"Rename mapping values must be str, got {type(new)} for column {old}")
                if old != new:
                    out[old] = new

        return out

    def to_sql(self, config: FrameToSqlConfig) -> QuerySpecification:
        """Return a copy of the base query whose select aliases reflect the rename.

        Raises:
            ValueError: a select item is not a ``SingleColumn`` or has no alias.
        """
        rename_map = self.__resolve_columns_mapping()
        base_query = self.__base_frame.to_sql_query_object(config)
        db_extension = config.sql_to_string_generator().get_db_extension()

        # Select aliases are compared in quoted form, so key the lookup by quoted old name.
        alias_map: PyLegendDict[str, str] = {
            db_extension.quote_identifier(old): db_extension.quote_identifier(new)
            for old, new in rename_map.items()
        }

        new_select_items: PyLegendList[SelectItem] = []
        for col in base_query.select.selectItems:
            if not isinstance(col, SingleColumn):
                raise ValueError("Rename operation not supported for non-SingleColumn select items")  # pragma: no cover
            if col.alias is None:
                raise ValueError("Rename operation requires SingleColumn items with aliases")  # pragma: no cover
            new_alias = alias_map.get(col.alias)
            if new_alias is None:
                new_select_items.append(col)
            else:
                new_select_items.append(SingleColumn(alias=new_alias, expression=col.expression))

        new_query = copy_query(base_query)
        new_query.select.selectItems = new_select_items
        return new_query

    def to_pure(self, config: FrameToPureConfig) -> str:
        """Return the base frame's Pure expression followed by a ``->project(~[...])``.

        Every base column is projected as ``<new>:x|$x.<orig>``; columns that
        are not renamed keep their original name.
        """
        rename_map = self.__resolve_columns_mapping()
        base_pure = self.__base_frame.to_pure(config)

        # Build a single project that aliases columns to new names
        # NOTE(review): names are interpolated verbatim; confirm whether column
        # names with special characters need escaping here.
        project_items: PyLegendList[str] = []
        for c in self.__base_frame.columns():
            orig = c.get_name()
            new = rename_map.get(orig, orig)
            project_items.append(f"{new}:x|$x.{orig}")

        project_body = ", ".join(project_items)
        return (
            f"{base_pure}{config.separator(1)}"
            f"->project({config.separator(2)}~[{project_body}]{config.separator(1)})"
        )

    def base_frame(self) -> PandasApiBaseTdsFrame:
        """Return the frame this function is applied to."""
        return self.__base_frame

    def tds_frame_parameters(self) -> PyLegendList["PandasApiBaseTdsFrame"]:
        """Return an empty list: rename takes no additional frames."""
        return []

    def calculate_columns(self) -> PyLegendSequence["TdsColumn"]:
        """Return copies of the base columns with the rename applied.

        Raises:
            ValueError: two resulting columns share the same name.
        """
        rename_map = self.__resolve_columns_mapping()
        new_cols = []
        for c in self.__base_frame.columns():
            name = c.get_name()
            if name in rename_map:
                new_cols.append(c.copy_with_changed_name(rename_map[name]))
            else:
                new_cols.append(c.copy())
        names = [c.get_name() for c in new_cols]
        if len(names) != len(set(names)):
            raise ValueError("Resulting columns contain duplicates after rename")
        return new_cols

    def validate(self) -> bool:
        """Check the stored arguments and return ``True`` when they are supported.

        Raises:
            NotImplementedError: ``level`` is given, ``inplace`` is ``True``,
                ``copy`` is ``False``, ``axis`` targets the index, or an
                ``index`` mapper is given.
            TypeError: ``inplace`` or ``copy`` is not a ``bool``, or the column
                mapping is invalid.
            ValueError: ``errors`` or ``axis`` has an unsupported value, or both
                ``mapper`` and ``columns`` are given.
            KeyError: ``errors == "raise"`` and a dict key is not a column.
        """
        if self.__level is not None:
            raise NotImplementedError("level parameter not supported yet in Pandas API")

        if not isinstance(self.__inplace, bool):
            raise TypeError(f"inplace must be bool. Got {type(self.__inplace)}")
        if self.__inplace is True:
            raise NotImplementedError("inplace=True not supported yet in Pandas API")

        if not isinstance(self.__copy, bool):
            raise TypeError(f"copy must be bool. Got {type(self.__copy)}")
        if self.__copy is False:
            raise NotImplementedError("copy=False not supported yet in Pandas API")

        if self.__errors not in ("ignore", "raise"):
            raise ValueError(f"errors must be 'ignore' or 'raise'. Got {self.__errors}")

        # axis validation
        if self.__axis not in (1, "columns", 0, "index"):
            raise ValueError(f"Unsupported axis {self.__axis}")
        if self.__axis in (0, "index"):
            raise NotImplementedError("Renaming index not supported yet in Pandas API")

        # index
        if self.__index is not None:
            raise NotImplementedError("Index mapper not supported yet in Pandas API")

        # conflict validation: compare against None so an empty mapping still
        # counts as "specified"; the message names the argument actually tested.
        if self.__mapper is not None and self.__columns is not None:
            raise ValueError("Cannot specify both 'mapper' and any of 'index' or 'columns'")

        self.__resolve_columns_mapping()  # runs validation
        return True
@@ -22,10 +22,12 @@ from pylegend._typing import (
22
22
  PyLegendSequence,
23
23
  PyLegendTypeVar,
24
24
  PyLegendList,
25
+ PyLegendTuple,
25
26
  PyLegendSet,
26
27
  PyLegendOptional,
27
28
  PyLegendCallable,
28
29
  PyLegendUnion,
30
+ PyLegendDict
29
31
  )
30
32
  from pylegend.core.database.sql_to_string import (
31
33
  SqlToStringConfig,
@@ -53,6 +55,7 @@ from pylegend.extensions.tds.result_handler import (
53
55
 
54
56
  if TYPE_CHECKING:
55
57
  from pylegend.core.language.pandas_api.pandas_api_series import Series
58
+ from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
56
59
 
57
60
  __all__: PyLegendSequence[str] = [
58
61
  "PandasApiBaseTdsFrame"
@@ -272,6 +275,278 @@ class PandasApiBaseTdsFrame(PandasApiTdsFrame, BaseTdsFrame, metaclass=ABCMeta):
272
275
  **kwargs
273
276
  ))
274
277
 
278
def sum(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        min_count: int = 0,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Delegate to ``self.aggregate("sum", 0)`` after rejecting unsupported options.

    Raises:
        NotImplementedError: ``axis`` is not ``0``/``"index"``, ``skipna`` is not
            ``True``, ``numeric_only`` is not ``False``, ``min_count`` is not
            ``0``, or extra keyword arguments are passed.
    """
    # Checks run in the same order as the parameters are declared; the first
    # failing one determines the error message.
    if axis not in (0, "index"):
        problem = f"The 'axis' parameter must be 0 or 'index' in sum function, but got: {axis}"
    elif skipna is not True:
        problem = ("skipna=False is not currently supported in sum function. "
                   "SQL aggregation ignores nulls by default.")
    elif numeric_only is not False:
        problem = "numeric_only=True is not currently supported in sum function."
    elif min_count != 0:
        problem = f"min_count must be 0 in sum function, but got: {min_count}"
    elif kwargs:
        problem = f"Additional keyword arguments not supported in sum function: {list(kwargs)}"
    else:
        return self.aggregate("sum", 0)
    raise NotImplementedError(problem)
298
+
299
def mean(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Delegate to ``self.aggregate("mean", 0)`` after rejecting unsupported options.

    Raises:
        NotImplementedError: ``axis`` is not ``0``/``"index"``, ``skipna`` is not
            ``True``, ``numeric_only`` is not ``False``, or extra keyword
            arguments are passed.
    """
    # The first failing check, in declaration order, determines the message.
    if axis not in (0, "index"):
        problem = f"The 'axis' parameter must be 0 or 'index' in mean function, but got: {axis}"
    elif skipna is not True:
        problem = "skipna=False is not currently supported in mean function."
    elif numeric_only is not False:
        problem = "numeric_only=True is not currently supported in mean function."
    elif kwargs:
        problem = f"Additional keyword arguments not supported in mean function: {list(kwargs)}"
    else:
        return self.aggregate("mean", 0)
    raise NotImplementedError(problem)
315
+
316
def min(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Delegate to ``self.aggregate("min", 0)`` after rejecting unsupported options.

    Raises:
        NotImplementedError: ``axis`` is not ``0``/``"index"``, ``skipna`` is not
            ``True``, ``numeric_only`` is not ``False``, or extra keyword
            arguments are passed.
    """
    # The first failing check, in declaration order, determines the message.
    if axis not in (0, "index"):
        problem = f"The 'axis' parameter must be 0 or 'index' in min function, but got: {axis}"
    elif skipna is not True:
        problem = "skipna=False is not currently supported in min function."
    elif numeric_only is not False:
        problem = "numeric_only=True is not currently supported in min function."
    elif kwargs:
        problem = f"Additional keyword arguments not supported in min function: {list(kwargs)}"
    else:
        return self.aggregate("min", 0)
    raise NotImplementedError(problem)
332
+
333
def max(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Delegate to ``self.aggregate("max", 0)`` after rejecting unsupported options.

    Raises:
        NotImplementedError: ``axis`` is not ``0``/``"index"``, ``skipna`` is not
            ``True``, ``numeric_only`` is not ``False``, or extra keyword
            arguments are passed.
    """
    # The first failing check, in declaration order, determines the message.
    if axis not in (0, "index"):
        problem = f"The 'axis' parameter must be 0 or 'index' in max function, but got: {axis}"
    elif skipna is not True:
        problem = "skipna=False is not currently supported in max function."
    elif numeric_only is not False:
        problem = "numeric_only=True is not currently supported in max function."
    elif kwargs:
        problem = f"Additional keyword arguments not supported in max function: {list(kwargs)}"
    else:
        return self.aggregate("max", 0)
    raise NotImplementedError(problem)
349
+
350
def std(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Delegate to ``self.aggregate("std", 0)`` after rejecting unsupported options.

    Raises:
        NotImplementedError: ``axis`` is not ``0``/``"index"``, ``skipna`` is not
            ``True``, ``ddof`` is not ``1``, ``numeric_only`` is not ``False``,
            or extra keyword arguments are passed.
    """
    # The first failing check, in declaration order, determines the message.
    if axis not in (0, "index"):
        problem = f"The 'axis' parameter must be 0 or 'index' in std function, but got: {axis}"
    elif skipna is not True:
        problem = "skipna=False is not currently supported in std function."
    elif ddof != 1:
        problem = f"Only ddof=1 (Sample Standard Deviation) is supported in std function, but got: {ddof}"
    elif numeric_only is not False:
        problem = "numeric_only=True is not currently supported in std function."
    elif kwargs:
        problem = f"Additional keyword arguments not supported in std function: {list(kwargs)}"
    else:
        return self.aggregate("std", 0)
    raise NotImplementedError(problem)
369
+
370
def var(
        self,
        axis: PyLegendUnion[int, str] = 0,
        skipna: bool = True,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Delegate to ``self.aggregate("var", 0)`` after rejecting unsupported options.

    Raises:
        NotImplementedError: ``axis`` is not ``0``/``"index"``, ``skipna`` is not
            ``True``, ``ddof`` is not ``1``, ``numeric_only`` is not ``False``,
            or extra keyword arguments are passed.
    """
    # The first failing check, in declaration order, determines the message.
    if axis not in (0, "index"):
        problem = f"The 'axis' parameter must be 0 or 'index' in var function, but got: {axis}"
    elif skipna is not True:
        problem = "skipna=False is not currently supported in var function."
    elif ddof != 1:
        problem = f"Only ddof=1 (Sample Variance) is supported in var function, but got: {ddof}"
    elif numeric_only is not False:
        problem = "numeric_only=True is not currently supported in var function."
    elif kwargs:
        problem = f"Additional keyword arguments not supported in var function: {list(kwargs)}"
    else:
        return self.aggregate("var", 0)
    raise NotImplementedError(problem)
389
+
390
def count(
        self,
        axis: PyLegendUnion[int, str] = 0,
        numeric_only: bool = False,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive
) -> "PandasApiTdsFrame":
    """Delegate to ``self.aggregate("count", 0)`` after rejecting unsupported options.

    Raises:
        NotImplementedError: ``axis`` is not ``0``/``"index"``, ``numeric_only``
            is not ``False``, or extra keyword arguments are passed.
    """
    # The first failing check, in declaration order, determines the message.
    if axis not in (0, "index"):
        problem = f"The 'axis' parameter must be 0 or 'index' in count function, but got: {axis}"
    elif numeric_only is not False:
        problem = "numeric_only=True is not currently supported in count function."
    elif kwargs:
        problem = f"Additional keyword arguments not supported in count function: {list(kwargs)}"
    else:
        return self.aggregate("count", 0)
    raise NotImplementedError(problem)
403
+
404
def groupby(
        self,
        by: PyLegendUnion[str, PyLegendList[str]],
        level: PyLegendOptional[PyLegendUnion[str, int, PyLegendList[str]]] = None,
        as_index: bool = False,
        sort: bool = True,
        group_keys: bool = False,
        observed: bool = False,
        dropna: bool = False,
) -> "PandasApiGroupbyTdsFrame":
    """Return a ``PandasApiGroupbyTdsFrame`` built from this frame and the given options.

    All arguments are forwarded unchanged; no checking is done in this method.
    """
    # Imported at call time (the module-level import is TYPE_CHECKING-only).
    from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import (
        PandasApiGroupbyTdsFrame
    )

    grouped_frame = PandasApiGroupbyTdsFrame(
        base_frame=self,
        by=by,
        level=level,
        as_index=as_index,
        sort=sort,
        group_keys=group_keys,
        observed=observed,
        dropna=dropna
    )
    return grouped_frame
427
+
428
def merge(
        self,
        other: "PandasApiTdsFrame",
        how: PyLegendOptional[str] = "inner",
        on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
        left_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
        right_on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
        left_index: PyLegendOptional[bool] = False,
        right_index: PyLegendOptional[bool] = False,
        sort: PyLegendOptional[bool] = False,
        suffixes: PyLegendOptional[
            PyLegendUnion[
                PyLegendTuple[PyLegendUnion[str, None], PyLegendUnion[str, None]],
                PyLegendList[PyLegendUnion[str, None]],
            ]
        ] = ("_x", "_y"),
        indicator: PyLegendOptional[PyLegendUnion[bool, str]] = False,
        validate: PyLegendOptional[str] = None
) -> "PandasApiTdsFrame":
    """
    Pandas-like merge of this frame with `other`:
        - `on` and (`left_on`, `right_on`) are mutually exclusive
        - With no keys given, the intersection of column names is inferred
        - `how`: inner | left | right | outer (outer mapped to full)
        - `suffixes`: applied to overlapping non-key columns
        - When `sort` is truthy the merged frame is additionally sorted ascending
          by the merge function's sort keys
    """
    from pylegend.core.tds.pandas_api.frames.functions.merge import (
        PandasApiMergeFunction
    )
    from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
        PandasApiAppliedFunctionTdsFrame
    )

    merge_function = PandasApiMergeFunction(
        self,
        other,  # type: ignore
        how=how,
        on=on,
        left_on=left_on,
        right_on=right_on,
        left_index=left_index,
        right_index=right_index,
        sort=sort,
        suffixes=suffixes,
        indicator=indicator,
        validate=validate
    )
    result_frame = PandasApiAppliedFunctionTdsFrame(merge_function)

    # Unsorted merges are returned as-is.
    if not sort:
        return result_frame

    return result_frame.sort_values(
        by=merge_function.get_sort_keys(),
        axis=0,
        ascending=True,
        inplace=False,
        kind=None,
        na_position="last",
        ignore_index=True,
        key=None
    )
489
+
490
def join(
        self,
        other: "PandasApiTdsFrame",
        on: PyLegendOptional[PyLegendUnion[str, PyLegendSequence[str]]] = None,
        how: PyLegendOptional[str] = "left",
        lsuffix: str = "",
        rsuffix: str = "",
        sort: PyLegendOptional[bool] = False,
        validate: PyLegendOptional[str] = None
) -> "PandasApiTdsFrame":
    """
    Pandas-like join implemented by delegating to `merge`.

    There is no index support: only column-on-column joins via `on`.
    `lsuffix` and `rsuffix` are passed to `merge` as its two-element `suffixes` list.
    """
    suffix_pair = [lsuffix, rsuffix]
    merged_frame = self.merge(
        other=other,
        how=how,
        on=on,
        sort=sort,
        suffixes=suffix_pair,
        validate=validate
    )
    return merged_frame
511
+
512
def rename(
        self,
        mapper: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
        index: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
        columns: PyLegendOptional[PyLegendUnion[PyLegendDict[str, str], PyLegendCallable[[str], str]]] = None,
        axis: PyLegendUnion[str, int] = 1,
        inplace: PyLegendUnion[bool] = False,
        copy: PyLegendUnion[bool] = True,
        level: PyLegendOptional[PyLegendUnion[int, str]] = None,
        errors: str = "ignore",
) -> "PandasApiTdsFrame":
    """
    Pandas-like rename:
        - Accepts a mapping via `mapper` or the explicit `index`/`columns` arguments
        - Only column renames are applied when `axis` is 1
        - `errors`: ignore | raise

    All arguments are forwarded unchanged to `PandasApiRenameFunction`, which is
    wrapped in a `PandasApiAppliedFunctionTdsFrame`.
    """
    from pylegend.core.tds.pandas_api.frames.functions.rename import (
        PandasApiRenameFunction
    )
    from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
        PandasApiAppliedFunctionTdsFrame
    )

    rename_function = PandasApiRenameFunction(
        base_frame=self,
        mapper=mapper,
        axis=axis,
        index=index,
        columns=columns,
        copy=copy,
        inplace=inplace,
        level=level,
        errors=errors
    )
    return PandasApiAppliedFunctionTdsFrame(rename_function)
549
+
275
550
  @abstractmethod
276
551
  def to_sql_query_object(self, config: FrameToSqlConfig) -> QuerySpecification:
277
552
  pass # pragma: no cover