sdk-seshat-python 0.4.3__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/PKG-INFO +1 -1
  2. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/pyproject.toml +1 -1
  3. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/data_class/base.py +1 -1
  4. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/config.py +1 -0
  5. sdk_seshat_python-0.4.4/seshat/transformer/aggregator/base.py +160 -0
  6. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/__init__.py +2 -1
  7. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/base.py +405 -147
  8. sdk_seshat_python-0.4.4/seshat/transformer/imputer/base.py +57 -0
  9. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/nested_key.py +32 -37
  10. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/branch.py +7 -0
  11. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/action_gate.py +1 -0
  12. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/trimmer/base.py +14 -0
  13. sdk_seshat_python-0.4.3/seshat/transformer/aggregator/base.py +0 -107
  14. sdk_seshat_python-0.4.3/seshat/transformer/imputer/base.py +0 -6
  15. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/LICENSE +0 -0
  16. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/README.md +0 -0
  17. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/__init__.py +0 -0
  18. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/__main__.py +0 -0
  19. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/data_class/__init__.py +0 -0
  20. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/data_class/pandas.py +0 -0
  21. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/data_class/pyspark.py +0 -0
  22. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/__init__.py +0 -0
  23. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/base.py +0 -0
  24. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/__init__.py +0 -0
  25. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/base.py +0 -0
  26. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/__init__.py +0 -0
  27. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/classification.py +0 -0
  28. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/clustering.py +0 -0
  29. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/regression.py +0 -0
  30. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/__init__.py +0 -0
  31. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/diversity.py +0 -0
  32. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/ranking.py +0 -0
  33. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/feature_view/__init__.py +0 -0
  34. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/feature_view/base.py +0 -0
  35. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/__init__.py +0 -0
  36. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/command/__init__.py +0 -0
  37. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/command/base.py +0 -0
  38. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/command/code_inspect.py +0 -0
  39. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/command/job_status.py +0 -0
  40. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/command/setup_project.py +0 -0
  41. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/command/submit_to_network.py +0 -0
  42. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/exceptions.py +0 -0
  43. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/lazy_config.py +0 -0
  44. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/models.py +0 -0
  45. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/template/README.md-tmpl +0 -0
  46. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/template/config.py-tmpl +0 -0
  47. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/template/env-templ +0 -0
  48. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/template/jobignore-tmpl +0 -0
  49. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/template/pyproject._toml-tmpl +0 -0
  50. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/template/recommender-jupyter.ipynb-tmpl +0 -0
  51. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/general/template/recommender.py-tmpl +0 -0
  52. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/profiler/__init__.py +0 -0
  53. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/profiler/base.py +0 -0
  54. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/profiler/decorator.py +0 -0
  55. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/profiler/format.py +0 -0
  56. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/__init__.py +0 -0
  57. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/base.py +0 -0
  58. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/database/__init__.py +0 -0
  59. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/database/base.py +0 -0
  60. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/exceptions.py +0 -0
  61. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/flip_side/__init__.py +0 -0
  62. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/flip_side/base.py +0 -0
  63. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/local/__init__.py +0 -0
  64. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/local/base.py +0 -0
  65. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/mixins.py +0 -0
  66. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/multisource/__init__.py +0 -0
  67. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/multisource/base.py +0 -0
  68. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/saver/__init__.py +0 -0
  69. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/saver/base.py +0 -0
  70. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/saver/database.py +0 -0
  71. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/saver/utils/__init__.py +0 -0
  72. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/source/saver/utils/postgres.py +0 -0
  73. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/__init__.py +0 -0
  74. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/aggregator/__init__.py +0 -0
  75. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/augmenter/__init__.py +0 -0
  76. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/augmenter/base.py +0 -0
  77. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/base.py +0 -0
  78. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/from_database.py +0 -0
  79. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/imputer/__init__.py +0 -0
  80. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/__init__.py +0 -0
  81. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/base.py +0 -0
  82. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/__init__.py +0 -0
  83. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/base.py +0 -0
  84. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/recommendation/__init__.py +0 -0
  85. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/recommendation/address_pipeline.py +0 -0
  86. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/__init__.py +0 -0
  87. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/table_existence.py +0 -0
  88. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/reducer/__init__.py +0 -0
  89. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/reducer/base.py +0 -0
  90. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/scaler/__init__.py +0 -0
  91. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/scaler/base.py +0 -0
  92. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/schema/__init__.py +0 -0
  93. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/schema/base.py +0 -0
  94. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/__init__.py +0 -0
  95. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/base.py +0 -0
  96. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/block/__init__.py +0 -0
  97. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/block/base.py +0 -0
  98. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/random/__init__.py +0 -0
  99. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/random/base.py +0 -0
  100. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/time_line/__init__.py +0 -0
  101. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/time_line/base.py +0 -0
  102. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/trimmer/__init__.py +0 -0
  103. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/__init__.py +0 -0
  104. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/base.py +0 -0
  105. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/cosine_similarity.py +0 -0
  106. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/pivot.py +0 -0
  107. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/utils.py +0 -0
  108. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/__init__.py +0 -0
  109. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/batcher.py +0 -0
  110. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/binary_utils.py +0 -0
  111. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/clean_json.py +0 -0
  112. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/col_to_list.py +0 -0
  113. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/contracts.py +0 -0
  114. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/date_utils.py +0 -0
  115. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/file.py +0 -0
  116. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/file_cryptography.py +0 -0
  117. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/filter_json.py +0 -0
  118. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/grouper.py +0 -0
  119. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/jobignore.py +0 -0
  120. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/join_columns_to_list.py +0 -0
  121. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/join_str.py +0 -0
  122. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/llm_client/__init__.py +0 -0
  123. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/llm_client/chatbot_factory.py +0 -0
  124. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/logging/__init__.py +0 -0
  125. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/logging/base_logger.py +0 -0
  126. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/logging/console_logger.py +0 -0
  127. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/logging/logstash_logger.py +0 -0
  128. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/logging/multi_logger.py +0 -0
  129. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/memory.py +0 -0
  130. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/mixin.py +0 -0
  131. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/obfuscate.py +0 -0
  132. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/package_utils.py +0 -0
  133. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/pandas_func.py +0 -0
  134. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/patching.py +0 -0
  135. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/pyspark_func.py +0 -0
  136. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/rest.py +0 -0
  137. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/singleton.py +0 -0
  138. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/validation.py +0 -0
  139. {sdk_seshat_python-0.4.3 → sdk_seshat_python-0.4.4}/seshat/utils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sdk-seshat-python
3
- Version: 0.4.3
3
+ Version: 0.4.4
4
4
  Summary: Seshat python SDK is a library to help create ML data pipelines.
5
5
  License: Commercial - see LICENSE.txt
6
6
  Author: SeshatLabs
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sdk-seshat-python"
3
- version = "0.4.3"
3
+ version = "0.4.4"
4
4
  description = "Seshat python SDK is a library to help create ML data pipelines."
5
5
  authors = ["SeshatLabs <info@seshatlabs.xyz>"]
6
6
  packages = [{ include = "seshat", from = "." }]
@@ -61,7 +61,7 @@ class SFrame:
61
61
  def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
62
62
  pass
63
63
 
64
- def make_group(self, default_key=configs.DEFAULT_SF_KEY):
64
+ def make_group(self, default_key=configs.DEFAULT_SF_KEY) -> "GroupSFrame":
65
65
  pass
66
66
 
67
67
  def convert(
@@ -26,6 +26,7 @@ TOP_ADDRESS_SF_KEY = "top_address"
26
26
  EXCLUSION_SF_KEY = "exclusion"
27
27
  TOKEN_PRICE_SF_KEY = "token_price"
28
28
  PROFIT_LOSS_SF_KEY = "profit_loss"
29
+ DUPLICATED_SF_KEY = "duplicated"
29
30
  SPARK_APP_NAME = "seshat"
30
31
  PANDAS_MODE = "df"
31
32
  PYSPARK_MODE = "spf"
@@ -0,0 +1,160 @@
1
+ from typing import List, Dict, Tuple, Callable, Optional
2
+
3
+ from pandas import DataFrame
4
+ from pyspark.sql import DataFrame as PySparkDataFrame
5
+ from pyspark.sql import functions as F
6
+
7
+ from seshat.data_class import SFrame, DFrame
8
+ from seshat.general import configs
9
+ from seshat.transformer import Transformer
10
+
11
+
12
class Aggregator(Transformer):
    """Common base for aggregation transformers.

    The class only declares class-level settings; it adds no behavior of its
    own on top of ``Transformer``.
    """

    # Frame class used by default.
    DEFAULT_FRAME = DFrame

    # Maps the handler argument name "default" to the configured default
    # SFrame key.
    DEFAULT_GROUP_KEYS = {"default": configs.DEFAULT_SF_KEY}

    # NOTE(review): presumably the prefix the Transformer base uses to pick
    # the ``aggregate_df`` / ``aggregate_spf`` handlers - confirm in the base.
    HANDLER_NAME = "aggregate"

    # NOTE(review): looks like a flag for "accepts grouped frames only";
    # its consumer is not visible here - confirm in the Transformer base.
    ONLY_GROUP = False
17
+
18
+
19
class FieldAggregation(Aggregator):
    """
    Aggregate columns of a frame, either per group or over the whole frame.

    With ``group_by`` set, the result holds one row per distinct key
    combination: the key columns followed by the aggregated columns. Without
    ``group_by``, every aggregation is computed over the whole frame and added
    as a constant column to all rows.

    Parameters
    ----------
    agg : dict
        Dictionary mapping result column names to aggregation specifications.
        Each value can be either:
        - A tuple of (source_column, aggregation_function), where the function
          is the name of an aggregation method such as "sum" or "mean"
        - A callable that takes a DataFrame (one group's rows when grouping,
          the whole frame otherwise) and returns an aggregated value
    group_by : list of str, optional
        List of column names to group by. A single column name given as a
        plain string is accepted as well. If None or empty, performs global
        aggregations across the entire dataset.
    group_keys : optional
        Passed through unchanged to the parent initializer.
    """

    # Function names accepted by the Spark path that pandas spells differently.
    # Translating them keeps the pandas path (and the pandas fallback of the
    # Spark path) working for the same aggregation specs.
    _PANDAS_FUNC_ALIASES = {"avg": "mean"}

    def __init__(
        self,
        agg: Dict[str, Tuple[str, str] | Callable],
        group_by: Optional[List[str]] = None,
        group_keys=None,
        *args,
        **kwargs,
    ):
        super().__init__(group_keys, *args, **kwargs)
        self.group_on = group_by or []
        self.agg = agg

    def calculate_complexity(self):
        """Return the fixed complexity score of this transformer (10)."""
        return 10

    def validate(self, sf: SFrame):
        """Run the parent validation, then check the aggregation source columns.

        Only tuple specifications name a source column; callables are not
        inspected.
        """
        super().validate(sf)

        columns = []
        for agg_info in self.agg.values():
            if isinstance(agg_info, tuple):
                source_col, _ = agg_info
                columns.append(source_col)
        self._validate_columns(sf, self.default_sf_key, *columns)

    def _group_columns(self) -> List[str]:
        """Return ``group_on`` as a list, wrapping a single column name."""
        if isinstance(self.group_on, str):
            return [self.group_on]
        return list(self.group_on or [])

    def aggregate_df(self, default: DataFrame, *args, **kwargs) -> Dict[str, DataFrame]:
        """Aggregate a pandas DataFrame.

        Returns
        -------
        dict
            ``{"default": result}``. When grouping, ``result`` has one row per
            key combination, sorted by the keys. Otherwise it is a copy of the
            input with one constant column per aggregation; the input frame
            itself is left untouched.
        """
        group_cols = self._group_columns()

        if group_cols:
            # dropna=False keeps rows whose key is NaN as their own group.
            grouped = default.groupby(group_cols, dropna=False)
            # Take the key frame from the groupby itself so that its rows
            # always line up with the aggregated values assigned below. A
            # separately built drop_duplicates() frame can disagree with the
            # groupby (e.g. on NaN keys) and then fails with a length mismatch.
            result_df = grouped.size().index.to_frame(index=False)
        else:
            grouped = None
            # Work on a copy: adding result columns must not modify the
            # caller's frame.
            result_df = default.copy()

        for result_col, agg_info in self.agg.items():
            if isinstance(agg_info, tuple):
                source_col, agg_func = agg_info
                agg_func = self._PANDAS_FUNC_ALIASES.get(agg_func, agg_func)
                if grouped is not None:
                    # One value per group, in the same order as the key frame.
                    result_df[result_col] = getattr(
                        grouped[source_col], agg_func
                    )().values
                else:
                    # Read from result_df so later aggregations still see
                    # columns produced by earlier ones, as they did when the
                    # input frame was modified in place.
                    result_df[result_col] = getattr(
                        result_df[source_col], agg_func
                    )()
            elif grouped is not None:
                result_df[result_col] = grouped.apply(agg_info).values
            else:
                result_df[result_col] = agg_info(result_df)

        return {"default": result_df}

    def aggregate_spf(
        self, default: PySparkDataFrame, *args, **kwargs
    ) -> Dict[str, PySparkDataFrame]:
        """Aggregate a PySpark DataFrame.

        Specifications that are ``(source_column, function_name)`` tuples with
        a known function name run natively in Spark. If any specification is
        a callable, a malformed tuple or an unknown function name, the whole
        aggregation is delegated to the pandas implementation instead.
        """
        spark_agg_funcs = {
            "first": F.first,
            "last": F.last,
            "sum": F.sum,
            "mean": F.avg,
            "avg": F.avg,
            "min": F.min,
            "max": F.max,
            "count": F.count,
            "std": F.stddev,
            "var": F.variance,
            "nunique": F.countDistinct,
        }

        # Build the Spark expressions; bail out to pandas on the first
        # specification Spark cannot express.
        standard_aggs = []
        for result_col, agg_info in self.agg.items():
            if not (isinstance(agg_info, tuple) and len(agg_info) == 2):
                return self._fallback_to_pandas(default, *args, **kwargs)
            source_col, agg_func = agg_info
            if agg_func not in spark_agg_funcs:
                return self._fallback_to_pandas(default, *args, **kwargs)
            standard_aggs.append(
                spark_agg_funcs[agg_func](source_col).alias(result_col)
            )

        group_cols = self._group_columns()

        if not group_cols:
            if not standard_aggs:
                # Nothing to compute: hand the frame back unchanged.
                return {"default": default}

            # Global aggregation yields a single row; its values are attached
            # to every input row as literal columns.
            agg_row = default.agg(*standard_aggs).collect()[0]
            result_df = default
            for result_col in self.agg:
                result_df = result_df.withColumn(
                    result_col, F.lit(agg_row[result_col])
                )
            return {"default": result_df}

        result_df = default.groupBy(*group_cols).agg(*standard_aggs)
        # Fix the column order: keys first, then results in ``agg`` order.
        final_cols = group_cols + list(self.agg.keys())
        return {"default": result_df.select(*final_cols)}

    def _fallback_to_pandas(
        self, default: PySparkDataFrame, *args, **kwargs
    ) -> Dict[str, PySparkDataFrame]:
        """Run the pandas implementation on a Spark frame and convert back.

        The whole frame is collected to the driver with ``toPandas()``.
        """
        pandas_df = default.toPandas()
        result_dict = self.aggregate_df(pandas_df, *args, **kwargs)
        spark = default.sparkSession
        # aggregate_df always returns its frame under the literal key
        # "default", so read it back with that key rather than with the
        # configured SFrame key stored in DEFAULT_GROUP_KEYS.
        result_df = spark.createDataFrame(result_dict["default"])
        return {"default": result_df}
@@ -5,7 +5,6 @@ from .base import (
5
5
  PercentileTransactionValueDeriver,
6
6
  SFrameFromColsDeriver,
7
7
  StaticValueColumnAdder,
8
- TimeWindowTransformer,
9
8
  FractionDeriver,
10
9
  ProfitLossDeriver,
11
10
  DateTimeTypeDeriver,
@@ -20,6 +19,8 @@ from .base import (
20
19
  TokenPriceDeriver,
21
20
  TokenSwapTradeDeriver,
22
21
  TokenFeatureTransformationDeriver,
22
+ BranchClassifier,
23
+ Tagger,
23
24
  )
24
25
 
25
26
  from .from_database import FromSQLDBDeriver