sdk-seshat-python 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. sdk_seshat_python-0.3.4/LICENSE +42 -0
  2. sdk_seshat_python-0.3.4/PKG-INFO +24 -0
  3. sdk_seshat_python-0.3.4/README.md +0 -0
  4. sdk_seshat_python-0.3.4/pyproject.toml +32 -0
  5. sdk_seshat_python-0.3.4/seshat/__init__.py +43 -0
  6. sdk_seshat_python-0.3.4/seshat/__main__.py +11 -0
  7. sdk_seshat_python-0.3.4/seshat/data_class/__init__.py +9 -0
  8. sdk_seshat_python-0.3.4/seshat/data_class/base.py +267 -0
  9. sdk_seshat_python-0.3.4/seshat/data_class/pandas.py +65 -0
  10. sdk_seshat_python-0.3.4/seshat/data_class/pyspark.py +63 -0
  11. sdk_seshat_python-0.3.4/seshat/evaluation/__init__.py +1 -0
  12. sdk_seshat_python-0.3.4/seshat/evaluation/base.py +42 -0
  13. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/__init__.py +1 -0
  14. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/base.py +15 -0
  15. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/__init__.py +3 -0
  16. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/classification.py +172 -0
  17. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/clustering.py +20 -0
  18. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/regression.py +112 -0
  19. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/recommendation/__init__.py +2 -0
  20. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/recommendation/diversity.py +73 -0
  21. sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/recommendation/ranking.py +95 -0
  22. sdk_seshat_python-0.3.4/seshat/feature_view/__init__.py +0 -0
  23. sdk_seshat_python-0.3.4/seshat/feature_view/base.py +141 -0
  24. sdk_seshat_python-0.3.4/seshat/general/__init__.py +1 -0
  25. sdk_seshat_python-0.3.4/seshat/general/command/__init__.py +1 -0
  26. sdk_seshat_python-0.3.4/seshat/general/command/base.py +98 -0
  27. sdk_seshat_python-0.3.4/seshat/general/config.py +33 -0
  28. sdk_seshat_python-0.3.4/seshat/general/exceptions.py +62 -0
  29. sdk_seshat_python-0.3.4/seshat/general/lazy_config.py +41 -0
  30. sdk_seshat_python-0.3.4/seshat/general/template/README.md-tmpl +0 -0
  31. sdk_seshat_python-0.3.4/seshat/general/template/config.py-tmpl +17 -0
  32. sdk_seshat_python-0.3.4/seshat/general/template/env-templ +3 -0
  33. sdk_seshat_python-0.3.4/seshat/general/template/pyproject._toml-tmpl +15 -0
  34. sdk_seshat_python-0.3.4/seshat/general/template/recommender-jupyter.ipynb-tmpl +52 -0
  35. sdk_seshat_python-0.3.4/seshat/general/template/recommender.py-tmpl +56 -0
  36. sdk_seshat_python-0.3.4/seshat/profiler/__init__.py +2 -0
  37. sdk_seshat_python-0.3.4/seshat/profiler/base.py +222 -0
  38. sdk_seshat_python-0.3.4/seshat/profiler/decorator.py +7 -0
  39. sdk_seshat_python-0.3.4/seshat/source/__init__.py +1 -0
  40. sdk_seshat_python-0.3.4/seshat/source/base.py +46 -0
  41. sdk_seshat_python-0.3.4/seshat/source/database/__init__.py +1 -0
  42. sdk_seshat_python-0.3.4/seshat/source/database/base.py +52 -0
  43. sdk_seshat_python-0.3.4/seshat/source/exceptions.py +16 -0
  44. sdk_seshat_python-0.3.4/seshat/source/flip_side/__init__.py +1 -0
  45. sdk_seshat_python-0.3.4/seshat/source/flip_side/base.py +247 -0
  46. sdk_seshat_python-0.3.4/seshat/source/local/__init__.py +1 -0
  47. sdk_seshat_python-0.3.4/seshat/source/local/base.py +26 -0
  48. sdk_seshat_python-0.3.4/seshat/source/mixins.py +70 -0
  49. sdk_seshat_python-0.3.4/seshat/source/multisource/__init__.py +1 -0
  50. sdk_seshat_python-0.3.4/seshat/source/multisource/base.py +28 -0
  51. sdk_seshat_python-0.3.4/seshat/source/saver/__init__.py +2 -0
  52. sdk_seshat_python-0.3.4/seshat/source/saver/base.py +47 -0
  53. sdk_seshat_python-0.3.4/seshat/source/saver/database.py +197 -0
  54. sdk_seshat_python-0.3.4/seshat/source/saver/utils/__init__.py +1 -0
  55. sdk_seshat_python-0.3.4/seshat/source/saver/utils/postgres.py +22 -0
  56. sdk_seshat_python-0.3.4/seshat/transformer/__init__.py +1 -0
  57. sdk_seshat_python-0.3.4/seshat/transformer/augmenter/__init__.py +0 -0
  58. sdk_seshat_python-0.3.4/seshat/transformer/augmenter/base.py +6 -0
  59. sdk_seshat_python-0.3.4/seshat/transformer/base.py +144 -0
  60. sdk_seshat_python-0.3.4/seshat/transformer/deriver/__init__.py +9 -0
  61. sdk_seshat_python-0.3.4/seshat/transformer/deriver/base.py +997 -0
  62. sdk_seshat_python-0.3.4/seshat/transformer/deriver/from_database.py +66 -0
  63. sdk_seshat_python-0.3.4/seshat/transformer/imputer/__init__.py +0 -0
  64. sdk_seshat_python-0.3.4/seshat/transformer/imputer/base.py +6 -0
  65. sdk_seshat_python-0.3.4/seshat/transformer/merger/__init__.py +1 -0
  66. sdk_seshat_python-0.3.4/seshat/transformer/merger/base.py +221 -0
  67. sdk_seshat_python-0.3.4/seshat/transformer/pipeline/__init__.py +1 -0
  68. sdk_seshat_python-0.3.4/seshat/transformer/pipeline/base.py +60 -0
  69. sdk_seshat_python-0.3.4/seshat/transformer/pipeline/branch.py +162 -0
  70. sdk_seshat_python-0.3.4/seshat/transformer/pipeline/recommendation/__init__.py +0 -0
  71. sdk_seshat_python-0.3.4/seshat/transformer/pipeline/recommendation/address_pipeline.py +79 -0
  72. sdk_seshat_python-0.3.4/seshat/transformer/reducer/__init__.py +0 -0
  73. sdk_seshat_python-0.3.4/seshat/transformer/reducer/base.py +6 -0
  74. sdk_seshat_python-0.3.4/seshat/transformer/scaler/__init__.py +0 -0
  75. sdk_seshat_python-0.3.4/seshat/transformer/scaler/base.py +6 -0
  76. sdk_seshat_python-0.3.4/seshat/transformer/schema/__init__.py +1 -0
  77. sdk_seshat_python-0.3.4/seshat/transformer/schema/base.py +127 -0
  78. sdk_seshat_python-0.3.4/seshat/transformer/splitter/__init__.py +1 -0
  79. sdk_seshat_python-0.3.4/seshat/transformer/splitter/base.py +70 -0
  80. sdk_seshat_python-0.3.4/seshat/transformer/splitter/block/__init__.py +1 -0
  81. sdk_seshat_python-0.3.4/seshat/transformer/splitter/block/base.py +39 -0
  82. sdk_seshat_python-0.3.4/seshat/transformer/splitter/random/__init__.py +1 -0
  83. sdk_seshat_python-0.3.4/seshat/transformer/splitter/random/base.py +28 -0
  84. sdk_seshat_python-0.3.4/seshat/transformer/splitter/time_line/__init__.py +1 -0
  85. sdk_seshat_python-0.3.4/seshat/transformer/splitter/time_line/base.py +8 -0
  86. sdk_seshat_python-0.3.4/seshat/transformer/trimmer/__init__.py +6 -0
  87. sdk_seshat_python-0.3.4/seshat/transformer/trimmer/base.py +321 -0
  88. sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/__init__.py +2 -0
  89. sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/base.py +8 -0
  90. sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/cosine_similarity.py +231 -0
  91. sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/pivot.py +115 -0
  92. sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/utils.py +134 -0
  93. sdk_seshat_python-0.3.4/seshat/utils/__init__.py +0 -0
  94. sdk_seshat_python-0.3.4/seshat/utils/col_to_list.py +18 -0
  95. sdk_seshat_python-0.3.4/seshat/utils/contracts.py +32 -0
  96. sdk_seshat_python-0.3.4/seshat/utils/file.py +9 -0
  97. sdk_seshat_python-0.3.4/seshat/utils/join_columns_to_list.py +29 -0
  98. sdk_seshat_python-0.3.4/seshat/utils/join_str.py +5 -0
  99. sdk_seshat_python-0.3.4/seshat/utils/memory.py +5 -0
  100. sdk_seshat_python-0.3.4/seshat/utils/mixin.py +49 -0
  101. sdk_seshat_python-0.3.4/seshat/utils/pandas_func.py +22 -0
  102. sdk_seshat_python-0.3.4/seshat/utils/patching.py +29 -0
  103. sdk_seshat_python-0.3.4/seshat/utils/pyspark_func.py +16 -0
  104. sdk_seshat_python-0.3.4/seshat/utils/singleton.py +7 -0
  105. sdk_seshat_python-0.3.4/seshat/utils/validation.py +38 -0
@@ -0,0 +1,42 @@
1
+ LICENSE AGREEMENT
2
+
3
+ This License Agreement ("Agreement") is made between Seshat ("Licensor") and you, the licensee ("Licensee"), effective as of the date of digital acceptance or installation of this software ("Effective Date").
4
+
5
+ 1. GRANT OF LICENSE:
6
+ Seshat grants to Licensee a non-exclusive, non-transferable, limited license to use the sdk-seshat-python software ("Software") solely for Licensee's internal business operations subject to the terms of this Agreement.
7
+
8
+ 2. LICENSE RESTRICTIONS:
9
+ - Licensee shall not modify, make derivative works of, disassemble, reverse compile, or reverse engineer any part of the Software, or attempt to do any of the foregoing.
10
+ - Licensee shall not sublicense, distribute, sell, or lease the Software.
11
+ - Licensee may not use the Software for providing time-sharing services, service bureau services, or as part of an application services provider or software as a service offering.
12
+
13
+ 3. PAYMENT:
14
+ - Licensee agrees to pay Seshat the amount of [Amount] ("License Fee") for the license of the Software as stipulated in [Payment Terms Section or Document].
15
+
16
+ 4. PROPRIETARY RIGHTS:
17
+ Licensee acknowledges that Seshat owns all right, title, and interest in and to the Software, including all related intellectual property rights. The Software is protected by copyright and other intellectual property laws and treaties.
18
+
19
+ 5. TERM AND TERMINATION:
20
+ - This Agreement commences on the Effective Date and continues until terminated as provided herein.
21
+ - Seshat may terminate this Agreement immediately upon notice to Licensee if Licensee breaches any provision of this Agreement.
22
+ - Upon termination, Licensee must cease all use of the Software and delete all copies.
23
+
24
+ 6. DISCLAIMER OF WARRANTIES:
25
+ The Software is provided "as is", with all faults and without warranty of any kind. Seshat disclaims all warranties, express or implied, including any implied warranties of merchantability or fitness for a particular purpose.
26
+
27
+ 7. LIMITATION OF LIABILITY:
28
+ Seshat shall not be liable for any indirect, special, incidental, or consequential damages, including lost profits, arising out of or related to this Agreement or the Software, even if Seshat has been advised of the possibility of such damages.
29
+
30
+ 8. GENERAL:
31
+ - This Agreement constitutes the entire agreement between the parties relating to the Software and supersedes all prior or contemporaneous oral or written communications, proposals, and representations with respect to its subject matter.
32
+ - No amendment to or modification of this License will be binding unless in writing and signed by a duly authorized representative of Seshat.
33
+
34
+ IN WITNESS WHEREOF, the parties hereto have executed this License Agreement as of the Effective Date.
35
+
36
+ Seshat Licensee
37
+
38
+ By: ___________________________ By: ___________________________
39
+
40
+ Title: _________________________ Title: _________________________
41
+
42
+ Date: __________________________ Date: __________________________
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.1
2
+ Name: sdk-seshat-python
3
+ Version: 0.3.4
4
+ Summary: Seshat python SDK is a library to help create data pipelines with minimum effort for blockchain data to be trained later for any AI model.
5
+ License: Commercial - see LICENSE.txt
6
+ Author: sajadgilga
7
+ Author-email: majidstic@yahoo.com
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Provides-Extra: flipside-support
14
+ Provides-Extra: postgres-support
15
+ Requires-Dist: flipside (>=2.0.8,<3.0.0) ; extra == "flipside-support"
16
+ Requires-Dist: memory-profiler (>=0.61.0,<0.62.0)
17
+ Requires-Dist: pandas (>=2.2.1,<3.0.0)
18
+ Requires-Dist: pyspark (>=3.5.1,<4.0.0)
19
+ Requires-Dist: scikit-learn (>=1.4.1.post1,<2.0.0)
20
+ Requires-Dist: sqlalchemy (>=2.0.29,<3.0.0)
21
+ Requires-Dist: typer (>=0.12.3,<0.13.0)
22
+ Description-Content-Type: text/markdown
23
+
24
+
File without changes
@@ -0,0 +1,32 @@
1
# Poetry project definition for the seshat python SDK.
[tool.poetry]
name = "sdk-seshat-python"
version = "0.3.4"
description = "Seshat python SDK is a library to help create data pipelines with minimum effort for blockchain data to be trained later for any AI model."
authors = ["sajadgilga <majidstic@yahoo.com>", "aliomidvarrr <aliomidvarrrrr@gmail.com>"]
packages = [{ include = "seshat", from = "." }]
readme = "README.md"
# NOTE(review): the distribution ships a file named LICENSE, not
# LICENSE.txt — confirm the filename referenced here.
license = "Commercial - see LICENSE.txt"


[tool.poetry.dependencies]
python = "^3.11"
pandas = "^2.2.1"
scikit-learn = "^1.4.1.post1"
pyspark = "^3.5.1"
# Optional source backend, enabled through the flipside_support extra.
flipside = { version = "^2.0.8", optional = true }
sqlalchemy = "^2.0.29"
memory-profiler = "^0.61.0"
typer = "^0.12.3"

[tool.poetry.extras]
flipside_support = ["flipside"]
# NOTE(review): psycopg2-binary is referenced by this extra but is not
# declared under [tool.poetry.dependencies] — confirm it resolves.
postgres_support = ["psycopg2-binary"]

# Tooling used only during development (linting, formatting, git hooks).
[tool.poetry.group.dev.dependencies]
flake8 = "^7.0.0"
black = "^24.3.0"
pre-commit = "^3.7.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,43 @@
1
import os

import typer

from seshat.general.command.base import RECOMMENDATION, SetUpProjectCommand

# Typer application exposing the SDK's command-line interface.
app = typer.Typer()

# Mutable CLI state shared between the root callback and the commands.
state = {"verbose": False}


@app.command(name="create-project")
def create_project(name: str, usecase=typer.Option(default=RECOMMENDATION)):
    """Scaffold a new seshat project called *name* for the given use case.

    Delegates the setup work to ``SetUpProjectCommand`` (run in the current
    working directory) and prints a colored success/failure message.
    """
    command = SetUpProjectCommand(name, usecase, os.getcwd(), report=state["verbose"])
    try:
        command.handle()
    except Exception as exc:
        cli_msg = typer.style(
            f"Setup project in usecase {usecase} failed because of {str(exc)}",
            fg=typer.colors.RED,
            bold=True,
        )
    else:
        # Fixed the deploy hint: it previously opened with ' and closed
        # with ` — now a matched pair of backticks.
        cli_msg = typer.style(
            f"""
            Setup project in usecase {usecase} done!\n
            You can deploy your project by this command 🚀:
            `python -m seshat deploy`
            """,
            fg=typer.colors.GREEN,
            bold=True,
        )
    typer.echo(cli_msg)


@app.callback()
def main(verbose: bool = False):
    """Root callback: record global CLI flags before any command runs."""
    state["verbose"] = verbose


@app.command()
def deploy():
    """Deploy the current project. Not implemented yet."""
    pass
@@ -0,0 +1,11 @@
1
"""Command-line entry point: run the seshat Typer app, exit with a status."""
import sys

from seshat import app

exit_code = 1
try:
    app()
except Exception as e:
    # Surface the failure on stderr; exit_code stays non-zero.
    print("Error: %s" % e, file=sys.stderr)
else:
    exit_code = 0
sys.exit(exit_code)
@@ -0,0 +1,9 @@
1
from pandas import DataFrame
from pyspark.sql import DataFrame as PySparkDataFrame

from .base import SFrame, GroupSFrame
from .pandas import DFrame
from .pyspark import SPFrame

# Registry of SFrame wrapper classes keyed by their ``frame_name``
# identifier ("df" for the pandas backend, "spf" for the PySpark backend).
SF_MAP = {DFrame.frame_name: DFrame, SPFrame.frame_name: SPFrame}
# Parallel registry mapping the same keys to the underlying raw dataframe
# types wrapped by each SFrame implementation.
RAW_MAP = {DFrame.frame_name: DataFrame, SPFrame.frame_name: PySparkDataFrame}
@@ -0,0 +1,267 @@
1
+ import copy
2
+ from typing import Dict, Iterable, Type, List
3
+
4
+ from seshat.general import configs
5
+ from seshat.general.exceptions import (
6
+ SFrameDoesNotExistError,
7
+ UnknownDataClassError,
8
+ InvalidArgumentsError,
9
+ )
10
+
11
+
12
class SFrame:
    """
    An interface for Seshat frames, providing a unified interface
    for either pandas or PySpark dataframes or others.
    This class facilitates additional functionalities over the standard dataframe operations,
    making it versatile for various data manipulation tasks.

    Attributes
    ----------
    data : pandas.DataFrame or pyspark.sql.DataFrame
        The actual dataframe object that is wrapped by this class.
        Depending on initialization,this can be a pandas or PySpark dataframe.
    frame_name : str
        A name assigned to the dataframe. This name is used to identify the dataframe
        and can be particularly useful working with multiple dataframes simultaneously.
    """

    frame_name: str
    data: object

    def __init__(self, data=None, *args, **kwargs):
        self.data = data

    def __add__(self, other):
        # NOTE: mutates and returns self (in-place extend) rather than
        # producing a new frame — unusual semantics for `+`.
        if other:
            self.extend(other.data)
        return self

    def __copy__(self):
        # Shallow copy: the clone shares the underlying raw data object.
        return type(self)(self.data)

    def __deepcopy__(self, memo):
        return type(self)(copy.deepcopy(self.data, memo))

    def to_raw(self) -> object:
        """Return the wrapped raw dataframe object."""
        return self.data

    def set_raw(self, key: str, data: object):
        """Replace the wrapped raw data. ``key`` is ignored here; it is
        meaningful for ``GroupSFrame``, which shares this interface."""
        self.data = data

    def get(self, key: str) -> "SFrame":
        """Return the frame stored under ``key``; a plain SFrame holds a
        single frame, so it always returns itself."""
        return self

    def get_columns(self, *args) -> Iterable[str]:
        """Return the column names of the wrapped frame (subclass hook)."""
        pass

    def to_dict(self, *cols: str, key: str = configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Return rows as a list of dicts, optionally restricted to
        ``cols`` (subclass hook)."""
        pass

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Yield values of ``column_name`` row by row (subclass hook)."""
        pass

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        """Wrap this frame in a ``GroupSFrame`` keyed by ``default_key``
        (subclass hook)."""
        pass

    def convert(
        self, to: "SFrame", default_key: str = configs.DEFAULT_SF_KEY
    ) -> "SFrame":
        """
        Converts the current SFrame to match to the another SFrame.

        Parameters
        ----------
        to: SFrame
            The SFrame to which the current SFrame's data will be converted.

        Returns
        -------
        SFrame
            A converted SFrame instance.
        """
        # Same backend already: nothing to convert.
        if self.frame_name == to.frame_name:
            return self
        return self._convert(to)

    def _convert(self, to: "SFrame") -> "SFrame":
        return self.call_conversion_handler(self, to)

    def extend_from_csv(self, path, *args, **kwargs):
        """Read a CSV via the subclass reader and append it to this frame."""
        new_data = self.read_csv(path, *args, **kwargs)
        self.extend(new_data)

    def extend(
        self,
        other: object,
        axis: int = 0,
        on: str = None,
        left_on: str = None,
        right_on: str = None,
        how: str = "left",
    ) -> object:
        """Append raw dataframe ``other`` to this frame and return the
        resulting raw data.

        ``axis=0`` stacks rows; ``axis=1`` joins columns (requires join
        keys). When no data is wrapped yet, ``other`` becomes the data.
        """
        if self.data is None:
            self.data = other
        elif axis == 0:
            self.extend_vertically(other)
        elif axis == 1:
            self.extend_horizontally(other, on, left_on, right_on, how)

        return self.data

    def extend_vertically(self, other: object):
        """Row-wise append hook; concrete logic lives in subclasses."""
        pass

    def extend_horizontally(
        self, other: object, on: str, left_on: str, right_on: str, how: str
    ):
        """Validate join keys for a column-wise extend; subclasses perform
        the actual join after calling super()."""
        if on is None and (left_on is None or right_on is None):
            raise InvalidArgumentsError(
                "`on` or `left_on` and `right_on` cannot be None while trying to extend horizontally"
            )

    @classmethod
    def read_csv(cls, path, *args, **kwargs):
        """Read a CSV into the subclass's raw dataframe type (hook)."""
        pass

    @staticmethod
    def call_conversion_handler(from_: "SFrame", to: "SFrame") -> "SFrame":
        # Conversion is dispatched by name: converting toward a frame whose
        # frame_name is "spf" calls ``from_.to_spf()``, etc.
        handler_name = f"to_{to.frame_name}"
        try:
            handler = getattr(from_, handler_name)
            return handler()
        except AttributeError:
            # NOTE(review): an AttributeError raised *inside* the handler is
            # also reported as "not implemented" — confirm this is intended.
            raise NotImplementedError(
                "handler for conversion to %s is not implemented" % to.frame_name
            )

    @classmethod
    def from_raw(cls, *args, **kwargs) -> "SFrame":
        """Build an SFrame from a raw dataframe object."""
        return cls(*args, **kwargs)

    def __getitem__(self, item):
        # A plain SFrame has a single child, so any key maps to itself.
        return self

    def __setitem__(self, key, value):
        self.data = value.to_raw()
147
+
148
+
149
class GroupSFrame(SFrame):
    """
    A specialized class derived from SFrame that manages a collection of SFrames
    stored in a dictionary. Each SFrame within the dictionary can be accessed
    using a unique key. This class is designed to handle grouped data where
    each group is represented as an individual SFrame, allowing for operations
    to be performed on specific subsets of data efficiently.

    Parameters
    ----------
    children : dict
        A dictionary where each key-value pair consists of a string key
        and an SFrame as the value. This structure allows for easy access
        to each group's sframe by using its corresponding key.
    """

    # TODO: handle multiple types as children
    def __init__(
        self,
        children: Dict[str, SFrame] = None,
        sframe_class: Type[SFrame] = None,
        *args,
        **kwargs,
    ):
        super().__init__()
        if children is None:
            children = {}

        self.children = children
        # Infer the child class from the first child when not given
        # explicitly; may stay None for an empty group (resolved lazily).
        self.sframe_class = sframe_class or self.find_sframe_class(
            raise_exception=False
        )

    def __copy__(self):
        # Shallow copy: shares the children mapping with the original.
        return GroupSFrame(self.children, self.sframe_class)

    def __deepcopy__(self, memo):
        return type(self)(copy.deepcopy(self.children, memo), self.sframe_class)

    def to_raw(self) -> Dict[str, object]:
        """Return a dict mapping each child key to its raw dataframe."""
        raw = {}
        for key, sf in self.children.items():
            raw[key] = sf.to_raw()
        return raw

    def set_raw(self, key, data: object):
        """Store raw ``data`` under ``key``, wrapping it in the group's
        SFrame class when it is not already an SFrame."""
        if self.sframe_class is None:
            self.sframe_class = self.find_sframe_class()
        if not isinstance(data, SFrame):
            data = self.sframe_class.from_raw(data)
        self.children[key] = data

    def get(self, key: str) -> SFrame:
        """Return the child SFrame for ``key`` (None when absent)."""
        return self.children.get(key)

    def set_frame(self, key: str, new_frame: "SFrame") -> None:
        """Attach ``new_frame`` under ``key``; when a whole group is given,
        merge all of its children instead."""
        if isinstance(new_frame, GroupSFrame):
            for k, v in new_frame.children.items():
                self.children[k] = v
        else:
            self.children[key] = new_frame

    def get_columns(self, key) -> Iterable[str]:
        """Return the columns of the child stored under ``key``."""
        return self.get(key).get_columns()

    def to_dict(self, *cols: str, key: str = configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Delegate ``to_dict`` to the child stored under ``key``."""
        return self.get(key).to_dict(*cols)

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Delegate ``iterrows`` to the child stored under ``key``."""
        return self.get(key).iterrows(column_name)

    def convert(
        self, to: "GroupSFrame", default_key: str = configs.DEFAULT_SF_KEY
    ) -> SFrame:
        """Convert every child to the backend of ``to``; a plain SFrame
        target is first promoted to a group under ``default_key``."""
        if not isinstance(to, GroupSFrame):
            to = to.make_group(default_key)
        # Fix: forward default_key so a non-default key is not dropped
        # when delegating to SFrame.convert.
        return super().convert(to, default_key)

    def _convert(self, to: "GroupSFrame") -> SFrame:
        for k, v in self.children.items():
            to[k] = self.call_conversion_handler(v, to)
        return to

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        # Already a group; nothing to wrap.
        return self

    def raise_unknown_sf_exception(self):
        """Raise the canonical error for a group whose child class cannot
        be determined."""
        raise UnknownDataClassError(
            "one of the `children` or `sframe_class` must be set while calling `set_raw`"
        )

    def find_sframe_class(self, raise_exception=True):
        """Infer the SFrame class from the first child; raise (or return
        None when ``raise_exception`` is False) for an empty group."""
        if hasattr(self, "children") and len(self.children) > 0:
            return next(iter(self.children.values())).__class__
        if raise_exception:
            # Fix: previously written as `raise self.raise_...()`, which
            # tried to re-raise the (never produced) return value of a
            # helper that already raises — the helper is simply called now.
            self.raise_unknown_sf_exception()

    @property
    def keys(self):
        yield from self.children.keys()

    @property
    def frame_name(self) -> str:
        if self.sframe_class is None:
            self.sframe_class = self.find_sframe_class()
        return self.sframe_class.frame_name

    def __getitem__(self, key):
        try:
            return self.children[key]
        except KeyError:
            raise SFrameDoesNotExistError(self.__class__.__name__, key)

    def __setitem__(self, key, value):
        self.children[key] = value

    def __add__(self, other):
        # NOTE: unlike SFrame.__add__, this returns a plain merged dict,
        # not a GroupSFrame.
        return {**self.children, **other.children}
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable, List, Dict
4
+
5
+ import pandas as pd
6
+ from pandas import DataFrame
7
+ from pyspark.sql import SparkSession
8
+
9
+ from seshat.data_class import SFrame
10
+ from seshat.data_class.base import GroupSFrame
11
+ from seshat.general import configs
12
+
13
+
14
class DFrame(SFrame):
    """SFrame implementation backed by a pandas ``DataFrame``."""

    frame_name = "df"
    data: DataFrame

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        """Wrap this frame in a ``GroupSFrame`` under ``default_key``."""
        return GroupSFrame({default_key: self}, sframe_class=self.__class__)

    def get_columns(self, *args) -> Iterable[str]:
        """Return the column labels of the wrapped dataframe."""
        return self.data.columns

    def to_dict(self, *cols: str, key=configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Return unique rows (optionally restricted to ``cols``) as dicts.

        Rows are de-duplicated on their string representation so that
        unhashable cell values (lists, dicts) do not break
        ``drop_duplicates``.
        """
        selected = self.data[list(cols)] if cols else self.data
        selected = selected.loc[selected.astype(str).drop_duplicates().index]
        return selected.to_dict("records")

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Yield the values of ``column_name`` one row at a time."""
        for row in self.data[column_name]:
            yield row

    def to_spf(self) -> SFrame:
        """Convert to a PySpark-backed ``SPFrame``."""
        from seshat.data_class import SPFrame

        # Consistency fix: reuse SPFrame's session factory instead of
        # duplicating the SparkSession builder call, so both backends share
        # a single app-name/session configuration.
        return SPFrame.from_raw(SPFrame.get_spark().createDataFrame(self.data))

    def extend_vertically(self, other: DataFrame):
        """Append the rows of ``other`` below this frame."""
        super().extend_vertically(other)
        self.data = pd.concat([self.data, other], axis=0)

    def extend_horizontally(
        self, other: DataFrame, on: str, left_on: str, right_on: str, how: str
    ):
        """Join ``other`` column-wise after validating the join keys."""
        super().extend_horizontally(other, on, left_on, right_on, how)
        self.data = pd.merge(
            self.data,
            other,
            on=on,
            left_on=left_on,
            right_on=right_on,
            how=how,
        )

    @classmethod
    def read_csv(cls, path, *args, **kwargs) -> "DataFrame":
        """Read a CSV into a raw pandas DataFrame, skipping malformed
        lines unless the caller overrides ``on_bad_lines``."""
        kwargs.setdefault("on_bad_lines", "skip")
        return pd.read_csv(path, *args, **kwargs)

    @classmethod
    def from_raw(cls, data, *args, **kwargs) -> "DFrame":
        """Build a DFrame from a DataFrame or anything DataFrame accepts."""
        if not isinstance(data, DataFrame):
            data = DataFrame(data)
        return cls(data)
@@ -0,0 +1,63 @@
1
+ from typing import Iterable, List, Dict
2
+
3
+ from pandas import DataFrame
4
+ from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
5
+
6
+ from seshat.data_class import SFrame, DFrame
7
+ from seshat.data_class.base import GroupSFrame
8
+ from seshat.general import configs
9
+
10
+
11
class SPFrame(SFrame):
    """SFrame implementation backed by a PySpark ``DataFrame``."""

    frame_name = "spf"
    data: PySparkDataFrame

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        """Wrap this frame in a ``GroupSFrame`` under ``default_key``."""
        return GroupSFrame({default_key: self}, sframe_class=self.__class__)

    def get_columns(self, *args) -> Iterable[str]:
        """Return the column names of the wrapped dataframe."""
        return self.data.columns

    def to_dict(self, *cols: str, key: str = configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Return unique rows (optionally restricted to ``cols``) as dicts.

        Consistency fix: ``DFrame.to_dict`` always de-duplicates rows,
        while this method previously only applied ``distinct()`` when a
        column subset was requested.
        """
        selected = self.data.select(*cols) if cols else self.data
        return [row.asDict() for row in selected.distinct().collect()]

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Yield values of ``column_name``; collects the whole frame to
        the driver, so use with care on large data."""
        for row in self.data.collect():
            yield row[column_name]

    def to_df(self) -> SFrame:
        """Convert to a pandas-backed ``DFrame``."""
        from seshat.data_class import DFrame

        return DFrame.from_raw(self.data.toPandas())

    def extend_vertically(self, other: PySparkDataFrame):
        """Append rows of ``other``, matching columns by name."""
        super().extend_vertically(other)
        self.data = self.data.unionByName(other)

    def extend_horizontally(
        self, other: PySparkDataFrame, on: str, left_on: str, right_on: str, how: str
    ):
        """Join ``other`` column-wise after validating the join keys."""
        super().extend_horizontally(other, on, left_on, right_on, how)

        if left_on and right_on:
            # Build an explicit join condition from the two key columns.
            on = getattr(self.data, left_on) == getattr(other, right_on)

        self.data = self.data.drop_duplicates().join(
            other.drop_duplicates(), on=on, how=how
        )

    @classmethod
    def read_csv(cls, path, *args, **kwargs) -> "PySparkDataFrame":
        """Read a CSV into a raw PySpark DataFrame; the first line is
        treated as a header unless the caller says otherwise."""
        kwargs.setdefault("header", True)
        return cls.get_spark().read.csv(path, *args, **kwargs)

    @classmethod
    def from_raw(cls, data, *args, **kwargs) -> "SPFrame":
        """Build an SPFrame, converting a pandas DataFrame when given one."""
        if isinstance(data, DataFrame):
            data = DFrame.from_raw(data).convert(cls).to_raw()
        return cls(data)

    @staticmethod
    def get_spark():
        """Return (or create) the shared SparkSession for the SDK."""
        return SparkSession.builder.appName(configs.SPARK_APP_NAME).getOrCreate()
@@ -0,0 +1 @@
1
+ from .base import Evaluation
@@ -0,0 +1,42 @@
1
+ import os
2
+ from typing import List, Callable
3
+
4
+ from seshat.data_class import SFrame
5
+ from seshat.evaluation.evaluator import Evaluator
6
+ from seshat.general.exceptions import InvalidArgumentsError
7
+
8
+
9
class Evaluation:
    """Run a set of evaluators against test data and write a text report.

    Attributes
    ----------
    evaluators : List[Evaluator]
        Evaluators whose result dicts are merged into one report.
    report_path : str
        File path the textual report is written to.
    """

    evaluators: List[Evaluator]
    test_sf: SFrame
    prediction_sf: SFrame
    model_func: Callable
    report_path: str

    def __init__(self, evaluators, report_path):
        self.evaluators = evaluators
        self.report_path = report_path

    def __call__(self, test_sf, model_func=None, **prediction_kwargs):
        """Evaluate ``test_sf`` and return the merged report dict.

        Predictions are either supplied directly via ``prediction_kwargs``
        or produced by calling ``model_func(test_sf)``.

        Raises
        ------
        InvalidArgumentsError
            When neither predictions nor a model function is given.
        """
        if not prediction_kwargs and not model_func:
            # Fix: error message previously said "mode_func".
            raise InvalidArgumentsError(
                "Must provide either prediction_kwargs or model_func"
            )
        elif not prediction_kwargs:
            prediction_kwargs = model_func(test_sf)

        report = {}
        for evaluator in self.evaluators:
            # Later evaluators overwrite identically-named metrics.
            report |= evaluator(test_sf=test_sf, **prediction_kwargs)
        self.write_report(report, self.report_path)
        return report

    @staticmethod
    def write_report(report, report_path):
        """Write one ``Metric <name>: <result>`` line per entry to
        ``report_path``, creating parent directories as needed."""
        report_content = ""
        for metric, result in report.items():
            report_content += f"Metric {metric}: {result}\n"
        directory = os.path.dirname(report_path)
        # Fix: a bare filename has no directory component and
        # os.makedirs("") raises FileNotFoundError.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(report_path, "w") as file:
            file.write(report_content)
@@ -0,0 +1 @@
1
+ from .base import Evaluator
@@ -0,0 +1,15 @@
1
+ from typing import Dict
2
+
3
+ from seshat.data_class import SFrame
4
+ from seshat.general import configs
5
+ from seshat.utils.mixin import SFHandlerDispatcherMixin
6
+
7
+
8
class Evaluator(SFHandlerDispatcherMixin):
    """Base evaluator dispatching to a backend-specific ``evaluate`` handler.

    Calling an instance extracts the raw test data from the given SFrame and
    forwards it, together with any prediction keyword arguments, to the
    handler resolved by the mixin.
    """

    HANDLER_NAME = "evaluate"
    input_sf: SFrame
    DEFAULT_GROUP_KEYS: Dict[str, str] = {"test": configs.DEFAULT_SF_KEY}

    def __call__(self, test_sf: SFrame, **prediction_kwargs: object):
        extracted = self.extract_raw(test_sf)
        return self.call_handler(test_sf, **prediction_kwargs, **extracted)
@@ -0,0 +1,3 @@
1
+ from .classification import ClassificationEvaluator
2
+ from .clustering import ClusteringEvaluator
3
+ from .regression import RegressionEvaluator