sdk-seshat-python 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdk_seshat_python-0.3.4/LICENSE +42 -0
- sdk_seshat_python-0.3.4/PKG-INFO +24 -0
- sdk_seshat_python-0.3.4/README.md +0 -0
- sdk_seshat_python-0.3.4/pyproject.toml +32 -0
- sdk_seshat_python-0.3.4/seshat/__init__.py +43 -0
- sdk_seshat_python-0.3.4/seshat/__main__.py +11 -0
- sdk_seshat_python-0.3.4/seshat/data_class/__init__.py +9 -0
- sdk_seshat_python-0.3.4/seshat/data_class/base.py +267 -0
- sdk_seshat_python-0.3.4/seshat/data_class/pandas.py +65 -0
- sdk_seshat_python-0.3.4/seshat/data_class/pyspark.py +63 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/base.py +42 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/base.py +15 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/__init__.py +3 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/classification.py +172 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/clustering.py +20 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/general/regression.py +112 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/recommendation/__init__.py +2 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/recommendation/diversity.py +73 -0
- sdk_seshat_python-0.3.4/seshat/evaluation/evaluator/recommendation/ranking.py +95 -0
- sdk_seshat_python-0.3.4/seshat/feature_view/__init__.py +0 -0
- sdk_seshat_python-0.3.4/seshat/feature_view/base.py +141 -0
- sdk_seshat_python-0.3.4/seshat/general/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/general/command/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/general/command/base.py +98 -0
- sdk_seshat_python-0.3.4/seshat/general/config.py +33 -0
- sdk_seshat_python-0.3.4/seshat/general/exceptions.py +62 -0
- sdk_seshat_python-0.3.4/seshat/general/lazy_config.py +41 -0
- sdk_seshat_python-0.3.4/seshat/general/template/README.md-tmpl +0 -0
- sdk_seshat_python-0.3.4/seshat/general/template/config.py-tmpl +17 -0
- sdk_seshat_python-0.3.4/seshat/general/template/env-templ +3 -0
- sdk_seshat_python-0.3.4/seshat/general/template/pyproject._toml-tmpl +15 -0
- sdk_seshat_python-0.3.4/seshat/general/template/recommender-jupyter.ipynb-tmpl +52 -0
- sdk_seshat_python-0.3.4/seshat/general/template/recommender.py-tmpl +56 -0
- sdk_seshat_python-0.3.4/seshat/profiler/__init__.py +2 -0
- sdk_seshat_python-0.3.4/seshat/profiler/base.py +222 -0
- sdk_seshat_python-0.3.4/seshat/profiler/decorator.py +7 -0
- sdk_seshat_python-0.3.4/seshat/source/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/source/base.py +46 -0
- sdk_seshat_python-0.3.4/seshat/source/database/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/source/database/base.py +52 -0
- sdk_seshat_python-0.3.4/seshat/source/exceptions.py +16 -0
- sdk_seshat_python-0.3.4/seshat/source/flip_side/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/source/flip_side/base.py +247 -0
- sdk_seshat_python-0.3.4/seshat/source/local/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/source/local/base.py +26 -0
- sdk_seshat_python-0.3.4/seshat/source/mixins.py +70 -0
- sdk_seshat_python-0.3.4/seshat/source/multisource/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/source/multisource/base.py +28 -0
- sdk_seshat_python-0.3.4/seshat/source/saver/__init__.py +2 -0
- sdk_seshat_python-0.3.4/seshat/source/saver/base.py +47 -0
- sdk_seshat_python-0.3.4/seshat/source/saver/database.py +197 -0
- sdk_seshat_python-0.3.4/seshat/source/saver/utils/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/source/saver/utils/postgres.py +22 -0
- sdk_seshat_python-0.3.4/seshat/transformer/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/augmenter/__init__.py +0 -0
- sdk_seshat_python-0.3.4/seshat/transformer/augmenter/base.py +6 -0
- sdk_seshat_python-0.3.4/seshat/transformer/base.py +144 -0
- sdk_seshat_python-0.3.4/seshat/transformer/deriver/__init__.py +9 -0
- sdk_seshat_python-0.3.4/seshat/transformer/deriver/base.py +997 -0
- sdk_seshat_python-0.3.4/seshat/transformer/deriver/from_database.py +66 -0
- sdk_seshat_python-0.3.4/seshat/transformer/imputer/__init__.py +0 -0
- sdk_seshat_python-0.3.4/seshat/transformer/imputer/base.py +6 -0
- sdk_seshat_python-0.3.4/seshat/transformer/merger/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/merger/base.py +221 -0
- sdk_seshat_python-0.3.4/seshat/transformer/pipeline/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/pipeline/base.py +60 -0
- sdk_seshat_python-0.3.4/seshat/transformer/pipeline/branch.py +162 -0
- sdk_seshat_python-0.3.4/seshat/transformer/pipeline/recommendation/__init__.py +0 -0
- sdk_seshat_python-0.3.4/seshat/transformer/pipeline/recommendation/address_pipeline.py +79 -0
- sdk_seshat_python-0.3.4/seshat/transformer/reducer/__init__.py +0 -0
- sdk_seshat_python-0.3.4/seshat/transformer/reducer/base.py +6 -0
- sdk_seshat_python-0.3.4/seshat/transformer/scaler/__init__.py +0 -0
- sdk_seshat_python-0.3.4/seshat/transformer/scaler/base.py +6 -0
- sdk_seshat_python-0.3.4/seshat/transformer/schema/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/schema/base.py +127 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/base.py +70 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/block/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/block/base.py +39 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/random/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/random/base.py +28 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/time_line/__init__.py +1 -0
- sdk_seshat_python-0.3.4/seshat/transformer/splitter/time_line/base.py +8 -0
- sdk_seshat_python-0.3.4/seshat/transformer/trimmer/__init__.py +6 -0
- sdk_seshat_python-0.3.4/seshat/transformer/trimmer/base.py +321 -0
- sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/__init__.py +2 -0
- sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/base.py +8 -0
- sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/cosine_similarity.py +231 -0
- sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/pivot.py +115 -0
- sdk_seshat_python-0.3.4/seshat/transformer/vectorizer/utils.py +134 -0
- sdk_seshat_python-0.3.4/seshat/utils/__init__.py +0 -0
- sdk_seshat_python-0.3.4/seshat/utils/col_to_list.py +18 -0
- sdk_seshat_python-0.3.4/seshat/utils/contracts.py +32 -0
- sdk_seshat_python-0.3.4/seshat/utils/file.py +9 -0
- sdk_seshat_python-0.3.4/seshat/utils/join_columns_to_list.py +29 -0
- sdk_seshat_python-0.3.4/seshat/utils/join_str.py +5 -0
- sdk_seshat_python-0.3.4/seshat/utils/memory.py +5 -0
- sdk_seshat_python-0.3.4/seshat/utils/mixin.py +49 -0
- sdk_seshat_python-0.3.4/seshat/utils/pandas_func.py +22 -0
- sdk_seshat_python-0.3.4/seshat/utils/patching.py +29 -0
- sdk_seshat_python-0.3.4/seshat/utils/pyspark_func.py +16 -0
- sdk_seshat_python-0.3.4/seshat/utils/singleton.py +7 -0
- sdk_seshat_python-0.3.4/seshat/utils/validation.py +38 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
LICENSE AGREEMENT
|
|
2
|
+
|
|
3
|
+
This License Agreement ("Agreement") is made between Seshat ("Licensor") and you, the licensee ("Licensee"), effective as of the date of digital acceptance or installation of this software ("Effective Date").
|
|
4
|
+
|
|
5
|
+
1. GRANT OF LICENSE:
|
|
6
|
+
Seshat grants to Licensee a non-exclusive, non-transferable, limited license to use [Software Name] ("Software") solely for Licensee's internal business operations subject to the terms of this Agreement.
|
|
7
|
+
|
|
8
|
+
2. LICENSE RESTRICTIONS:
|
|
9
|
+
- Licensee shall not modify, make derivative works of, disassemble, reverse compile, or reverse engineer any part of the Software, or attempt to do any of the foregoing.
|
|
10
|
+
- Licensee shall not sublicense, distribute, sell, or lease the Software.
|
|
11
|
+
- Licensee may not use the Software for providing time-sharing services, service bureau services, or as part of an application services provider or software as a service offering.
|
|
12
|
+
|
|
13
|
+
3. PAYMENT:
|
|
14
|
+
- Licensee agrees to pay Seshat the amount of [Amount] ("License Fee") for the license of the Software as stipulated in [Payment Terms Section or Document].
|
|
15
|
+
|
|
16
|
+
4. PROPRIETARY RIGHTS:
|
|
17
|
+
Licensee acknowledges that Seshat owns all right, title, and interest in and to the Software, including all related intellectual property rights. The Software is protected by copyright and other intellectual property laws and treaties.
|
|
18
|
+
|
|
19
|
+
5. TERM AND TERMINATION:
|
|
20
|
+
- This Agreement commences on the Effective Date and continues until terminated as provided herein.
|
|
21
|
+
- Seshat may terminate this Agreement immediately upon notice to Licensee if Licensee breaches any provision of this Agreement.
|
|
22
|
+
- Upon termination, Licensee must cease all use of the Software and delete all copies.
|
|
23
|
+
|
|
24
|
+
6. DISCLAIMER OF WARRANTIES:
|
|
25
|
+
The Software is provided "as is", with all faults and without warranty of any kind. Seshat disclaims all warranties, express or implied, including any implied warranties of merchantability or fitness for a particular purpose.
|
|
26
|
+
|
|
27
|
+
7. LIMITATION OF LIABILITY:
|
|
28
|
+
Seshat shall not be liable for any indirect, special, incidental, or consequential damages, including lost profits, arising out of or related to this Agreement or the Software, even if Seshat has been advised of the possibility of such damages.
|
|
29
|
+
|
|
30
|
+
8. GENERAL:
|
|
31
|
+
- This Agreement constitutes the entire agreement between the parties relating to the Software and supersedes all prior or contemporaneous oral or written communications, proposals, and representations with respect to its subject matter.
|
|
32
|
+
- No amendment to or modification of this License will be binding unless in writing and signed by a duly authorized representative of Seshat.
|
|
33
|
+
|
|
34
|
+
IN WITNESS WHEREOF, the parties hereto have executed this License Agreement as of the Effective Date.
|
|
35
|
+
|
|
36
|
+
Seshat Licensee
|
|
37
|
+
|
|
38
|
+
By: ___________________________ By: ___________________________
|
|
39
|
+
|
|
40
|
+
Title: _________________________ Title: _________________________
|
|
41
|
+
|
|
42
|
+
Date: __________________________ Date: __________________________
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: sdk-seshat-python
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Summary: Seshat python SDK is a library to help create data pipelines with minimum effort for blockchain data to be trained later for any AI model.
|
|
5
|
+
License: Commercial - see LICENSE.txt
|
|
6
|
+
Author: sajadgilga
|
|
7
|
+
Author-email: majidstic@yahoo.com
|
|
8
|
+
Requires-Python: >=3.11,<4.0
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Provides-Extra: flipside-support
|
|
14
|
+
Provides-Extra: postgres-support
|
|
15
|
+
Requires-Dist: flipside (>=2.0.8,<3.0.0) ; extra == "flipside-support"
|
|
16
|
+
Requires-Dist: memory-profiler (>=0.61.0,<0.62.0)
|
|
17
|
+
Requires-Dist: pandas (>=2.2.1,<3.0.0)
|
|
18
|
+
Requires-Dist: pyspark (>=3.5.1,<4.0.0)
|
|
19
|
+
Requires-Dist: scikit-learn (>=1.4.1.post1,<2.0.0)
|
|
20
|
+
Requires-Dist: sqlalchemy (>=2.0.29,<3.0.0)
|
|
21
|
+
Requires-Dist: typer (>=0.12.3,<0.13.0)
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "sdk-seshat-python"
|
|
3
|
+
version = "0.3.4"
|
|
4
|
+
description = "Seshat python SDK is a library to help create data pipelines with minimum effort for blockchain data to be trained later for any AI model."
|
|
5
|
+
authors = ["sajadgilga <majidstic@yahoo.com>", "aliomidvarrr <aliomidvarrrrr@gmail.com>"]
|
|
6
|
+
packages = [{ include = "seshat", from = "." }]
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
license = "Commercial - see LICENSE.txt"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
[tool.poetry.dependencies]
|
|
12
|
+
python = "^3.11"
|
|
13
|
+
pandas = "^2.2.1"
|
|
14
|
+
scikit-learn = "^1.4.1.post1"
|
|
15
|
+
pyspark = "^3.5.1"
|
|
16
|
+
flipside = { version = "^2.0.8", optional = true }
|
|
17
|
+
sqlalchemy = "^2.0.29"
|
|
18
|
+
memory-profiler = "^0.61.0"
|
|
19
|
+
typer = "^0.12.3"
|
|
20
|
+
|
|
21
|
+
[tool.poetry.extras]
|
|
22
|
+
flipside_support = ["flipside"]
|
|
23
|
+
postgres_support = ["psycopg2-binary"]
|
|
24
|
+
|
|
25
|
+
[tool.poetry.group.dev.dependencies]
|
|
26
|
+
flake8 = "^7.0.0"
|
|
27
|
+
black = "^24.3.0"
|
|
28
|
+
pre-commit = "^3.7.0"
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["poetry-core>=1.0.0"]
|
|
32
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import os.path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from seshat.general.command.base import RECOMMENDATION, SetUpProjectCommand
|
|
6
|
+
|
|
7
|
+
# Typer application exposing the Seshat project-scaffolding CLI.
app = typer.Typer()

# Mutable CLI state shared between the root callback and the commands;
# "verbose" is toggled by the --verbose flag handled in `main`.
state = {"verbose": False}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@app.command(name="create-project")
def create_project(name: str, usecase=typer.Option(default=RECOMMENDATION)):
    """
    Scaffold a new Seshat project called `name` for the given `usecase`
    in the current working directory.

    Progress reporting is enabled when the global --verbose flag is set
    (see the `main` callback).
    """
    command = SetUpProjectCommand(name, usecase, os.getcwd(), report=state["verbose"])
    try:
        command.handle()
    except Exception as exc:
        # Report the failure to the user instead of crashing the CLI.
        cli_msg = typer.style(
            f"Setup project in usecase {usecase} failed because of {str(exc)}",
            fg=typer.colors.RED,
            bold=True,
        )
    else:
        # Fixed mismatched quoting: the deploy hint previously opened with a
        # single quote and closed with a backtick.
        cli_msg = typer.style(
            f"""
            Setup project in usecase {usecase} done!\n
            You can deploy your project by this command 🚀:
            `python -m seshat deploy`
            """,
            fg=typer.colors.GREEN,
            bold=True,
        )
    typer.echo(cli_msg)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@app.callback()
def main(verbose: bool = False):
    """Root callback: record the global --verbose flag for commands to read."""
    state.update(verbose=verbose)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@app.command()
def deploy():
    """Deploy the current Seshat project. Not implemented yet; does nothing."""
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from pandas import DataFrame
from pyspark.sql import DataFrame as PySparkDataFrame

from .base import SFrame, GroupSFrame
from .pandas import DFrame
from .pyspark import SPFrame

# Registry mapping each frame_name ("df", "spf") to its SFrame wrapper class.
SF_MAP = {DFrame.frame_name: DFrame, SPFrame.frame_name: SPFrame}
# Registry mapping each frame_name to the underlying raw dataframe type.
RAW_MAP = {DFrame.frame_name: DataFrame, SPFrame.frame_name: PySparkDataFrame}
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
from typing import Dict, Iterable, Type, List
|
|
3
|
+
|
|
4
|
+
from seshat.general import configs
|
|
5
|
+
from seshat.general.exceptions import (
|
|
6
|
+
SFrameDoesNotExistError,
|
|
7
|
+
UnknownDataClassError,
|
|
8
|
+
InvalidArgumentsError,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SFrame:
    """
    An interface for Seshat frames, providing a unified interface
    for either pandas or PySpark dataframes or others.
    This class facilitates additional functionalities over the standard dataframe operations,
    making it versatile for various data manipulation tasks.

    Attributes
    ----------
    data : pandas.DataFrame or pyspark.sql.DataFrame
        The actual dataframe object that is wrapped by this class.
        Depending on initialization, this can be a pandas or PySpark dataframe.
    frame_name : str
        A name assigned to the dataframe. This name is used to identify the dataframe
        and can be particularly useful working with multiple dataframes simultaneously.
    """

    frame_name: str
    data: object

    def __init__(self, data=None, *args, **kwargs):
        self.data = data

    def __add__(self, other):
        # NOTE(review): mutates self in place and returns it (no new frame is
        # created); a falsy `other` is silently ignored.
        if other:
            self.extend(other.data)
        return self

    def __copy__(self):
        # Shallow copy: new wrapper around the same raw data object.
        return type(self)(self.data)

    def __deepcopy__(self, memo):
        return type(self)(copy.deepcopy(self.data, memo))

    def to_raw(self) -> object:
        """Return the wrapped raw dataframe object."""
        return self.data

    def set_raw(self, key: str, data: object):
        """Replace the wrapped raw data. `key` is unused here; GroupSFrame uses it."""
        self.data = data

    def get(self, key: str) -> "SFrame":
        """Return the child frame for `key`; a plain SFrame is its own child."""
        return self

    def get_columns(self, *args) -> Iterable[str]:
        """Return the column names of the wrapped data. Subclass hook."""
        pass

    def to_dict(self, *cols: str, key: str = configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Return rows as a list of dicts, optionally restricted to `cols`. Subclass hook."""
        pass

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Iterate over the values of a single column. Subclass hook."""
        pass

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        """Wrap this frame into a GroupSFrame under `default_key`. Subclass hook."""
        pass

    def convert(
        self, to: "SFrame", default_key: str = configs.DEFAULT_SF_KEY
    ) -> "SFrame":
        """
        Converts the current SFrame to match to the another SFrame.

        Parameters
        ----------
        to: SFrame
            The SFrame to which the current SFrame's data will be converted.

        Returns
        -------
        SFrame
            A converted SFrame instance.
        """
        # Same concrete frame type: nothing to convert.
        if self.frame_name == to.frame_name:
            return self
        return self._convert(to)

    def _convert(self, to: "SFrame") -> "SFrame":
        return self.call_conversion_handler(self, to)

    def extend_from_csv(self, path, *args, **kwargs):
        """Read a CSV with the subclass reader and append it to this frame."""
        new_data = self.read_csv(path, *args, **kwargs)
        self.extend(new_data)

    def extend(
        self,
        other: object,
        axis: int = 0,
        on: str = None,
        left_on: str = None,
        right_on: str = None,
        how: str = "left",
    ) -> object:
        """
        Append raw data `other` to this frame and return the resulting raw data.

        axis=0 stacks rows (vertical); axis=1 joins columns (horizontal) using
        `on` or the `left_on`/`right_on` pair with join strategy `how`.
        """
        if self.data is None:
            # Nothing wrapped yet: adopt `other` as-is.
            self.data = other
        elif axis == 0:
            self.extend_vertically(other)
        elif axis == 1:
            self.extend_horizontally(other, on, left_on, right_on, how)

        return self.data

    def extend_vertically(self, other: object):
        """Row-wise concatenation. Subclass hook."""
        pass

    def extend_horizontally(
        self, other: object, on: str, left_on: str, right_on: str, how: str
    ):
        """Column-wise join. Validates join keys; subclasses perform the join."""
        if on is None and (left_on is None or right_on is None):
            raise InvalidArgumentsError(
                "`on` or `left_on` and `right_on` cannot be None while trying to extend horizontally"
            )

    @classmethod
    def read_csv(cls, path, *args, **kwargs):
        """Read a CSV into the subclass's raw dataframe type. Subclass hook."""
        pass

    @staticmethod
    def call_conversion_handler(from_: "SFrame", to: "SFrame") -> "SFrame":
        # Dispatch dynamically to a `to_<frame_name>` method (e.g. to_df, to_spf)
        # defined on the source frame.
        handler_name = f"to_{to.frame_name}"
        try:
            handler = getattr(from_, handler_name)
            return handler()
        # NOTE(review): this also swallows AttributeErrors raised *inside* the
        # handler call, reporting them as "not implemented".
        except AttributeError:
            raise NotImplementedError(
                "handler for conversion to %s is not implemented" % to.frame_name
            )

    @classmethod
    def from_raw(cls, *args, **kwargs) -> "SFrame":
        """Alternate constructor wrapping a raw dataframe object."""
        return cls(*args, **kwargs)

    def __getitem__(self, item):
        return self

    def __setitem__(self, key, value):
        self.data = value.to_raw()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class GroupSFrame(SFrame):
    """
    A specialized class derived from SFrame that manages a collection of SFrames
    stored in a dictionary. Each SFrame within the dictionary can be accessed
    using a unique key. This class is designed to handle grouped data where
    each group is represented as an individual SFrame, allowing for operations
    to be performed on specific subsets of data efficiently.

    Parameters
    ----------
    children : dict
        A dictionary where each key-value pair consists of a string key
        and an SFrame as the value. This structure allows for easy access
        to each group's sframe by using its corresponding key.
    """

    # TODO: handle multiple types as children
    def __init__(
        self,
        children: Dict[str, SFrame] = None,
        sframe_class: Type[SFrame] = None,
        *args,
        **kwargs,
    ):
        super().__init__()
        if children is None:
            children = {}

        self.children = children
        # Concrete SFrame subclass used to wrap raw values in set_raw; inferred
        # from the first child when not passed explicitly (may remain None).
        self.sframe_class = sframe_class or self.find_sframe_class(
            raise_exception=False
        )

    def __copy__(self):
        # Shallow copy: shares the same children dict reference.
        return GroupSFrame(self.children, self.sframe_class)

    def __deepcopy__(self, memo):
        return type(self)(copy.deepcopy(self.children, memo), self.sframe_class)

    def to_raw(self) -> Dict[str, object]:
        """Return a dict mapping each child key to its raw dataframe."""
        raw = {}
        for key, sf in self.children.items():
            raw[key] = sf.to_raw()
        return raw

    def set_raw(self, key, data: object):
        """Store `data` under `key`, wrapping raw objects in `sframe_class`."""
        if self.sframe_class is None:
            self.sframe_class = self.find_sframe_class()
        if not isinstance(data, SFrame):
            data = self.sframe_class.from_raw(data)
        self.children[key] = data

    def get(self, key: str) -> SFrame:
        """Return the child SFrame for `key`, or None if absent."""
        return self.children.get(key)

    def set_frame(self, key: str, new_frame: "SFrame") -> None:
        """Insert `new_frame` under `key`; a GroupSFrame is merged child-by-child."""
        if isinstance(new_frame, GroupSFrame):
            for k, v in new_frame.children.items():
                self.children[k] = v
        else:
            self.children[key] = new_frame

    def get_columns(self, key) -> Iterable[str]:
        """Column names of the child stored under `key`."""
        return self.get(key).get_columns()

    def to_dict(self, *cols: str, key: str = configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Delegate to the child under `key`."""
        return self.get(key).to_dict(*cols)

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Delegate to the child under `key`."""
        return self.get(key).iterrows(column_name)

    def convert(
        self, to: "GroupSFrame", default_key: str = configs.DEFAULT_SF_KEY
    ) -> SFrame:
        # Normalize `to` into a group so children can be converted one by one.
        if not isinstance(to, GroupSFrame):
            to = to.make_group(default_key)
        return super().convert(to)

    def _convert(self, to: "GroupSFrame") -> SFrame:
        # Convert every child through its `to_<frame_name>` handler and collect
        # the results in the target group.
        for k, v in self.children.items():
            to[k] = self.call_conversion_handler(v, to)
        return to

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        # Already a group; nothing to wrap.
        return self

    def raise_unknown_sf_exception(self):
        raise UnknownDataClassError(
            "one of the `children` or `sframe_class` must be set while calling `set_raw`"
        )

    def find_sframe_class(self, raise_exception=True):
        """Infer the concrete SFrame subclass from the first child, if any."""
        if hasattr(self, "children") and len(self.children) > 0:
            return list(self.children.values())[0].__class__
        if raise_exception:
            # NOTE(review): raise_unknown_sf_exception raises internally, so the
            # inner raise fires before this outer `raise` is ever evaluated.
            raise self.raise_unknown_sf_exception()

    @property
    def keys(self):
        # Generator over the child keys.
        for key in self.children.keys():
            yield key

    @property
    def frame_name(self) -> str:
        """frame_name of the underlying sframe_class (inferred on demand)."""
        if self.sframe_class is None:
            self.sframe_class = self.find_sframe_class()
        return self.sframe_class.frame_name

    def __getitem__(self, key):
        try:
            return self.children[key]
        except KeyError:
            raise SFrameDoesNotExistError(self.__class__.__name__, key)

    def __setitem__(self, key, value):
        self.children[key] = value

    def __add__(self, other):
        # NOTE(review): unlike SFrame.__add__, this returns a plain merged dict
        # of children rather than an SFrame instance.
        return {**self.children, **other.children}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, List, Dict
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pandas import DataFrame
|
|
7
|
+
from pyspark.sql import SparkSession
|
|
8
|
+
|
|
9
|
+
from seshat.data_class import SFrame
|
|
10
|
+
from seshat.data_class.base import GroupSFrame
|
|
11
|
+
from seshat.general import configs
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DFrame(SFrame):
    """SFrame implementation backed by a pandas DataFrame."""

    frame_name = "df"
    data: DataFrame

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        """Promote this frame to a single-child GroupSFrame keyed by `default_key`."""
        return GroupSFrame({default_key: self}, sframe_class=self.__class__)

    def get_columns(self, *args) -> Iterable[str]:
        """Column labels of the wrapped DataFrame."""
        return self.data.columns

    def to_dict(self, *cols: str, key=configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Rows as record dicts, de-duplicated on their string representation."""
        frame = self.data[list(cols)] if cols else self.data
        unique_index = frame.astype(str).drop_duplicates().index
        frame = frame.loc[unique_index]
        return frame.to_dict("records")

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Yield each value of `column_name` in order."""
        yield from self.data[column_name]

    def to_spf(self) -> SFrame:
        """Convert to a Spark-backed SPFrame via an active SparkSession."""
        from seshat.data_class import SPFrame

        session = SparkSession.builder.appName(configs.SPARK_APP_NAME).getOrCreate()
        return SPFrame.from_raw(session.createDataFrame(self.data))

    def extend_vertically(self, other: DataFrame):
        """Stack `other` below the current data."""
        super().extend_vertically(other)
        self.data = pd.concat([self.data, other], axis=0)

    def extend_horizontally(
        self, other: DataFrame, on: str, left_on: str, right_on: str, how: str
    ):
        """Join `other` column-wise after key validation in the base class."""
        super().extend_horizontally(other, on, left_on, right_on, how)
        self.data = pd.merge(
            self.data,
            other,
            on=on,
            left_on=left_on,
            right_on=right_on,
            how=how,
        )

    @classmethod
    def read_csv(cls, path, *args, **kwargs) -> "DataFrame":
        """Read a CSV, skipping malformed lines unless the caller overrides it."""
        kwargs.setdefault("on_bad_lines", "skip")
        return pd.read_csv(path, *args, **kwargs)

    @classmethod
    def from_raw(cls, data, *args, **kwargs) -> "DFrame":
        """Wrap `data`, coercing non-DataFrame input into a DataFrame first."""
        if not isinstance(data, DataFrame):
            data = DataFrame(data)
        return cls(data)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from typing import Iterable, List, Dict
|
|
2
|
+
|
|
3
|
+
from pandas import DataFrame
|
|
4
|
+
from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
|
|
5
|
+
|
|
6
|
+
from seshat.data_class import SFrame, DFrame
|
|
7
|
+
from seshat.data_class.base import GroupSFrame
|
|
8
|
+
from seshat.general import configs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SPFrame(SFrame):
    """SFrame implementation backed by a PySpark DataFrame."""

    frame_name = "spf"
    data: PySparkDataFrame

    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
        """Promote this frame to a single-child GroupSFrame keyed by `default_key`."""
        return GroupSFrame({default_key: self}, sframe_class=self.__class__)

    def get_columns(self, *args) -> Iterable[str]:
        """Column names of the wrapped Spark DataFrame."""
        return self.data.columns

    def to_dict(self, *cols: str, key: str = configs.DEFAULT_SF_KEY) -> List[Dict]:
        """Collect rows as dicts; with `cols`, restrict columns and drop duplicates."""
        frame = self.data.select(*cols).distinct() if cols else self.data
        return [record.asDict() for record in frame.collect()]

    def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
        """Yield each value of `column_name`; collects the frame to the driver."""
        for record in self.data.collect():
            yield record[column_name]

    def to_df(self) -> SFrame:
        """Convert to a pandas-backed DFrame."""
        from seshat.data_class import DFrame

        return DFrame.from_raw(self.data.toPandas())

    def extend_vertically(self, other: PySparkDataFrame):
        """Union `other` below the current data, matching by column name."""
        super().extend_vertically(other)
        self.data = self.data.unionByName(other)

    def extend_horizontally(
        self, other: PySparkDataFrame, on: str, left_on: str, right_on: str, how: str
    ):
        """Join `other` column-wise after key validation in the base class."""
        super().extend_horizontally(other, on, left_on, right_on, how)

        if left_on and right_on:
            # Build an explicit Column join condition from the two key columns.
            on = getattr(self.data, left_on) == getattr(other, right_on)

        deduped_self = self.data.drop_duplicates()
        deduped_other = other.drop_duplicates()
        self.data = deduped_self.join(deduped_other, on=on, how=how)

    @classmethod
    def read_csv(cls, path, *args, **kwargs) -> "PySparkDataFrame":
        """Read a CSV with a header row unless the caller overrides it."""
        kwargs.setdefault("header", True)
        return cls.get_spark().read.csv(path, *args, **kwargs)

    @classmethod
    def from_raw(cls, data, *args, **kwargs) -> "SPFrame":
        """Wrap `data`; pandas input is routed through DFrame's conversion."""
        if isinstance(data, DataFrame):
            data = DFrame.from_raw(data).convert(cls).to_raw()
        return cls(data)

    @staticmethod
    def get_spark():
        """Return (or create) the shared SparkSession for this app."""
        return SparkSession.builder.appName(configs.SPARK_APP_NAME).getOrCreate()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .base import Evaluation
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Callable
|
|
3
|
+
|
|
4
|
+
from seshat.data_class import SFrame
|
|
5
|
+
from seshat.evaluation.evaluator import Evaluator
|
|
6
|
+
from seshat.general.exceptions import InvalidArgumentsError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Evaluation:
    """
    Runs a collection of evaluators against test data and writes a plain-text
    metric report to `report_path`.
    """

    evaluators: List[Evaluator]
    test_sf: SFrame
    prediction_sf: SFrame
    model_func: Callable
    report_path: str

    def __init__(self, evaluators, report_path):
        self.evaluators = evaluators
        self.report_path = report_path

    def __call__(self, test_sf, model_func=None, **prediction_kwargs):
        """
        Evaluate predictions for `test_sf` and return the metric report dict.

        Either pass prediction outputs directly as keyword arguments, or pass
        `model_func`, which is called with `test_sf` and must return the
        prediction keyword arguments.

        Raises
        ------
        InvalidArgumentsError
            If neither `prediction_kwargs` nor `model_func` is provided.
        """
        if not prediction_kwargs and not model_func:
            # Fixed typo in the message: "mode_func" -> "model_func".
            raise InvalidArgumentsError(
                "Must provide either prediction_kwargs or model_func"
            )
        elif not prediction_kwargs:
            prediction_kwargs = model_func(test_sf)

        report = {}
        for evaluator in self.evaluators:
            # Each evaluator returns a {metric_name: result} mapping.
            report |= evaluator(test_sf=test_sf, **prediction_kwargs)
        self.write_report(report, self.report_path)
        return report

    @staticmethod
    def write_report(report, report_path):
        """Write one `Metric <name>: <result>` line per metric to `report_path`."""
        report_content = ""
        for metric, result in report.items():
            report_content += f"Metric {metric}: {result}\n"
        directory = os.path.dirname(report_path)
        if directory:
            # Guard: os.makedirs("") raises when report_path is a bare filename.
            os.makedirs(directory, exist_ok=True)
        with open(report_path, "w") as file:
            file.write(report_content)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .base import Evaluator
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Dict
|
|
2
|
+
|
|
3
|
+
from seshat.data_class import SFrame
|
|
4
|
+
from seshat.general import configs
|
|
5
|
+
from seshat.utils.mixin import SFHandlerDispatcherMixin
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Evaluator(SFHandlerDispatcherMixin):
    """
    Base class for metric evaluators.

    Dispatches to a frame-type-specific "evaluate" handler via
    SFHandlerDispatcherMixin, passing the raw test data plus any
    prediction keyword arguments.
    """

    # Name of the handler method family the mixin dispatches to.
    HANDLER_NAME = "evaluate"
    input_sf: SFrame
    # Maps logical group names to SFrame child keys used when extracting raw data.
    DEFAULT_GROUP_KEYS: Dict[str, str] = {"test": configs.DEFAULT_SF_KEY}

    def __call__(self, test_sf: SFrame, **prediction_kwargs: object):
        # extract_raw / call_handler are provided by SFHandlerDispatcherMixin
        # (not visible here); extract_raw presumably returns raw-frame keyword
        # arguments for the handler — TODO confirm against the mixin.
        test_kwargs = self.extract_raw(test_sf)
        return self.call_handler(test_sf, **prediction_kwargs, **test_kwargs)
|