sdk-seshat-python 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/PKG-INFO +8 -7
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/pyproject.toml +9 -8
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/reducer/base.py +68 -17
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/LICENSE +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/README.md +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/__main__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/data_class/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/data_class/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/data_class/pandas.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/data_class/pyspark.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/general/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/general/classification.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/general/clustering.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/general/regression.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/recommendation/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/recommendation/diversity.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/recommendation/ranking.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/feature_view/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/feature_view/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/command/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/command/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/command/code_inspect.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/command/job_status.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/command/setup_project.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/command/submit_to_network.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/config.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/exceptions.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/lazy_config.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/models.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/README.md-tmpl +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/config.py-tmpl +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/env-templ +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/jobignore-tmpl +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/pyproject._toml-tmpl +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/recommender-jupyter.ipynb-tmpl +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/recommender.py-tmpl +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/profiler/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/profiler/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/profiler/decorator.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/profiler/format.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/database/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/database/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/exceptions.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/flip_side/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/flip_side/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/local/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/local/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/mixins.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/multisource/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/multisource/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/saver/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/saver/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/saver/database.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/saver/utils/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/source/saver/utils/postgres.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/aggregator/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/aggregator/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/augmenter/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/augmenter/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/deriver/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/deriver/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/deriver/from_database.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/imputer/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/imputer/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/merger/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/merger/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/merger/nested_key.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pipeline/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pipeline/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pipeline/branch.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pipeline/recommendation/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pipeline/recommendation/address_pipeline.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pseudo/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pseudo/action_gate.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pseudo/table_existence.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/reducer/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/scaler/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/scaler/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/schema/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/schema/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/block/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/block/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/random/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/random/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/time_line/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/time_line/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/trimmer/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/trimmer/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/vectorizer/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/vectorizer/base.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/vectorizer/cosine_similarity.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/vectorizer/pivot.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/vectorizer/utils.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/batcher.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/binary_utils.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/clean_json.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/col_to_list.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/contracts.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/file.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/file_cryptography.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/filter_json.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/grouper.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/jobignore.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/join_columns_to_list.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/join_str.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/llm_client/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/llm_client/chatbot_factory.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/logging/__init__.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/logging/base_logger.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/logging/console_logger.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/logging/logstash_logger.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/logging/multi_logger.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/memory.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/mixin.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/obfuscate.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/package_utils.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/pandas_func.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/patching.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/pyspark_func.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/rest.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/singleton.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/validation.py +0 -0
- {sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/zip_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sdk-seshat-python
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: Seshat python SDK is a library to help create ML data pipelines.
|
|
5
5
|
License: Commercial - see LICENSE.txt
|
|
6
6
|
Author: SeshatLabs
|
|
@@ -13,29 +13,30 @@ Provides-Extra: flipside-support
|
|
|
13
13
|
Provides-Extra: postgres-support
|
|
14
14
|
Requires-Dist: backoff (>=2.2.1,<3.0.0)
|
|
15
15
|
Requires-Dist: bokeh (>=3.6.0,<4.0.0)
|
|
16
|
-
Requires-Dist: boto3 (>=1.
|
|
16
|
+
Requires-Dist: boto3 (>=1.35.68,<2.0.0)
|
|
17
17
|
Requires-Dist: croniter (>=6.0.0,<7.0.0)
|
|
18
18
|
Requires-Dist: cryptography (>=44.0.0,<45.0.0)
|
|
19
19
|
Requires-Dist: dask[array,complete,dataframe,distributed] (>=2024.10.0,<2025.0.0)
|
|
20
20
|
Requires-Dist: flipside (>=2.1.0,<3.0.0) ; extra == "flipside-support"
|
|
21
21
|
Requires-Dist: langchain (>=0.3.23,<0.4.0)
|
|
22
22
|
Requires-Dist: langchain-community (>=0.3.21,<0.4.0)
|
|
23
|
-
Requires-Dist: langchain-openai (
|
|
23
|
+
Requires-Dist: langchain-openai (>=0.3.12,<0.4.0)
|
|
24
24
|
Requires-Dist: loguru (>=0.7.3,<0.8.0)
|
|
25
25
|
Requires-Dist: memory-profiler (>=0.61.0,<0.62.0)
|
|
26
26
|
Requires-Dist: openai (>=1.73.0,<2.0.0)
|
|
27
|
-
Requires-Dist: pandas (>=2.
|
|
27
|
+
Requires-Dist: pandas (>=2.2.1,<3.0.0)
|
|
28
28
|
Requires-Dist: psycopg2-binary (>=2.9,<3.0) ; extra == "postgres-support"
|
|
29
29
|
Requires-Dist: pyarmor (>=8.5.1,<9.0.0)
|
|
30
30
|
Requires-Dist: pydantic (>=2.7.4,<3.0.0)
|
|
31
31
|
Requires-Dist: pyspark (>=3.5.1,<4.0.0)
|
|
32
32
|
Requires-Dist: python-logstash-async (>=4.0.2,<5.0.0)
|
|
33
|
-
Requires-Dist: requests (
|
|
34
|
-
Requires-Dist: rich (>=
|
|
33
|
+
Requires-Dist: requests (==2.32.0)
|
|
34
|
+
Requires-Dist: rich (>=13.9.4,<14.0.0)
|
|
35
35
|
Requires-Dist: scikit-learn (>=1.4.1.post1,<2.0.0)
|
|
36
|
+
Requires-Dist: setuptools (>=80.9.0,<81.0.0)
|
|
36
37
|
Requires-Dist: sqlalchemy (>=2.0.29,<3.0.0)
|
|
37
38
|
Requires-Dist: toml (>=0.10.2,<0.11.0)
|
|
38
|
-
Requires-Dist: typer (>=0.
|
|
39
|
+
Requires-Dist: typer (>=0.12.3,<0.13.0)
|
|
39
40
|
Description-Content-Type: text/markdown
|
|
40
41
|
|
|
41
42
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "sdk-seshat-python"
|
|
3
|
-
version = "0.4.1"
|
|
3
|
+
version = "0.4.2"
|
|
4
4
|
description = "Seshat python SDK is a library to help create ML data pipelines."
|
|
5
5
|
authors = ["SeshatLabs <info@seshatlabs.xyz>"]
|
|
6
6
|
packages = [{ include = "seshat", from = "." }]
|
|
@@ -10,19 +10,19 @@ license = "Commercial - see LICENSE.txt"
|
|
|
10
10
|
|
|
11
11
|
[tool.poetry.dependencies]
|
|
12
12
|
python = "^3.11"
|
|
13
|
-
pandas = "^2.
|
|
13
|
+
pandas = "^2.2.1"
|
|
14
14
|
scikit-learn = "^1.4.1.post1"
|
|
15
15
|
pyspark = "^3.5.1"
|
|
16
16
|
flipside = "^2.1.0"
|
|
17
17
|
sqlalchemy = "^2.0.29"
|
|
18
18
|
memory-profiler = "^0.61.0"
|
|
19
|
-
typer = "^0.
|
|
19
|
+
typer = "^0.12.3"
|
|
20
20
|
dask = {extras = ["array", "complete", "dataframe", "distributed"], version = "^2024.10.0"}
|
|
21
21
|
bokeh = "^3.6.0"
|
|
22
22
|
toml = "^0.10.2"
|
|
23
|
-
rich = "^
|
|
24
|
-
boto3 = "^1.
|
|
25
|
-
requests = "
|
|
23
|
+
rich = "^13.9.4"
|
|
24
|
+
boto3 = "^1.35.68"
|
|
25
|
+
requests = "2.32.0"
|
|
26
26
|
backoff = "^2.2.1"
|
|
27
27
|
cryptography = "^44.0.0"
|
|
28
28
|
loguru = "^0.7.3"
|
|
@@ -30,11 +30,12 @@ openai = "^1.73.0"
|
|
|
30
30
|
pydantic = "^2.7.4"
|
|
31
31
|
langchain = "^0.3.23"
|
|
32
32
|
langchain-community = "^0.3.21"
|
|
33
|
-
langchain-openai = "0.3.
|
|
33
|
+
langchain-openai = "^0.3.12"
|
|
34
34
|
pyarmor = "^8.5.1"
|
|
35
|
+
python-logstash-async = "^4.0.2"
|
|
35
36
|
croniter = "^6.0.0"
|
|
36
37
|
psycopg2-binary = { version = "^2.9", optional = true }
|
|
37
|
-
|
|
38
|
+
setuptools = "^80.9.0"
|
|
38
39
|
|
|
39
40
|
[tool.poetry.extras]
|
|
40
41
|
flipside_support = ["flipside"]
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import math
|
|
2
3
|
from typing import Any, Callable, Dict, List, TypeAlias
|
|
3
4
|
|
|
4
5
|
import pandas as pd
|
|
@@ -18,7 +19,17 @@ from seshat.utils.clean_json import JSONCleaner
|
|
|
18
19
|
InputType: TypeAlias = List[Dict[str, Any]]
|
|
19
20
|
OutputType: TypeAlias = List[Dict[str, Any]]
|
|
20
21
|
ProcessResponseFn: TypeAlias = Callable[[InputType, InputType], OutputType]
|
|
21
|
-
|
|
22
|
+
GetExtraCtxFn: TypeAlias = Callable[[List[Dict[str, Any]]], Dict[str, Any]]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def math_nan_to_none(row):
|
|
26
|
+
d = row.asDict()
|
|
27
|
+
for key, value in d.items():
|
|
28
|
+
if value is None:
|
|
29
|
+
continue
|
|
30
|
+
if isinstance(value, float) and math.isnan(value):
|
|
31
|
+
d[key] = None
|
|
32
|
+
return Row(**d)
|
|
22
33
|
|
|
23
34
|
|
|
24
35
|
class SFrameReducer(Transformer):
|
|
@@ -39,12 +50,14 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
39
50
|
|
|
40
51
|
Parameters
|
|
41
52
|
----------
|
|
42
|
-
get_llm_client : BaseChatModel
|
|
53
|
+
get_llm_client : () -> BaseChatModel
|
|
43
54
|
The LLM client used to generate insights.
|
|
44
55
|
template_prompt : str
|
|
45
56
|
The template prompt to send to the LLM. Should include placeholders for data.
|
|
46
57
|
id_column : str, optional
|
|
47
58
|
The column name to use as an identifier when expanding results. Required if expand_on_id is True.
|
|
59
|
+
join_cols : list[str], optional
|
|
60
|
+
The columns to use for joining the extracted insights back to the original DataFrame. If not provided, defaults to [id_column].
|
|
48
61
|
template_context : str, optional
|
|
49
62
|
The system context to provide to the LLM. Defaults to a basic data scientist role.
|
|
50
63
|
llm_input_columns : List[str], optional
|
|
@@ -52,7 +65,7 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
52
65
|
process_llm_json_response_fn : ProcessResponseFn, optional
|
|
53
66
|
Function to process the JSON response from the LLM.
|
|
54
67
|
get_extra_context : GetExtraCtxFn, optional
|
|
55
|
-
Function to process
|
|
68
|
+
Function to process data before sending to the LLM. Receives the current data and should return a dict to update format_args.
|
|
56
69
|
process_llm_response : Callable, optional
|
|
57
70
|
Function to process the raw LLM response before JSON parsing.
|
|
58
71
|
retry : int, default=3
|
|
@@ -71,6 +84,8 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
71
84
|
Whether to expand results based on ID column. Requires id_column.
|
|
72
85
|
inject_keys : dict[str, str], optional
|
|
73
86
|
Additional keys to inject into the template prompt.
|
|
87
|
+
merge_result : bool, default=True
|
|
88
|
+
Whether to merge the extracted insights back to the original DataFrame.
|
|
74
89
|
|
|
75
90
|
Raises
|
|
76
91
|
------
|
|
@@ -137,10 +152,11 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
137
152
|
get_llm_client: Callable[[], "BaseChatModel"],
|
|
138
153
|
template_prompt: str,
|
|
139
154
|
id_column: str = None,
|
|
155
|
+
join_cols: list[str] = None,
|
|
140
156
|
template_context: str = None,
|
|
141
157
|
llm_input_columns: List[str] = None,
|
|
142
158
|
process_llm_json_response_fn: ProcessResponseFn = None,
|
|
143
|
-
get_extra_context:
|
|
159
|
+
get_extra_context: GetExtraCtxFn = None,
|
|
144
160
|
process_llm_response: Callable = None,
|
|
145
161
|
retry: int = 3,
|
|
146
162
|
llm_result_cleaner: Callable = JSONCleaner().clean,
|
|
@@ -151,6 +167,7 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
151
167
|
groupby_inject_key: str = None,
|
|
152
168
|
expand_on_id: bool = False,
|
|
153
169
|
inject_keys: dict[str, str] = None,
|
|
170
|
+
merge_result: bool = True,
|
|
154
171
|
):
|
|
155
172
|
|
|
156
173
|
super().__init__(group_keys)
|
|
@@ -172,9 +189,11 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
172
189
|
Your task is to analyze and provide insights about the given dataset.
|
|
173
190
|
"""
|
|
174
191
|
)
|
|
192
|
+
self.merge_result = merge_result
|
|
175
193
|
|
|
176
194
|
self.group_by_columns = group_by_columns
|
|
177
195
|
self.id_column = id_column
|
|
196
|
+
self.join_cols = join_cols or [self.id_column]
|
|
178
197
|
self.expand_on_id = expand_on_id
|
|
179
198
|
self.static_injected_data = inject_keys
|
|
180
199
|
self.process_llm_response = process_llm_response
|
|
@@ -277,11 +296,19 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
277
296
|
prompt_kwargs = (
|
|
278
297
|
self.get_extra_context(batch_result) if self.get_extra_context else {}
|
|
279
298
|
)
|
|
280
|
-
|
|
299
|
+
res = self.perform_extract(batch_data, prompt_kwargs, **kwargs)
|
|
300
|
+
if res:
|
|
301
|
+
batch_result += res
|
|
281
302
|
return batch_result
|
|
282
303
|
|
|
304
|
+
def extract_insight_one_shot(
|
|
305
|
+
self, data: List[Dict[str, Any]], **kwargs
|
|
306
|
+
) -> List[Dict[str, Any]]:
|
|
307
|
+
prompt_kwargs = self.get_extra_context(data) if self.get_extra_context else {}
|
|
308
|
+
return self.perform_extract(data, prompt_kwargs, **kwargs)
|
|
309
|
+
|
|
283
310
|
@track
|
|
284
|
-
def
|
|
311
|
+
def perform_extract(
|
|
285
312
|
self, data: List[Dict[str, Any]], prompt_kwargs=None, **kwargs
|
|
286
313
|
) -> List[Dict[str, Any]]:
|
|
287
314
|
"""
|
|
@@ -329,11 +356,17 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
329
356
|
mask &= default[col] == val
|
|
330
357
|
group_df = default[mask]
|
|
331
358
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
359
|
+
llm_input_columns = self.llm_input_columns or group_df.columns
|
|
360
|
+
if set(llm_input_columns) - set(group_df.columns):
|
|
361
|
+
continue
|
|
362
|
+
|
|
363
|
+
selected = group_df[[*llm_input_columns]]
|
|
364
|
+
|
|
365
|
+
if (
|
|
366
|
+
self.id_column in selected.columns
|
|
367
|
+
and selected[self.id_column].isnull().all()
|
|
368
|
+
):
|
|
369
|
+
continue
|
|
337
370
|
|
|
338
371
|
# If batch mode, create chunks otherwise use whole data
|
|
339
372
|
if self.batch_mode:
|
|
@@ -343,11 +376,16 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
343
376
|
]
|
|
344
377
|
else:
|
|
345
378
|
data = selected.to_dict("records")
|
|
346
|
-
group_name =
|
|
379
|
+
group_name = (
|
|
380
|
+
"-".join([group.get(c) for c in self.group_by_columns]) if group else ""
|
|
381
|
+
)
|
|
347
382
|
inputs.append({"group_name": group_name, "data": data})
|
|
348
383
|
|
|
349
384
|
def reduce_df(self, default: DataFrame, **kwargs) -> Dict[str, DataFrame]:
|
|
350
385
|
# Find the groups if group_by_columns set
|
|
386
|
+
if default.empty:
|
|
387
|
+
return {"default": default}
|
|
388
|
+
|
|
351
389
|
groups = (
|
|
352
390
|
default[[*self.group_by_columns]].drop_duplicates().to_dict("records")
|
|
353
391
|
if self.group_by_columns
|
|
@@ -359,28 +397,38 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
359
397
|
self._find_extract_inputs(default, groups, inputs)
|
|
360
398
|
|
|
361
399
|
extract_func = (
|
|
362
|
-
self.extract_insight_batch
|
|
400
|
+
self.extract_insight_batch
|
|
401
|
+
if self.batch_mode
|
|
402
|
+
else self.extract_insight_one_shot
|
|
363
403
|
)
|
|
364
404
|
results = []
|
|
365
405
|
for d in inputs:
|
|
366
406
|
results.extend(extract_func(**d))
|
|
367
407
|
results = pd.DataFrame(results)
|
|
368
408
|
|
|
369
|
-
if self.expand_on_id:
|
|
409
|
+
if self.expand_on_id and not results.empty:
|
|
370
410
|
redundant_cols = [
|
|
371
411
|
col
|
|
372
412
|
for col in results.columns
|
|
373
|
-
if col in default.columns
|
|
413
|
+
if col in default.columns
|
|
414
|
+
and col != self.id_column
|
|
415
|
+
and col not in set(self.join_cols)
|
|
374
416
|
]
|
|
375
417
|
results = (
|
|
376
418
|
results.explode(self.id_column)
|
|
377
419
|
.set_index(self.id_column)
|
|
378
420
|
.drop(columns=redundant_cols, axis=1)
|
|
379
|
-
|
|
421
|
+
)
|
|
422
|
+
if not self.merge_result:
|
|
423
|
+
return {"default": results.drop_duplicates()}
|
|
424
|
+
if not results.empty:
|
|
425
|
+
default = (
|
|
426
|
+
pd.merge(default, results, on=self.join_cols, how="left")
|
|
380
427
|
.reset_index()
|
|
428
|
+
.drop_duplicates()
|
|
381
429
|
)
|
|
382
430
|
|
|
383
|
-
return {"default":
|
|
431
|
+
return {"default": default}
|
|
384
432
|
|
|
385
433
|
def reduce_spf(
|
|
386
434
|
self, default: PySparkDataFrame, **kwargs
|
|
@@ -407,6 +455,9 @@ class LLMInsightExtractor(SFrameReducer):
|
|
|
407
455
|
# To avoid calling process group again, cache rdd and
|
|
408
456
|
# use count() to trigger running process_group and cache the result.
|
|
409
457
|
rdd = default.rdd.mapPartitions(process_group)
|
|
458
|
+
# Because the pandas result may contain math NaN values and
|
|
459
|
+
# these values are not valid for spark
|
|
460
|
+
rdd = rdd.map(math_nan_to_none)
|
|
410
461
|
rdd.cache()
|
|
411
462
|
rdd.count()
|
|
412
463
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/evaluation/evaluator/general/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/command/submit_to_network.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/pyproject._toml-tmpl
RENAMED
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/general/template/recommender.py-tmpl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/aggregator/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/augmenter/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/deriver/from_database.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pseudo/action_gate.py
RENAMED
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/pseudo/table_existence.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/block/__init__.py
RENAMED
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/block/base.py
RENAMED
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/random/__init__.py
RENAMED
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/random/base.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/splitter/time_line/base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/transformer/vectorizer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdk_seshat_python-0.4.1 → sdk_seshat_python-0.4.2}/seshat/utils/llm_client/chatbot_factory.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|