sdk-seshat-python 0.3.4__tar.gz → 0.3.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. sdk_seshat_python-0.3.13/PKG-INFO +40 -0
  2. sdk_seshat_python-0.3.13/pyproject.toml +59 -0
  3. sdk_seshat_python-0.3.13/seshat/__init__.py +321 -0
  4. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/__main__.py +1 -0
  5. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/data_class/pandas.py +1 -1
  6. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/feature_view/base.py +33 -15
  7. sdk_seshat_python-0.3.13/seshat/general/command/__init__.py +12 -0
  8. sdk_seshat_python-0.3.13/seshat/general/command/base.py +122 -0
  9. sdk_seshat_python-0.3.13/seshat/general/command/code_inspect.py +102 -0
  10. sdk_seshat_python-0.3.13/seshat/general/command/job_status.py +69 -0
  11. sdk_seshat_python-0.3.4/seshat/general/command/base.py → sdk_seshat_python-0.3.13/seshat/general/command/setup_project.py +3 -0
  12. sdk_seshat_python-0.3.13/seshat/general/command/submit_to_network.py +358 -0
  13. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/config.py +2 -0
  14. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/exceptions.py +63 -0
  15. sdk_seshat_python-0.3.13/seshat/general/models.py +16 -0
  16. sdk_seshat_python-0.3.13/seshat/general/template/jobignore-tmpl +28 -0
  17. sdk_seshat_python-0.3.13/seshat/profiler/base.py +291 -0
  18. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/profiler/decorator.py +4 -0
  19. sdk_seshat_python-0.3.13/seshat/profiler/format.py +30 -0
  20. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/base.py +3 -0
  21. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/database/base.py +3 -0
  22. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/flip_side/base.py +22 -10
  23. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/local/base.py +3 -0
  24. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/mixins.py +23 -1
  25. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/multisource/base.py +5 -0
  26. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/saver/base.py +3 -0
  27. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/saver/database.py +9 -25
  28. sdk_seshat_python-0.3.13/seshat/transformer/aggregator/base.py +107 -0
  29. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/base.py +3 -0
  30. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/deriver/base.py +1068 -6
  31. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/deriver/from_database.py +3 -0
  32. sdk_seshat_python-0.3.13/seshat/transformer/merger/__init__.py +2 -0
  33. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/merger/base.py +9 -0
  34. sdk_seshat_python-0.3.13/seshat/transformer/merger/nested_key.py +84 -0
  35. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/pipeline/base.py +3 -0
  36. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/pipeline/branch.py +11 -3
  37. sdk_seshat_python-0.3.13/seshat/transformer/pseudo/__init__.py +1 -0
  38. sdk_seshat_python-0.3.13/seshat/transformer/pseudo/table_existence.py +34 -0
  39. sdk_seshat_python-0.3.13/seshat/transformer/reducer/base.py +562 -0
  40. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/schema/base.py +3 -0
  41. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/base.py +3 -0
  42. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/trimmer/base.py +163 -1
  43. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/vectorizer/cosine_similarity.py +3 -0
  44. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/vectorizer/pivot.py +3 -0
  45. sdk_seshat_python-0.3.13/seshat/utils/__init__.py +8 -0
  46. sdk_seshat_python-0.3.13/seshat/utils/batcher.py +49 -0
  47. sdk_seshat_python-0.3.13/seshat/utils/binary_utils.py +91 -0
  48. sdk_seshat_python-0.3.13/seshat/utils/clean_json.py +315 -0
  49. sdk_seshat_python-0.3.13/seshat/utils/file.py +46 -0
  50. sdk_seshat_python-0.3.13/seshat/utils/file_cryptography.py +114 -0
  51. sdk_seshat_python-0.3.13/seshat/utils/filter_json.py +7 -0
  52. sdk_seshat_python-0.3.13/seshat/utils/grouper.py +126 -0
  53. sdk_seshat_python-0.3.13/seshat/utils/jobignore.py +151 -0
  54. sdk_seshat_python-0.3.13/seshat/utils/llm_client/__init__.py +3 -0
  55. sdk_seshat_python-0.3.13/seshat/utils/llm_client/chatbot_factory.py +76 -0
  56. sdk_seshat_python-0.3.13/seshat/utils/logging/__init__.py +3 -0
  57. sdk_seshat_python-0.3.13/seshat/utils/logging/base_logger.py +56 -0
  58. sdk_seshat_python-0.3.13/seshat/utils/logging/console_logger.py +62 -0
  59. sdk_seshat_python-0.3.13/seshat/utils/logging/logstash_logger.py +136 -0
  60. sdk_seshat_python-0.3.13/seshat/utils/logging/multi_logger.py +67 -0
  61. sdk_seshat_python-0.3.13/seshat/utils/obfuscate.py +229 -0
  62. sdk_seshat_python-0.3.13/seshat/utils/package_utils.py +130 -0
  63. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/pandas_func.py +8 -2
  64. sdk_seshat_python-0.3.13/seshat/utils/rest.py +294 -0
  65. sdk_seshat_python-0.3.13/seshat/utils/zip_utils.py +123 -0
  66. sdk_seshat_python-0.3.4/PKG-INFO +0 -24
  67. sdk_seshat_python-0.3.4/pyproject.toml +0 -32
  68. sdk_seshat_python-0.3.4/seshat/__init__.py +0 -43
  69. sdk_seshat_python-0.3.4/seshat/general/command/__init__.py +0 -1
  70. sdk_seshat_python-0.3.4/seshat/profiler/base.py +0 -222
  71. sdk_seshat_python-0.3.4/seshat/transformer/merger/__init__.py +0 -1
  72. sdk_seshat_python-0.3.4/seshat/transformer/reducer/base.py +0 -6
  73. sdk_seshat_python-0.3.4/seshat/utils/file.py +0 -9
  74. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/LICENSE +0 -0
  75. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/README.md +0 -0
  76. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/data_class/__init__.py +0 -0
  77. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/data_class/base.py +0 -0
  78. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/data_class/pyspark.py +0 -0
  79. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/__init__.py +0 -0
  80. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/base.py +0 -0
  81. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/__init__.py +0 -0
  82. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/base.py +0 -0
  83. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/general/__init__.py +0 -0
  84. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/general/classification.py +0 -0
  85. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/general/clustering.py +0 -0
  86. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/general/regression.py +0 -0
  87. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/recommendation/__init__.py +0 -0
  88. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/recommendation/diversity.py +0 -0
  89. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/evaluation/evaluator/recommendation/ranking.py +0 -0
  90. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/feature_view/__init__.py +0 -0
  91. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/__init__.py +0 -0
  92. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/lazy_config.py +0 -0
  93. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/template/README.md-tmpl +0 -0
  94. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/template/config.py-tmpl +0 -0
  95. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/template/env-templ +0 -0
  96. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/template/pyproject._toml-tmpl +0 -0
  97. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/template/recommender-jupyter.ipynb-tmpl +0 -0
  98. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/general/template/recommender.py-tmpl +0 -0
  99. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/profiler/__init__.py +0 -0
  100. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/__init__.py +0 -0
  101. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/database/__init__.py +0 -0
  102. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/exceptions.py +0 -0
  103. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/flip_side/__init__.py +0 -0
  104. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/local/__init__.py +0 -0
  105. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/multisource/__init__.py +0 -0
  106. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/saver/__init__.py +0 -0
  107. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/saver/utils/__init__.py +0 -0
  108. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/source/saver/utils/postgres.py +0 -0
  109. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/__init__.py +0 -0
  110. {sdk_seshat_python-0.3.4/seshat/transformer/augmenter → sdk_seshat_python-0.3.13/seshat/transformer/aggregator}/__init__.py +0 -0
  111. {sdk_seshat_python-0.3.4/seshat/transformer/imputer → sdk_seshat_python-0.3.13/seshat/transformer/augmenter}/__init__.py +0 -0
  112. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/augmenter/base.py +0 -0
  113. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/deriver/__init__.py +0 -0
  114. {sdk_seshat_python-0.3.4/seshat/transformer/pipeline/recommendation → sdk_seshat_python-0.3.13/seshat/transformer/imputer}/__init__.py +0 -0
  115. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/imputer/base.py +0 -0
  116. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/pipeline/__init__.py +0 -0
  117. {sdk_seshat_python-0.3.4/seshat/transformer/reducer → sdk_seshat_python-0.3.13/seshat/transformer/pipeline/recommendation}/__init__.py +0 -0
  118. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/pipeline/recommendation/address_pipeline.py +0 -0
  119. {sdk_seshat_python-0.3.4/seshat/transformer/scaler → sdk_seshat_python-0.3.13/seshat/transformer/reducer}/__init__.py +0 -0
  120. {sdk_seshat_python-0.3.4/seshat/utils → sdk_seshat_python-0.3.13/seshat/transformer/scaler}/__init__.py +0 -0
  121. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/scaler/base.py +0 -0
  122. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/schema/__init__.py +0 -0
  123. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/__init__.py +0 -0
  124. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/block/__init__.py +0 -0
  125. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/block/base.py +0 -0
  126. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/random/__init__.py +0 -0
  127. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/random/base.py +0 -0
  128. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/time_line/__init__.py +0 -0
  129. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/splitter/time_line/base.py +0 -0
  130. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/trimmer/__init__.py +0 -0
  131. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/vectorizer/__init__.py +0 -0
  132. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/vectorizer/base.py +0 -0
  133. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/transformer/vectorizer/utils.py +0 -0
  134. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/col_to_list.py +0 -0
  135. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/contracts.py +0 -0
  136. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/join_columns_to_list.py +0 -0
  137. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/join_str.py +0 -0
  138. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/memory.py +0 -0
  139. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/mixin.py +0 -0
  140. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/patching.py +0 -0
  141. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/pyspark_func.py +0 -0
  142. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/singleton.py +0 -0
  143. {sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/utils/validation.py +0 -0
@@ -0,0 +1,40 @@
1
+ Metadata-Version: 2.1
2
+ Name: sdk-seshat-python
3
+ Version: 0.3.13
4
+ Summary: Seshat python SDK is a library to help create ML data pipelines.
5
+ License: Commercial - see LICENSE.txt
6
+ Author: SeshatLabs
7
+ Author-email: info@seshatlabs.xyz
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Provides-Extra: flipside-support
13
+ Provides-Extra: postgres-support
14
+ Requires-Dist: backoff (>=2.2.1,<3.0.0)
15
+ Requires-Dist: bokeh (>=3.6.0,<4.0.0)
16
+ Requires-Dist: boto3 (>=1.35.68,<2.0.0)
17
+ Requires-Dist: croniter (>=6.0.0,<7.0.0)
18
+ Requires-Dist: cryptography (>=44.0.0,<45.0.0)
19
+ Requires-Dist: dask[array,complete,dataframe,distributed] (>=2024.10.0,<2025.0.0)
20
+ Requires-Dist: flipside (>=2.1.0,<3.0.0) ; extra == "flipside-support"
21
+ Requires-Dist: langchain (>=0.3.23,<0.4.0)
22
+ Requires-Dist: langchain-community (>=0.3.21,<0.4.0)
23
+ Requires-Dist: langchain-openai (>=0.3.12,<0.4.0)
24
+ Requires-Dist: loguru (>=0.7.3,<0.8.0)
25
+ Requires-Dist: memory-profiler (>=0.61.0,<0.62.0)
26
+ Requires-Dist: openai (>=1.73.0,<2.0.0)
27
+ Requires-Dist: pandas (>=2.2.1,<3.0.0)
28
+ Requires-Dist: pyarmor (>=8.5.1,<9.0.0)
29
+ Requires-Dist: pydantic (>=2.7.4,<3.0.0)
30
+ Requires-Dist: pyspark (>=3.5.1,<4.0.0)
31
+ Requires-Dist: python-logstash-async (>=4.0.2,<5.0.0)
32
+ Requires-Dist: requests (==2.32.0)
33
+ Requires-Dist: rich (>=13.9.4,<14.0.0)
34
+ Requires-Dist: scikit-learn (>=1.4.1.post1,<2.0.0)
35
+ Requires-Dist: sqlalchemy (>=2.0.29,<3.0.0)
36
+ Requires-Dist: toml (>=0.10.2,<0.11.0)
37
+ Requires-Dist: typer (>=0.12.3,<0.13.0)
38
+ Description-Content-Type: text/markdown
39
+
40
+
@@ -0,0 +1,59 @@
1
+ [tool.poetry]
2
+ name = "sdk-seshat-python"
3
+ version = "0.3.13"
4
+ description = "Seshat python SDK is a library to help create ML data pipelines."
5
+ authors = ["SeshatLabs <info@seshatlabs.xyz>"]
6
+ packages = [{ include = "seshat", from = "." }]
7
+ readme = "README.md"
8
+ license = "Commercial - see LICENSE.txt"
9
+
10
+
11
+ [tool.poetry.dependencies]
12
+ python = "^3.11"
13
+ pandas = "^2.2.1"
14
+ scikit-learn = "^1.4.1.post1"
15
+ pyspark = "^3.5.1"
16
+ flipside = "^2.1.0"
17
+ sqlalchemy = "^2.0.29"
18
+ memory-profiler = "^0.61.0"
19
+ typer = "^0.12.3"
20
+ dask = {extras = ["array", "complete", "dataframe", "distributed"], version = "^2024.10.0"}
21
+ bokeh = "^3.6.0"
22
+ toml = "^0.10.2"
23
+ rich = "^13.9.4"
24
+ boto3 = "^1.35.68"
25
+ requests = "2.32.0"
26
+ backoff = "^2.2.1"
27
+ cryptography = "^44.0.0"
28
+ loguru = "^0.7.3"
29
+ openai = "^1.73.0"
30
+ pydantic = "^2.7.4"
31
+ langchain = "^0.3.23"
32
+ langchain-community = "^0.3.21"
33
+ langchain-openai = "^0.3.12"
34
+ pyarmor = "^8.5.1"
35
+ croniter = "^6.0.0"
36
+ python-logstash-async = "^4.0.2"
37
+
38
+ [tool.poetry.extras]
39
+ flipside_support = ["flipside"]
40
+ postgres_support = ["psycopg2-binary"]
41
+
42
+ [tool.poetry.group.dev.dependencies]
43
+ flake8 = "^7.0.0"
44
+ black = "^24.3.0"
45
+ pre-commit = "^3.7.0"
46
+ pytest = "^8.3.4"
47
+
48
+ [build-system]
49
+ requires = ["poetry-core>=1.0.0"]
50
+ build-backend = "poetry.core.masonry.api"
51
+
52
+ [tool.pytest.ini_options]
53
+ pythonpath = [
54
+ ".",
55
+ "src",
56
+ ]
57
+ testpaths = [
58
+ "test",
59
+ ]
@@ -0,0 +1,321 @@
1
+ import os.path
2
+ from pathlib import Path
3
+
4
+ import toml
5
+ import typer
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+
9
+ from seshat.general.command import (
10
+ SubmitCommand,
11
+ JobMetadata,
12
+ JobExecutionSchedule,
13
+ ExecutionMode,
14
+ )
15
+ from seshat.general.command.code_inspect import CodeInspectCommand
16
+ from seshat.general.command.job_status import JobStatusCommand
17
+ from seshat.general.command.setup_project import RECOMMENDATION, SetUpProjectCommand
18
+ from seshat.general.exceptions import NoConfigSetError, RestClientException
19
+
20
# Typer application that every command in this module registers on.
app = typer.Typer()

# Rich console used to render result tables (see the "submit" command).
console = Console()

# Fallback for the job's ``data_size`` when the config file does not set one.
DEFAULT_DATA_SIZE = 1_000_000  # 1 GB

# Process-wide CLI state; "verbose" is filled in by the ``main`` callback.
state = {"verbose": False}
25
+
26
+
27
def load_config(config_file: Path = Path.home() / ".codemanager.toml") -> dict:
    """Read the TOML configuration file and return it as a dictionary.

    Args:
        config_file: Location of the config file; defaults to
            ``~/.codemanager.toml``.

    Returns:
        The parsed configuration.

    Raises:
        NoConfigSetError: If the file does not exist or is not valid TOML.
    """
    try:
        # Guard first: a missing file is reported as "not configured".
        if not config_file.exists():
            raise NoConfigSetError("Config file not found.")
        return toml.load(config_file)
    except toml.TomlDecodeError:
        raise NoConfigSetError("Config file is malformed.")
35
+
36
+
37
@app.command(name="create-project")
def create_project(name: str, usecase=typer.Option(default=RECOMMENDATION)):
    """Create a new project skeleton for the given use case."""
    # The project is set up relative to the current working directory; the
    # command only reports its progress when the global verbose flag is set.
    command = SetUpProjectCommand(name, usecase, os.getcwd(), report=state["verbose"])
    try:
        command.handle()
    except Exception as exc:
        failure_msg = typer.style(
            f"Setup project in usecase {usecase} failed because of {str(exc)}",
            fg=typer.colors.RED,
            bold=True,
        )
        typer.echo(failure_msg)
        # Signal the failure to the shell, like the other commands in this
        # module do; previously a failed setup still exited with status 0.
        raise typer.Exit(1)

    # NOTE(review): no "deploy" command is registered in this module; confirm
    # whether this hint should point at "submit" instead.
    success_msg = typer.style(
        f"""
        Setup project in usecase {usecase} done!\n
        You can deploy your project by this command 🚀:
        'python -m seshat deploy'
        """,
        fg=typer.colors.GREEN,
        bold=True,
    )
    typer.echo(success_msg)
59
+
60
+
61
@app.command(name="inspect")
def inspect_code(
    directory: str = typer.Argument(..., help="Directory containing the code"),
    config_file: Path = typer.Option(
        Path.home() / ".codemanager.toml", help="Path to config file"
    ),
):
    # Runs CodeInspectCommand on `directory` and writes the value it returns
    # back into the config file as code.complexity_factor. Any failure is
    # reported on stderr and the process exits with status 1.
    try:
        settings = load_config(config_file)
        if not settings:
            raise NoConfigSetError()

        inspector = CodeInspectCommand(settings)
        measured = inspector.handle(directory)

        # Keep every other key of the "code" section and overwrite only the
        # complexity factor.
        settings["code"] = {**settings.get("code", {}), "complexity_factor": measured}

        with open(config_file, "w") as handle:
            toml.dump(settings, handle)

        typer.echo(f"Configuration updated in {config_file}")
    except Exception as e:
        typer.echo(f"Error: {str(e)}", err=True)
        raise typer.Exit(1)
84
+
85
+
86
@app.command(name="submit")
def submit_job(
    directory: str = typer.Argument(..., help="Directory containing the code"),
    name: str = typer.Option(..., help="Name of the package"),
    version: str = typer.Option(..., help="Version of the package"),
    executor_image_tag: str = typer.Option(
        "latest", help="Image tag of the executor which runs the job"
    ),
    confidential_level: str = typer.Option(
        "default", help="Confidential level desired for the pipeline "
    ),
    execution_mode: str = typer.Option(
        "single", help="Execution mode for the pipeline "
    ),
    config_file: Path = typer.Option(
        Path.home() / ".codemanager.toml", help="Path to config file"
    ),
):
    """Package the code directory, store it and submit it as a job."""
    # Any failure below is reported on stderr and ends with exit status 1.
    try:
        config = load_config(config_file)
        if not config:
            raise NoConfigSetError()
        manager = SubmitCommand(config)
        code_config = config.get("code", {})

        # The schedule is only built when the config has an "execution" section.
        job_execution_schedule = None
        if "execution" in config:
            job_execution_schedule = JobExecutionSchedule(
                **config.get("execution", {}).get("plan", {})
            )

        # pipeline_hash is filled in once the package has been built;
        # secret_key and iv are passed on empty from here.
        job_metadata = JobMetadata(
            pipeline_hash="",
            confidential_level=confidential_level,
            execution_mode=ExecutionMode(execution_mode),
            execution_plan=job_execution_schedule,
            main_file_path=code_config.get("main_file", "main.py"),
            env_file_path=code_config.get("env_file", ".env"),
            data_size=code_config.get("data_size", DEFAULT_DATA_SIZE),
            complexity_factor=code_config.get("complexity_factor", 0),
            requirement_file=None,
            requirements_type=None,
            secret_key="",
            iv="",
        )

        package = manager.handle(
            directory, name, version, executor_image_tag, metadata=job_metadata
        )

        # The flag may be a native TOML boolean (obfuscate_code = true) or a
        # string ("true"/"false"); calling .lower() on a bool used to raise
        # AttributeError, so normalise both spellings here.
        obfuscate_code = code_config.get("obfuscate_code", "false")
        if not isinstance(obfuscate_code, bool):
            obfuscate_code = str(obfuscate_code).lower() == "true"
        if obfuscate_code:
            package = manager.obfuscate_code(package)

        # Hash is taken after the optional obfuscation step, so it describes
        # the package that is actually stored.
        job_metadata.pipeline_hash = package.hash
        identifier = manager.store_code(package)

        job_response = manager.submit_job(
            identifier, name, version, executor_image_tag, job_metadata
        )
        job_response_data = job_response.get("data", {})

        table = Table(title="Upload Summary")
        table.add_column("Property", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Name", package.name)
        table.add_row("Version", package.version)
        table.add_row("ExecutorImageTag", package.executor_image_tag)
        table.add_row("Hash", package.hash)
        table.add_row("S3 Location", identifier)
        table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
        table.add_row("Status", job_response_data.get("status", {}).get("state", "N/A"))

        console.print(table)

    except Exception as e:
        typer.echo(f"Error: {str(e)}", err=True)
        raise typer.Exit(1)
165
+
166
+
167
@app.command(name="configure")
def configure_job(
    bucket: str = typer.Option(..., prompt=True, help="S3 bucket name"),
    prefix: str = typer.Option("code", prompt=True, help="S3 prefix"),
    aws_access_key_id: str = typer.Option(..., prompt=True, help="AWS access key ID"),
    aws_secret_access_key: str = typer.Option(
        ..., prompt=True, hide_input=True, help="AWS secret access key"
    ),
    aws_region: str = typer.Option("us-east-1", prompt=True, help="AWS region"),
    api_base_url: str = typer.Option(
        ..., prompt=True, show_default=True, help="API base URL"
    ),
    api_auth_token: str = typer.Option(
        ...,
        prompt=True,
        hide_input=True,
        prompt_required=True,
        help="API authentication token",
    ),
    main_file_path: str = typer.Option(
        ...,
        prompt=True,
        hide_input=False,
        prompt_required=True,
        help="Path to main file",
    ),
    env_file_path: str = typer.Option(
        ".env",
        prompt=True,
        hide_input=False,
        prompt_required=True,
        # Was a copy of the main-file help text.
        help="Path to env file",
    ),
    ignore_file: str = typer.Option(
        ".jobignore",
        prompt=True,
        help="Path to file containing ignore patterns for job files",
    ),
    schedule_mode: str = typer.Option(
        # The examples now name the values JobScheduleMode actually defines.
        "once", prompt=True, help="Execution schedule mode (e.g. once, cron)"
    ),
    start_time: str = typer.Option(
        "", prompt=True, help="Start time (YYYY-MM-DDTHH:MM)"
    ),
    until: str = typer.Option("", prompt=True, help="Until time (YYYY-MM-DDTHH:MM)"),
    cron_expression: str = typer.Option(
        "", prompt=True, help="Cron expression for schedule (if applicable)"
    ),
    run_overlap: str = typer.Option(
        "true", prompt=True, help="Allow overlapping runs (true/false)"
    ),
    data_size: float = typer.Option(
        DEFAULT_DATA_SIZE,
        prompt=True,
        help="Estimate of size of data you want processed",
    ),
    complexity_factor: float = typer.Option(
        None,
        prompt=True,
        help="Estimate of process complexity (between 1 to 1000, higher takes more time and is more expensive)",
    ),
    config_file: Path = typer.Option(
        Path.home() / ".codemanager.toml", help="Path to config file"
    ),
):
    """Collect the job settings and write them to the config file."""
    # Section and key names below are the ones the other commands in this
    # module read back ("api.base_url", "code.main_file", "execution.plan", ...).
    config = {
        "aws": {
            "bucket": bucket,
            "prefix": prefix,
            "access_key_id": aws_access_key_id,
            "secret_access_key": aws_secret_access_key,
            "region": aws_region,
        },
        "api": {
            "base_url": api_base_url,
            "auth_token": api_auth_token,
        },
        "code": {
            "main_file": main_file_path,
            "ignore_file": ignore_file,
            "env_file": env_file_path,
            "data_size": data_size,
            "complexity_factor": complexity_factor,
        },
        "execution": {
            "plan": {
                "schedule_mode": schedule_mode,
                "start_time": start_time,
                "until": until,
                "cron_expression": cron_expression,
                # Stored as the string the user typed ("true"/"false").
                "run_overlap": run_overlap,
            }
        },
    }

    # Create the parent directory if needed, then overwrite any existing file.
    config_file.parent.mkdir(parents=True, exist_ok=True)
    with open(config_file, "w") as f:
        toml.dump(config, f)

    typer.echo(f"Configuration saved to {config_file}")
267
+
268
+
269
@app.command(name="job-status")
def job_status(
    job_id: str = typer.Argument(..., help="The job ID to check"),
    config_file: Path = typer.Option(
        Path.home() / ".codemanager.toml", help="Path to config file"
    ),
    api_base_url: str = typer.Option(None, help="API base URL"),
    api_auth_token: str = typer.Option(
        None,
        help="API authentication token",
    ),
):
    """Check the status of a submitted job"""
    # Exit status: 2 for configuration problems, 1 for API or unexpected errors.
    try:
        try:
            config = load_config(config_file)
        except NoConfigSetError:
            # The except target must be the exception class. The previous
            # `NoConfigSetError()` instance made the match itself raise
            # TypeError, so a missing config file could never fall back to
            # the command-line options.
            config = None

        # Without a usable config file both API options are mandatory.
        if not config and not (api_base_url and api_auth_token):
            raise NoConfigSetError()

        # Command-line options take precedence over the config file.
        api_config = (config or {}).get("api", {})
        base_url = api_base_url or api_config.get("base_url")
        auth_token = api_auth_token or api_config.get("auth_token")

        if not base_url:
            typer.echo("base_url is not set")
            raise NoConfigSetError(
                "base_url parameter must be set in the config file or as command option"
            )
        if not auth_token:
            typer.echo("auth_token is not set")
            raise NoConfigSetError(
                "auth_token parameter must be set in the config file or as command option"
            )

        manager = JobStatusCommand(base_url=base_url, auth_token=auth_token)
        manager.job_status(job_id)

    except NoConfigSetError as e:
        typer.echo(f"Configuration error: {e}")
        raise typer.Exit(2)
    except RestClientException as e:
        typer.echo(f"API error: {e}")
        raise typer.Exit(1)
    except Exception as e:
        typer.echo(f"Unexpected error: {e}")
        raise typer.Exit(1)
317
+
318
+
319
@app.callback()
def main(verbose: bool = False):
    # Application-level callback: records the global verbose flag in the
    # shared `state` dict, where create_project reads it.
    state.update(verbose=verbose)
@@ -8,4 +8,5 @@ try:
8
8
  rc = 0
9
9
  except Exception as e:
10
10
  print("Error: %s" % e, file=sys.stderr)
11
+
11
12
  sys.exit(rc)
@@ -19,7 +19,7 @@ class DFrame(SFrame):
19
19
  return GroupSFrame({default_key: self}, sframe_class=self.__class__)
20
20
 
21
21
  def get_columns(self, *args) -> Iterable[str]:
22
- return self.data.columns
22
+ return self.data.columns.tolist()
23
23
 
24
24
  def to_dict(self, *cols: str, key=configs.DEFAULT_SF_KEY) -> List[Dict]:
25
25
  selected = self.data[list(cols)] if cols else self.data
@@ -4,7 +4,7 @@ from typing import Dict, Callable
4
4
  from seshat.data_class import SFrame
5
5
  from seshat.evaluation.base import Evaluation
6
6
  from seshat.profiler import ProfileConfig
7
- from seshat.profiler.base import Profiler, profiler
7
+ from seshat.profiler.base import profiler
8
8
  from seshat.source import Source
9
9
  from seshat.source.saver import Saver
10
10
  from seshat.transformer.pipeline import Pipeline
@@ -83,20 +83,38 @@ class FeatureView:
83
83
  evaluation: Evaluation
84
84
 
85
85
  def __call__(self, *args, **kwargs):
86
- Profiler.setup(self.profile_config)
87
86
  source = self._get_source()
88
87
  pipeline = self._get_pipeline()
89
88
 
90
- self.data = source(*args, **kwargs)
91
- if self.split_at_start:
92
- self._split(*args, **kwargs)
93
- self.run_pipline_on_split_data(pipeline, *args, **kwargs)
94
- else:
95
- self.data = pipeline(self.data, *args, **kwargs)
89
+ profiler.setup(config=self.profile_config)
90
+ with profiler:
91
+ self.data = source(*args, **kwargs)
92
+ if self.split_at_start:
93
+ self._split(*args, **kwargs)
94
+ self.run_pipline_on_split_data(pipeline, *args, **kwargs)
95
+ else:
96
+ self.data = pipeline(self.data, *args, **kwargs)
96
97
 
97
- profiler.tear_down()
98
98
  return self
99
99
 
100
def calculate_complexity(self):
    """Return the combined complexity of the saver, source and pipeline.

    The saver contributes only when one is configured. The source and
    pipeline pair is the online one when ``self.online`` is truthy and the
    offline one otherwise.
    """
    total = 0
    if self.saver is not None:
        total += self.saver.calculate_complexity()

    # Pick the source/pipeline pair that matches the current mode.
    if self.online:
        source, pipeline = self.online_source, self.online_pipeline
    else:
        source, pipeline = self.offline_source, self.offline_pipeline

    return total + source.calculate_complexity() + pipeline.calculate_complexity()
117
+
100
118
  def run_pipline_on_split_data(self, pipeline, *args, **kwargs):
101
119
  for k, data in self.split_data.items():
102
120
  self.split_data[k] = pipeline(data, *args, **kwargs)
@@ -119,12 +137,12 @@ class FeatureView:
119
137
  def save(self):
120
138
  if self.saver is None:
121
139
  return
122
-
123
- (
124
- self.saver(self.train_data())
125
- if hasattr(self, "splitter")
126
- else self.saver(self.data)
127
- )
140
+ with profiler:
141
+ (
142
+ self.saver(self.train_data())
143
+ if hasattr(self, "splitter")
144
+ else self.saver(self.data)
145
+ )
128
146
 
129
147
  def _split(self, *args, **kwargs):
130
148
  self.split_data = self.splitter(self.data, *args, **kwargs)
@@ -0,0 +1,12 @@
1
+ from .setup_project import SetUpProjectCommand
2
+ from .submit_to_network import SubmitCommand
3
+ from .base import JobMetadata, JobExecutionSchedule, ExecutionMode, JobScheduleMode
4
+
5
+ __all__ = [
6
+ "SetUpProjectCommand",
7
+ "SubmitCommand",
8
+ "JobMetadata",
9
+ "JobExecutionSchedule",
10
+ "ExecutionMode",
11
+ "JobScheduleMode",
12
+ ]
@@ -0,0 +1,122 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from enum import StrEnum
4
+ from typing import Optional
5
+
6
+ import typer
7
+ from croniter import croniter
8
+
9
+
10
+ class ExecutionMode(StrEnum):
11
+ SINGLE = "single"
12
+ CLUSTER = "cluster"
13
+ STREAM = "stream"
14
+
15
+
16
+ class JobScheduleMode(StrEnum):
17
+ ONCE = "once"
18
+ CRON = "cron"
19
+
20
+
21
@dataclass
class ApiConfig:
    """API connection settings; every field must be provided (not ``None``).

    Raises:
        ValueError: From ``__post_init__`` when any field is ``None``.
    """

    base_url: str
    auth_token: str

    def __post_init__(self):
        # Report the first field (in declaration order) that was left as None.
        missing = next(
            (name for name, value in vars(self).items() if value is None), None
        )
        if missing is not None:
            raise ValueError(f"The field '{missing}' cannot be None.")
30
+
31
+
32
@dataclass
class JobExecutionSchedule:
    """Schedule settings for a job, validated on construction.

    ``start_time`` and ``until`` may be empty/``None``, a ``datetime``, or a
    string in the form ``YYYY-MM-DDTHH:MM``. ``cron_expression`` is checked
    with ``croniter.is_valid`` when it is non-empty. ``schedule_mode`` and
    ``run_overlap`` are stored as given; ``__post_init__`` does not check them.

    Raises:
        ValueError: For a malformed or impossible date/time string, or an
            invalid cron expression.
        TypeError: When ``start_time``/``until`` is neither a string nor a
            ``datetime``.
    """

    schedule_mode: JobScheduleMode
    start_time: datetime | None
    until: datetime | None
    cron_expression: str | None
    run_overlap: bool = True

    def __post_init__(self):
        import re
        from datetime import datetime

        timestamp_format = "%Y-%m-%dT%H:%M"
        timestamp_pattern = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$"

        for field in ("start_time", "until"):
            value = getattr(self, field)
            # Empty values (None, "") mean "not set" and are left untouched.
            if not value:
                continue

            if isinstance(value, str):
                # Shape check first, then a real parse to reject dates such
                # as month 13 that still match the pattern.
                if re.match(timestamp_pattern, value) is None:
                    raise ValueError(
                        f"{field} must be in format YYYY-MM-DDTHH:MM, e.g., 2025-08-12T18:35"
                    )
                try:
                    datetime.strptime(value, timestamp_format)
                except ValueError:
                    raise ValueError(f"{field} is not a valid date/time: {value}")
            elif not isinstance(value, datetime):
                raise TypeError(
                    f"{field} must be a string in format YYYY-MM-DDTHH:MM or a datetime object"
                )

        cron_value = self.cron_expression
        if cron_value and not croniter.is_valid(cron_value):
            raise ValueError(
                "cron_expression must be a valid 5-field cron string, e.g., '0 0 * * *'"
            )
71
+
72
+
73
@dataclass
class JobMetadata:
    """Metadata describing one job submitted to the pipeline system.

    Attributes:
        pipeline_hash: Unique identifier hash for the pipeline instance.
        confidential_level: Security classification level of the job.
        execution_mode: How the job should be executed; one of the
            ``ExecutionMode`` values ('single', 'cluster', 'stream').
        execution_plan: Schedule configuration for the job, or ``None`` if
            the job runs immediately or on-demand.
        main_file_path: Path to the main executable file or script.
        env_file_path: Path to the environment configuration file.
        requirement_file: Path to the file containing job dependencies;
            may be ``None``.
        requirements_type: Type of requirements specification
            (e.g., 'pip', 'conda', 'poetry'); may be ``None``.
        complexity_factor: Numerical measure of job computational
            complexity, used for resource allocation.
        data_size: Expected size of data in KB to be processed in the job.
        secret_key: Encryption key for securing sensitive job data.
        iv: Initialization vector for encryption operations.
    """

    # Field order is part of the constructor signature; keep it stable.
    pipeline_hash: str
    confidential_level: str
    execution_mode: ExecutionMode
    execution_plan: Optional[JobExecutionSchedule]
    main_file_path: str
    env_file_path: str
    requirement_file: Optional[str]
    requirements_type: Optional[str]
    complexity_factor: float
    data_size: float
    secret_key: str
    iv: str
113
+
114
+
115
class BaseTyperCommand:
    """Base class for CLI commands whose progress output is opt-in.

    Args:
        report: When true, ``echo`` forwards messages to ``typer.echo``;
            otherwise ``echo`` is silent.
    """

    def __init__(self, report: bool = False):
        self.report = report

    def echo(self, msg, *args, **kwargs):
        """Print ``msg`` via ``typer.echo`` only if reporting is enabled."""
        if self.report:
            typer.echo(msg, *args, **kwargs)