sdk-seshat-python 0.4.2__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/PKG-INFO +2 -1
  2. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/pyproject.toml +2 -1
  3. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/__init__.py +156 -52
  4. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/base.py +1 -1
  5. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/base.py +6 -2
  6. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/submit_to_network.py +115 -15
  7. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/config.py +1 -0
  8. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/mixins.py +29 -3
  9. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/database.py +14 -28
  10. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/utils/postgres.py +74 -11
  11. sdk_seshat_python-0.4.4/seshat/transformer/aggregator/base.py +160 -0
  12. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/__init__.py +2 -1
  13. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/base.py +405 -147
  14. sdk_seshat_python-0.4.4/seshat/transformer/imputer/base.py +57 -0
  15. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/nested_key.py +32 -37
  16. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/branch.py +7 -0
  17. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/action_gate.py +1 -0
  18. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/trimmer/base.py +14 -0
  19. sdk_seshat_python-0.4.4/seshat/utils/date_utils.py +29 -0
  20. sdk_seshat_python-0.4.4/seshat/utils/llm_client/chatbot_factory.py +165 -0
  21. sdk_seshat_python-0.4.2/seshat/transformer/aggregator/base.py +0 -107
  22. sdk_seshat_python-0.4.2/seshat/transformer/imputer/base.py +0 -6
  23. sdk_seshat_python-0.4.2/seshat/utils/llm_client/chatbot_factory.py +0 -76
  24. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/LICENSE +0 -0
  25. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/README.md +0 -0
  26. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/__main__.py +0 -0
  27. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/__init__.py +0 -0
  28. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/pandas.py +0 -0
  29. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/pyspark.py +0 -0
  30. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/__init__.py +0 -0
  31. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/base.py +0 -0
  32. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/__init__.py +0 -0
  33. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/base.py +0 -0
  34. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/__init__.py +0 -0
  35. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/classification.py +0 -0
  36. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/clustering.py +0 -0
  37. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/regression.py +0 -0
  38. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/__init__.py +0 -0
  39. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/diversity.py +0 -0
  40. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/ranking.py +0 -0
  41. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/feature_view/__init__.py +0 -0
  42. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/feature_view/base.py +0 -0
  43. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/__init__.py +0 -0
  44. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/__init__.py +0 -0
  45. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/code_inspect.py +0 -0
  46. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/job_status.py +0 -0
  47. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/setup_project.py +0 -0
  48. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/exceptions.py +0 -0
  49. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/lazy_config.py +0 -0
  50. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/models.py +0 -0
  51. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/README.md-tmpl +0 -0
  52. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/config.py-tmpl +0 -0
  53. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/env-templ +0 -0
  54. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/jobignore-tmpl +0 -0
  55. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/pyproject._toml-tmpl +0 -0
  56. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/recommender-jupyter.ipynb-tmpl +0 -0
  57. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/recommender.py-tmpl +0 -0
  58. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/__init__.py +0 -0
  59. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/base.py +0 -0
  60. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/decorator.py +0 -0
  61. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/format.py +0 -0
  62. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/__init__.py +0 -0
  63. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/base.py +0 -0
  64. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/database/__init__.py +0 -0
  65. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/database/base.py +0 -0
  66. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/exceptions.py +0 -0
  67. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/flip_side/__init__.py +0 -0
  68. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/flip_side/base.py +0 -0
  69. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/local/__init__.py +0 -0
  70. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/local/base.py +0 -0
  71. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/multisource/__init__.py +0 -0
  72. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/multisource/base.py +0 -0
  73. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/__init__.py +0 -0
  74. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/base.py +0 -0
  75. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/utils/__init__.py +0 -0
  76. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/__init__.py +0 -0
  77. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/aggregator/__init__.py +0 -0
  78. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/augmenter/__init__.py +0 -0
  79. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/augmenter/base.py +0 -0
  80. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/base.py +0 -0
  81. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/from_database.py +0 -0
  82. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/imputer/__init__.py +0 -0
  83. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/__init__.py +0 -0
  84. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/base.py +0 -0
  85. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/__init__.py +0 -0
  86. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/base.py +0 -0
  87. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/recommendation/__init__.py +0 -0
  88. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/recommendation/address_pipeline.py +0 -0
  89. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/__init__.py +0 -0
  90. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/table_existence.py +0 -0
  91. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/reducer/__init__.py +0 -0
  92. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/reducer/base.py +0 -0
  93. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/scaler/__init__.py +0 -0
  94. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/scaler/base.py +0 -0
  95. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/schema/__init__.py +0 -0
  96. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/schema/base.py +0 -0
  97. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/__init__.py +0 -0
  98. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/base.py +0 -0
  99. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/block/__init__.py +0 -0
  100. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/block/base.py +0 -0
  101. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/random/__init__.py +0 -0
  102. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/random/base.py +0 -0
  103. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/time_line/__init__.py +0 -0
  104. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/time_line/base.py +0 -0
  105. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/trimmer/__init__.py +0 -0
  106. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/__init__.py +0 -0
  107. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/base.py +0 -0
  108. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/cosine_similarity.py +0 -0
  109. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/pivot.py +0 -0
  110. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/utils.py +0 -0
  111. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/__init__.py +0 -0
  112. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/batcher.py +0 -0
  113. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/binary_utils.py +0 -0
  114. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/clean_json.py +0 -0
  115. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/col_to_list.py +0 -0
  116. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/contracts.py +0 -0
  117. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/file.py +0 -0
  118. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/file_cryptography.py +0 -0
  119. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/filter_json.py +0 -0
  120. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/grouper.py +0 -0
  121. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/jobignore.py +0 -0
  122. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/join_columns_to_list.py +0 -0
  123. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/join_str.py +0 -0
  124. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/llm_client/__init__.py +0 -0
  125. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/__init__.py +0 -0
  126. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/base_logger.py +0 -0
  127. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/console_logger.py +0 -0
  128. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/logstash_logger.py +0 -0
  129. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/multi_logger.py +0 -0
  130. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/memory.py +0 -0
  131. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/mixin.py +0 -0
  132. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/obfuscate.py +0 -0
  133. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/package_utils.py +0 -0
  134. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/pandas_func.py +0 -0
  135. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/patching.py +0 -0
  136. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/pyspark_func.py +0 -0
  137. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/rest.py +0 -0
  138. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/singleton.py +0 -0
  139. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/validation.py +0 -0
  140. {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sdk-seshat-python
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: Seshat python SDK is a library to help create ML data pipelines.
5
5
  License: Commercial - see LICENSE.txt
6
6
  Author: SeshatLabs
@@ -19,6 +19,7 @@ Requires-Dist: cryptography (>=44.0.0,<45.0.0)
19
19
  Requires-Dist: dask[array,complete,dataframe,distributed] (>=2024.10.0,<2025.0.0)
20
20
  Requires-Dist: flipside (>=2.1.0,<3.0.0) ; extra == "flipside-support"
21
21
  Requires-Dist: langchain (>=0.3.23,<0.4.0)
22
+ Requires-Dist: langchain-aws (>=0.2.31,<0.3.0)
22
23
  Requires-Dist: langchain-community (>=0.3.21,<0.4.0)
23
24
  Requires-Dist: langchain-openai (>=0.3.12,<0.4.0)
24
25
  Requires-Dist: loguru (>=0.7.3,<0.8.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sdk-seshat-python"
3
- version = "0.4.2"
3
+ version = "0.4.4"
4
4
  description = "Seshat python SDK is a library to help create ML data pipelines."
5
5
  authors = ["SeshatLabs <info@seshatlabs.xyz>"]
6
6
  packages = [{ include = "seshat", from = "." }]
@@ -36,6 +36,7 @@ python-logstash-async = "^4.0.2"
36
36
  croniter = "^6.0.0"
37
37
  psycopg2-binary = { version = "^2.9", optional = true }
38
38
  setuptools = "^80.9.0"
39
+ langchain-aws = "^0.2.31"
39
40
 
40
41
  [tool.poetry.extras]
41
42
  flipside_support = ["flipside"]
@@ -84,6 +84,119 @@ def inspect_code(
84
84
  raise typer.Exit(1)
85
85
 
86
86
 
87
+ def _execute_job_submission(
88
+ directory: str,
89
+ name: str,
90
+ version: str,
91
+ config: dict,
92
+ confidential_level: str,
93
+ execution_mode: str,
94
+ executor_image_tag: str = None,
95
+ operation_type: str = "submit",
96
+ ) -> None:
97
+ """
98
+ Common logic for submitting and publishing jobs.
99
+
100
+ Args:
101
+ directory: Directory containing the code
102
+ name: Name of the package
103
+ version: Version of the package
104
+ config: Configuration dictionary
105
+ confidential_level: Confidential level for the pipeline
106
+ execution_mode: Execution mode for the pipeline
107
+ executor_image_tag: Image tag of the executor (optional, only for submit)
108
+ operation_type: Type of operation ("submit" or "publish")
109
+ """
110
+ manager = SubmitCommand(config)
111
+
112
+ job_execution_schedule = None
113
+ if "execution" in config:
114
+ job_execution_schedule = JobExecutionSchedule(
115
+ **config.get("execution", {}).get("plan", {})
116
+ )
117
+
118
+ job_metadata = JobMetadata(
119
+ pipeline_hash="",
120
+ confidential_level=confidential_level,
121
+ execution_mode=ExecutionMode(execution_mode),
122
+ execution_plan=job_execution_schedule,
123
+ main_file_path=config.get("code", {}).get("main_file", "main.py"),
124
+ env_file_path=config.get("code", {}).get("env_file", ".env"),
125
+ data_size=config.get("code", {}).get("data_size", DEFAULT_DATA_SIZE),
126
+ code_size=0 if operation_type == "publish" else None, # Only for publish
127
+ complexity_factor=config.get("code", {}).get("complexity_factor", 0),
128
+ requirement_file=None,
129
+ requirements_type=None,
130
+ secret_key=None,
131
+ iv=None,
132
+ )
133
+
134
+ # Handle package creation
135
+ if operation_type == "submit":
136
+ package = manager.handle(
137
+ directory,
138
+ name,
139
+ version,
140
+ executor_image_tag=executor_image_tag,
141
+ metadata=job_metadata,
142
+ )
143
+ else:
144
+ package = manager.handle(directory, name, version, metadata=job_metadata)
145
+
146
+ # Handle code obfuscation
147
+ obfuscate_code = config.get("code", {}).get("obfuscate_code", "false")
148
+ obfuscate_code = obfuscate_code.lower() == "true"
149
+ if obfuscate_code:
150
+ package = manager.obfuscate_code(package)
151
+
152
+ job_metadata.pipeline_hash = package.hash
153
+ identifier = manager.store_code(package)
154
+
155
+ if operation_type == "submit":
156
+ job_response = manager.submit_job(
157
+ identifier,
158
+ name,
159
+ version,
160
+ job_metadata,
161
+ executor_image_tag=executor_image_tag,
162
+ expiration=config.get("aws", {}).get("expiration", DEFAULT_EXPIRATION),
163
+ )
164
+ else:
165
+ job_response = manager.publish_job(
166
+ identifier,
167
+ name,
168
+ version,
169
+ job_metadata,
170
+ expiration=config.get("aws", {}).get("expiration", DEFAULT_EXPIRATION),
171
+ )
172
+
173
+ _display_job_summary(
174
+ package, identifier, job_response, include_executor=bool(executor_image_tag)
175
+ )
176
+
177
+
178
+ def _display_job_summary(
179
+ package, identifier: str, job_response: dict, include_executor: bool = False
180
+ ) -> None:
181
+ """Display job submission summary in a formatted table."""
182
+ job_response_data = job_response.get("data", {})
183
+
184
+ table = Table(title="Upload Summary")
185
+ table.add_column("Property", style="cyan")
186
+ table.add_column("Value", style="green")
187
+
188
+ table.add_row("Name", package.name)
189
+ table.add_row("Version", package.version)
190
+ if include_executor:
191
+ table.add_row("ExecutorImageTag", package.executor_image_tag)
192
+ table.add_row("Hash", package.hash)
193
+ table.add_row("S3 Location", identifier)
194
+ table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
195
+ table.add_row("Status", job_response_data.get("status", {}).get("state", "N/A"))
196
+
197
+ console.print(table)
198
+
199
+
87
200
  @app.command(name="submit")
88
201
  def submit_job(
89
202
  directory: str = typer.Argument(..., help="Directory containing the code"),
@@ -93,77 +206,68 @@ def submit_job(
93
206
  "latest", help="Image tag of the executor which runs the job"
94
207
  ),
95
208
  confidential_level: str = typer.Option(
96
- "default", help="Confidential level desired for the pipeline "
209
+ "default", help="Confidential level desired for the pipeline"
97
210
  ),
98
211
  execution_mode: str = typer.Option(
99
- "single", help="Execution mode for the pipeline "
212
+ "single", help="Execution mode for the pipeline"
100
213
  ),
101
214
  config_file: Path = typer.Option(
102
215
  Path.home() / ".codemanager.toml", help="Path to config file"
103
216
  ),
104
217
  ):
218
+ """Submit a job with executor image tag."""
105
219
  try:
106
220
  config = load_config(config_file)
107
221
  if not config:
108
222
  raise NoConfigSetError()
109
- manager = SubmitCommand(config)
110
223
 
111
- job_execution_schedule = None
112
- if "execution" in config:
113
- job_execution_schedule = JobExecutionSchedule(
114
- **config.get("execution", {}).get("plan", {})
115
- )
116
-
117
- job_metadata = JobMetadata(
118
- pipeline_hash="",
224
+ _execute_job_submission(
225
+ directory=directory,
226
+ name=name,
227
+ version=version,
228
+ config=config,
119
229
  confidential_level=confidential_level,
120
- execution_mode=ExecutionMode(execution_mode),
121
- execution_plan=job_execution_schedule,
122
- main_file_path=config.get("code", {}).get("main_file", "main.py"),
123
- env_file_path=config.get("code", {}).get("env_file", ".env"),
124
- data_size=config.get("code", {}).get("data_size", DEFAULT_DATA_SIZE),
125
- complexity_factor=config.get("code", {}).get("complexity_factor", 0),
126
- requirement_file=None,
127
- requirements_type=None,
128
- secret_key="",
129
- iv="",
230
+ execution_mode=execution_mode,
231
+ executor_image_tag=executor_image_tag,
232
+ operation_type="submit",
130
233
  )
131
234
 
132
- package = manager.handle(
133
- directory, name, version, executor_image_tag, metadata=job_metadata
134
- )
235
+ except Exception as e:
236
+ typer.echo(f"Error: {str(e)}", err=True)
237
+ raise typer.Exit(1)
135
238
 
136
- obfuscate_code = config.get("code", {}).get("obfuscate_code", "false")
137
- obfuscate_code = True if obfuscate_code.lower() == "true" else False
138
- if obfuscate_code:
139
- package = manager.obfuscate_code(package)
140
239
 
141
- job_metadata.pipeline_hash = package.hash
142
- identifier = manager.store_code(package)
240
+ @app.command(name="publish")
241
+ def publish_job_on_cook(
242
+ directory: str = typer.Argument(..., help="Directory containing the code"),
243
+ name: str = typer.Option(..., help="Name of the package"),
244
+ version: str = typer.Option(..., help="Version of the package"),
245
+ confidential_level: str = typer.Option(
246
+ "default", help="Confidential level desired for the pipeline"
247
+ ),
248
+ execution_mode: str = typer.Option(
249
+ "single", help="Execution mode for the pipeline"
250
+ ),
251
+ config_file: Path = typer.Option(
252
+ Path.home() / ".codemanager.toml", help="Path to config file"
253
+ ),
254
+ ):
255
+ """Publish a job on cook without executor image tag."""
256
+ try:
257
+ config = load_config(config_file)
258
+ if not config:
259
+ raise NoConfigSetError()
143
260
 
144
- job_response = manager.submit_job(
145
- identifier,
146
- name,
147
- version,
148
- executor_image_tag,
149
- job_metadata,
150
- expiration=config.get("aws", {}).get("expiration", DEFAULT_EXPIRATION),
261
+ _execute_job_submission(
262
+ directory=directory,
263
+ name=name,
264
+ version=version,
265
+ config=config,
266
+ confidential_level=confidential_level,
267
+ execution_mode=execution_mode,
268
+ executor_image_tag=None,
269
+ operation_type="publish",
151
270
  )
152
- job_response_data = job_response.get("data", {})
153
-
154
- table = Table(title="Upload Summary")
155
- table.add_column("Property", style="cyan")
156
- table.add_column("Value", style="green")
157
-
158
- table.add_row("Name", package.name)
159
- table.add_row("Version", package.version)
160
- table.add_row("ExecutorImageTag", package.executor_image_tag)
161
- table.add_row("Hash", package.hash)
162
- table.add_row("S3 Location", identifier)
163
- table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
164
- table.add_row("Status", job_response_data.get("status", {}).get("state", "N/A"))
165
-
166
- console.print(table)
167
271
 
168
272
  except Exception as e:
169
273
  typer.echo(f"Error: {str(e)}", err=True)
@@ -61,7 +61,7 @@ class SFrame:
61
61
  def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
62
62
  pass
63
63
 
64
- def make_group(self, default_key=configs.DEFAULT_SF_KEY):
64
+ def make_group(self, default_key=configs.DEFAULT_SF_KEY) -> "GroupSFrame":
65
65
  pass
66
66
 
67
67
  def convert(
@@ -36,6 +36,8 @@ class JobExecutionSchedule:
36
36
  until: datetime | None
37
37
  cron_expression: str | None
38
38
  run_overlap: bool = True
39
+ initial_run: bool = True
40
+ timezone: str = "UTC"
39
41
 
40
42
  def __post_init__(self):
41
43
  import re
@@ -108,8 +110,10 @@ class JobMetadata:
108
110
  requirements_type: Optional[str]
109
111
  complexity_factor: float
110
112
  data_size: float
111
- secret_key: str
112
- iv: str
113
+ code_size: float
114
+ secret_key: Optional[str]
115
+ iv: Optional[str]
116
+ env_vars: dict = None
113
117
 
114
118
 
115
119
  class BaseTyperCommand:
@@ -8,10 +8,12 @@ from typing import Optional, BinaryIO
8
8
  import boto3
9
9
  import typer
10
10
  from botocore.exceptions import ClientError
11
+ from dotenv import dotenv_values
11
12
 
12
13
  from seshat.general.command.base import BaseTyperCommand, ApiConfig, JobMetadata
13
14
  from seshat.general.exceptions import RestClientException, EnvFileNotFound
14
15
  from seshat.general.models import CodePackage
16
+ from seshat.utils.date_utils import format_datetime_for_api
15
17
  from seshat.utils.file import is_binary_file
16
18
  from seshat.utils.file_cryptography import AESCipher
17
19
  from seshat.utils.jobignore import JobIgnoreHandler
@@ -191,15 +193,17 @@ class SubmitCommand(BaseTyperCommand):
191
193
  directory: str,
192
194
  name: str,
193
195
  version: str,
194
- executor_image_tag: str,
196
+ metadata: JobMetadata,
197
+ executor_image_tag: str = None,
195
198
  requirements_file: Optional[str] = None,
196
- metadata: JobMetadata = None,
199
+ secret_env: bool = False,
197
200
  ) -> CodePackage:
198
201
  self.echo(f"📦 Packaging code from {directory}")
199
-
200
- env_file, encrypted_data = None, None
202
+ env_file, env_data = None, None
201
203
  try:
202
- encrypted_data, key, iv, env_file = self.handle_env_file(directory)
204
+ env_data, key, iv, env_file = self.handle_env_file(directory, secret_env)
205
+ if not secret_env:
206
+ metadata.env_vars = env_data
203
207
  metadata.secret_key = key
204
208
  metadata.iv = iv
205
209
  except EnvFileNotFound:
@@ -225,9 +229,13 @@ class SubmitCommand(BaseTyperCommand):
225
229
  if job_ignore_handler.match_gitignore_like_path(relative_path):
226
230
  continue
227
231
 
228
- if env_file and (pathlib.Path(filepath) == pathlib.Path(env_file)):
232
+ if (
233
+ secret_env
234
+ and env_file
235
+ and (pathlib.Path(filepath) == pathlib.Path(env_file))
236
+ ):
229
237
  with open(filepath, "r", encoding="utf-8") as _:
230
- all_files[relative_path] = encrypted_data
238
+ all_files[relative_path] = env_data
231
239
  # metadata.env_file_path = env_file
232
240
  self.echo(f"📄 Added {relative_path}")
233
241
  continue
@@ -236,6 +244,8 @@ class SubmitCommand(BaseTyperCommand):
236
244
  all_files, filepath, public_files, relative_path, total_size
237
245
  )
238
246
 
247
+ metadata.code_size = total_size
248
+
239
249
  content_hash = self._hash_package(public_files)
240
250
 
241
251
  package = CodePackage(
@@ -243,7 +253,7 @@ class SubmitCommand(BaseTyperCommand):
243
253
  version=version,
244
254
  executor_image_tag=executor_image_tag,
245
255
  files=all_files,
246
- metadata=None if metadata is None else asdict(metadata),
256
+ metadata=asdict(metadata),
247
257
  hash=content_hash.hexdigest(),
248
258
  binary_files=set(),
249
259
  )
@@ -270,7 +280,7 @@ class SubmitCommand(BaseTyperCommand):
270
280
 
271
281
  return new_total_size
272
282
 
273
- def handle_env_file(self, directory: str):
283
+ def handle_env_file(self, directory: str, secret_env: bool):
274
284
  env_file_path = self.config.get("code").get("env_file")
275
285
  if not env_file_path:
276
286
  self.echo("No env file found in config")
@@ -289,11 +299,15 @@ class SubmitCommand(BaseTyperCommand):
289
299
  self.echo("No env file found to encrypt")
290
300
  raise EnvFileNotFound
291
301
 
292
- encryption_result = AESCipher().encrypt_file(env_file)
293
- encrypted_data = encryption_result["encrypted_data"]
294
- key = encryption_result["key"]
295
- iv = encryption_result["iv"]
296
- return encrypted_data, key, iv, env_file
302
+ key, iv = None, None
303
+ if secret_env:
304
+ encryption_result = AESCipher().encrypt_file(env_file)
305
+ env_data = encryption_result["encrypted_data"]
306
+ key = encryption_result["key"]
307
+ iv = encryption_result["iv"]
308
+ else:
309
+ env_data = dotenv_values(env_file)
310
+ return env_data, key, iv, env_file
297
311
 
298
312
  def store_code(self, package: CodePackage) -> str:
299
313
  self.echo("☁️ Uploading to S3...")
@@ -311,8 +325,8 @@ class SubmitCommand(BaseTyperCommand):
311
325
  s3_key: str,
312
326
  name: str,
313
327
  version: str,
314
- executor_image_tag: str,
315
328
  metadata: JobMetadata,
329
+ executor_image_tag: str = None,
316
330
  expiration=86400,
317
331
  ) -> dict:
318
332
  """Submit job to API after successful upload"""
@@ -357,3 +371,89 @@ class SubmitCommand(BaseTyperCommand):
357
371
  except RestClientException as e:
358
372
  self.echo(f"❌ Failed to submit job: {str(e)}")
359
373
  raise
374
+
375
+ def publish_job(
376
+ self,
377
+ s3_key: str,
378
+ name: str,
379
+ version: str,
380
+ metadata: JobMetadata,
381
+ expiration=86400,
382
+ ) -> dict:
383
+ """Submit job to API after successful upload"""
384
+ if not self.job_config.base_url or not self.job_config.auth_token:
385
+ raise ValueError(
386
+ "API configuration missing. Please set base_url and auth_token"
387
+ )
388
+
389
+ presigned_url = self.backend.generate_presigned_url(s3_key, expiration)
390
+ executor_label = self.config.get("executor", {}).get("label")
391
+
392
+ payload = {
393
+ "name": name,
394
+ "label": name,
395
+ "public": False,
396
+ "pipeline": {
397
+ "configs": metadata.env_vars,
398
+ "schedule": {
399
+ "type": metadata.execution_plan.schedule_mode,
400
+ "expression": metadata.execution_plan.cron_expression,
401
+ "timezone": metadata.execution_plan.timezone,
402
+ "start_time": format_datetime_for_api(
403
+ metadata.execution_plan.start_time
404
+ ),
405
+ "end_time": format_datetime_for_api(metadata.execution_plan.until),
406
+ "initial_run": metadata.execution_plan.initial_run,
407
+ "run_overlap": metadata.execution_plan.run_overlap,
408
+ }
409
+ if metadata.execution_plan is not None
410
+ else {"type": "once"},
411
+ "job_template": {
412
+ "name": f"{name}-job",
413
+ "label": f"{name}-job",
414
+ "description": "",
415
+ "version": str(version),
416
+ "execution_priority": "default",
417
+ "validation_priority": "default",
418
+ "directory": {"url": presigned_url, "type": "s3"},
419
+ "retry_policy": {
420
+ "retry_on_error": False,
421
+ "interval": "0",
422
+ "retry_count": 0,
423
+ "action_on_failure": "none",
424
+ },
425
+ "meta_data": {
426
+ "code_size": metadata.code_size,
427
+ "pipeline_hash": metadata.pipeline_hash,
428
+ "confidential_level": metadata.confidential_level,
429
+ "execution_mode": metadata.execution_mode,
430
+ "main_file_path": metadata.main_file_path,
431
+ "env_file_path": metadata.env_file_path,
432
+ "complexity_factor": metadata.complexity_factor,
433
+ "data_size": metadata.data_size,
434
+ },
435
+ "executor_label": executor_label,
436
+ },
437
+ "config_handler": "store_accounts",
438
+ "active": True,
439
+ },
440
+ }
441
+
442
+ if metadata.secret_key:
443
+ payload["pipeline"]["job_template"]["meta_data"].update(
444
+ {
445
+ "encryption_secret_key": metadata.secret_key,
446
+ "encryption_iv": metadata.iv,
447
+ }
448
+ )
449
+
450
+ try:
451
+ self.echo("🚀 Publishing job to cook...")
452
+ response_data = self.rest_client.post(
453
+ "agent-launchers/data-agents/submit", json=payload
454
+ )
455
+ self.echo("✅ Job published to cook successfully!")
456
+ return response_data
457
+ except RestClientException as e:
458
+ self.echo(f"❌ Failed to publish job: {str(e)}")
459
+ raise
@@ -26,6 +26,7 @@ TOP_ADDRESS_SF_KEY = "top_address"
26
26
  EXCLUSION_SF_KEY = "exclusion"
27
27
  TOKEN_PRICE_SF_KEY = "token_price"
28
28
  PROFIT_LOSS_SF_KEY = "profit_loss"
29
+ DUPLICATED_SF_KEY = "duplicated"
29
30
  SPARK_APP_NAME = "seshat"
30
31
  PANDAS_MODE = "df"
31
32
  PYSPARK_MODE = "spf"
@@ -78,13 +78,31 @@ class SQLMixin:
78
78
  trans.commit()
79
79
  conn.close()
80
80
 
81
def _parse_table_name(self, table_name: str) -> tuple[Optional[str], str]:
    """
    Split an optionally schema-qualified table name into its parts.

    Only the first "." acts as the separator, so any further dots stay
    in the table part ("a.b.c" -> ("a", "b.c")).

    Args:
        table_name: Either "table" or "schema.table".

    Returns:
        A ``(schema_name, table_name)`` tuple. ``schema_name`` is ``None``
        when the name has no schema prefix or the prefix is empty
        (".table").
    """
    prefix, separator, remainder = table_name.partition(".")
    if not separator:
        # No dot at all: the whole string is the table name.
        return None, table_name
    # Report an empty prefix as "no schema" so callers never receive ""
    # as a schema name.
    return prefix or None, remainder
90
+
81
91
  def ensure_table_exists(self, table: str, schema: Schema):
82
92
  engine = self.get_engine()
83
- if table in inspect(engine).get_table_names():
93
+ db_schema, table_name = self._parse_table_name(table)
94
+
95
+ # Check if table exists in the specific schema
96
+ inspector = inspect(engine)
97
+ existing_tables = inspector.get_table_names(schema=db_schema)
98
+
99
+ if table_name in existing_tables:
84
100
  return
85
101
  self.create_table(schema, table)
86
102
 
87
103
  def create_table(self, schema: Schema, table: str):
104
+ db_schema, table_name = self._parse_table_name(table)
105
+
88
106
  table_columns = []
89
107
  pk_cols = []
90
108
  for col in schema.cols:
@@ -96,7 +114,9 @@ class SQLMixin:
96
114
  constraints = []
97
115
  if pk_cols:
98
116
  constraints.append(
99
- PrimaryKeyConstraint(*pk_cols, name=f"{table}_pk_{'_'.join(pk_cols)}")
117
+ PrimaryKeyConstraint(
118
+ *pk_cols, name=f"{table_name}_pk_{'_'.join(pk_cols)}"
119
+ )
100
120
  )
101
121
  _, metadata = self.get_table(
102
122
  table, False, *table_columns, *constraints, extend_existing=True
@@ -104,7 +124,13 @@ class SQLMixin:
104
124
  metadata.create_all(self.get_engine())
105
125
 
106
126
  def get_table(self, table_name, autoload, *args, **kwargs):
127
+ db_schema, actual_table_name = self._parse_table_name(table_name)
128
+
107
129
  metadata = MetaData()
108
130
  if autoload:
109
131
  kwargs.setdefault("autoload_with", self.get_engine())
110
- return Table(table_name, metadata, *args, **kwargs), metadata
132
+
133
+ if db_schema:
134
+ kwargs["schema"] = db_schema
135
+
136
+ return Table(actual_table_name, metadata, *args, **kwargs), metadata
@@ -2,12 +2,8 @@ import hashlib
2
2
  import statistics
3
3
  from typing import List
4
4
 
5
- import sqlalchemy as db
6
5
  from sqlalchemy import (
7
- Column,
8
6
  Index,
9
- MetaData,
10
- Table,
11
7
  and_,
12
8
  inspect,
13
9
  select,
@@ -52,29 +48,21 @@ class SQLDBSaver(SQLMixin, Saver):
52
48
  else:
53
49
  self.insert(selected_sf, config)
54
50
 
55
- def ensure_table_exists(self, table: str, schema: Schema):
56
- engine = self.get_engine()
57
- if table in inspect(engine).get_table_names():
58
- return
59
- self.create_table(schema, table)
60
-
61
- def create_table(self, schema: Schema, table: str):
62
- table_columns = []
63
- for col in schema.cols:
64
- col_name = col.to
65
- col_type = getattr(db, col.dtype or "String")
66
- table_columns.append(Column(col_name, col_type))
67
- _, metadata = self.get_table(table, False, *table_columns, extend_existing=True)
68
- metadata.create_all(self.get_engine())
69
-
70
51
  def delete(self, table_name):
71
52
  table, _ = self.get_table(table_name, autoload=True)
72
53
  self.write_on_db(table.delete())
73
54
 
74
55
  def drop_table(self, table_name):
75
- if table_name in inspect(self.get_engine()).get_table_names():
56
+ db_schema, actual_table_name = self._parse_table_name(table_name)
57
+ engine = self.get_engine()
58
+ inspector = inspect(engine)
59
+
60
+ # Check if table exists in the specific schema
61
+ existing_tables = inspector.get_table_names(schema=db_schema)
62
+
63
+ if actual_table_name in existing_tables:
76
64
  table, _ = self.get_table(table_name, autoload=True)
77
- table.drop(self.get_engine())
65
+ table.drop(engine)
78
66
 
79
67
  def insert(self, selected_sf: SFrame, config: SaveConfig):
80
68
  values = self.prepare_sf_to_insert(selected_sf, config).to_dict()
@@ -126,13 +114,17 @@ class SQLDBSaver(SQLMixin, Saver):
126
114
  hashed_cols = self.hash_columns([col.key for col in index.columns])
127
115
  current_indexes.add(hashed_cols)
128
116
 
117
+ # Parse table name to get the actual table name without schema
118
+ _, actual_table_name = self._parse_table_name(config.table)
119
+
129
120
  for index in config.indexes:
130
121
  index_cols = [index] if isinstance(index, str) else index
131
122
  index_hash = self.hash_columns(index_cols)
132
123
  if index_hash in current_indexes:
133
124
  continue
134
125
 
135
- index_name = f"{'_'.join(index_cols)}_index_{table.name}"
126
+ # Use the actual table name (without schema) for index naming
127
+ index_name = f"{'_'.join(index_cols)}_index_{actual_table_name}"
136
128
  index_obj = Index(
137
129
  index_name,
138
130
  *[getattr(table.c, index_col) for index_col in index_cols],
@@ -184,12 +176,6 @@ class SQLDBSaver(SQLMixin, Saver):
184
176
  )
185
177
  return self.get_from_db(query)
186
178
 
187
- def get_table(self, table_name, autoload, *args, **kwargs):
188
- metadata = MetaData()
189
- if autoload:
190
- kwargs.setdefault("autoload_with", self.get_engine())
191
- return Table(table_name, metadata, *args, **kwargs), metadata
192
-
193
179
  def get_from_db(self, query):
194
180
  with self.get_engine().connect() as conn:
195
181
  result = conn.execute(query)