satisfactoscript 0.5.10__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/PKG-INFO +1 -1
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/pyproject.toml +1 -1
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/core.py +58 -43
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/README.md +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/setup.cfg +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/agentic/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/agentic/agent.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/config.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/loaders.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/registry.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/registry.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/semantic/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/semantic/semantic.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/utils.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/SOURCES.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/requires.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/top_level.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_config.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_connect_patch.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_env_detection.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_join.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_username.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_dummy.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_loaders.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_registry.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_registry_import_paths.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_utils_safe_columns.py +0 -0
|
@@ -686,37 +686,50 @@ class SatisfactoEngine:
|
|
|
686
686
|
|
|
687
687
|
return df_main
|
|
688
688
|
|
|
689
|
-
def
|
|
689
|
+
def run_process_and_split(self, schema_dict, split_values, target_layer, target_base_name, split_column):
|
|
690
690
|
"""
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
691
|
+
Processes the schema and splits the resulting DataFrame into multiple Delta tables,
|
|
692
|
+
one per value in split_values, filtering on split_column.
|
|
693
|
+
|
|
694
694
|
Args:
|
|
695
695
|
schema_dict (dict): The pipeline dictionary schema.
|
|
696
|
-
|
|
696
|
+
split_values (list of dict): Each item must have:
|
|
697
|
+
- 'value' (str): The column value to filter on.
|
|
698
|
+
- 'label' (str): The suffix appended to target_base_name for the table name.
|
|
697
699
|
target_layer (str): The target database layer (e.g., 'silver').
|
|
698
700
|
target_base_name (str): The base name for the target tables.
|
|
699
701
|
split_column (str): The column used to split the data.
|
|
700
702
|
"""
|
|
701
|
-
print(f"--- Executing Pattern:
|
|
703
|
+
print(f"--- Executing Pattern: process_and_split (Base: {target_base_name}, Column: {split_column}) ---")
|
|
702
704
|
df_full = self.process_schema(schema_dict)
|
|
703
705
|
actual_schema = self.get_target_schema(target_layer)
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
706
|
+
|
|
707
|
+
cached = False
|
|
708
|
+
try:
|
|
709
|
+
df_full.cache()
|
|
710
|
+
cached = True
|
|
711
|
+
except Exception:
|
|
712
|
+
print(" -> [Cache] WARNING: cache() failed (UserContext gRPC issue). Continuing without caching.")
|
|
713
|
+
|
|
714
|
+
for item in split_values:
|
|
715
|
+
fqn = f"`{self.db}`.`{actual_schema}`.`{target_base_name}_{item['label']}`"
|
|
708
716
|
self._drop_table_if_exists(fqn)
|
|
709
|
-
print(f" -> Processing
|
|
710
|
-
|
|
711
|
-
self._write_dataframe(
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
717
|
+
print(f" -> Processing split value: {item['label']} ({split_column} = '{item['value']}')")
|
|
718
|
+
df_slice = df_full.filter(F.col(split_column) == item["value"])
|
|
719
|
+
self._write_dataframe(df_slice, fqn, item["label"])
|
|
720
|
+
|
|
721
|
+
if cached:
|
|
722
|
+
try:
|
|
723
|
+
df_full.unpersist()
|
|
724
|
+
except Exception:
|
|
725
|
+
pass
|
|
726
|
+
|
|
727
|
+
print("--- Pattern 'process_and_split' completed. ---")
|
|
715
728
|
|
|
716
|
-
def
|
|
729
|
+
def run_process_to_table(self, schema_dict, target_layer, target_table_name):
|
|
717
730
|
"""
|
|
718
|
-
|
|
719
|
-
|
|
731
|
+
Processes a schema and writes the result to a single Delta table.
|
|
732
|
+
|
|
720
733
|
Args:
|
|
721
734
|
schema_dict (dict): The pipeline dictionary schema.
|
|
722
735
|
target_layer (str): The target database layer (e.g., 'gold').
|
|
@@ -724,55 +737,57 @@ class SatisfactoEngine:
|
|
|
724
737
|
"""
|
|
725
738
|
actual_schema = self.get_target_schema(target_layer)
|
|
726
739
|
fqn = f"`{self.db}`.`{actual_schema}`.`{target_table_name}`"
|
|
727
|
-
print(f"--- Executing Pattern:
|
|
740
|
+
print(f"--- Executing Pattern: process_to_table (Target: {target_table_name}) ---")
|
|
728
741
|
self._drop_table_if_exists(fqn)
|
|
729
742
|
df = self.process_schema(schema_dict)
|
|
730
743
|
self._write_dataframe(df, fqn, target_table_name)
|
|
731
|
-
print("--- Pattern '
|
|
744
|
+
print("--- Pattern 'process_to_table' completed. ---")
|
|
732
745
|
|
|
733
|
-
def
|
|
746
|
+
def run_union_sources_to_table(self, schema_dict, source_partitions, source_layer, target_layer, target_table_name, source_base_names, source_alias):
|
|
734
747
|
"""
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
748
|
+
Unions source tables (one per partition label per base name), processes the
|
|
749
|
+
combined DataFrame through the schema, and writes the result to a single Delta table.
|
|
750
|
+
|
|
751
|
+
Source table names are resolved as: {db}.{source_layer}.{base_name}_{partition['label']}
|
|
752
|
+
|
|
738
753
|
Args:
|
|
739
754
|
schema_dict (dict): The pipeline dictionary schema.
|
|
740
|
-
|
|
755
|
+
source_partitions (list of dict): Each item must have a 'label' key (str) used
|
|
756
|
+
as the suffix when resolving source table names.
|
|
741
757
|
source_layer (str): The database layer containing the source tables.
|
|
742
758
|
target_layer (str): The database layer to write the output to.
|
|
743
759
|
target_table_name (str): The name of the target output table.
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
760
|
+
source_base_names (list of str): Base names of the source tables to union.
|
|
761
|
+
source_alias (str): The key under which the unioned DataFrame is injected
|
|
762
|
+
into the schema processing (replaces the table alias in schema_dict).
|
|
763
|
+
|
|
748
764
|
Raises:
|
|
749
|
-
ValueError: If no source tables can be found to
|
|
765
|
+
ValueError: If no source tables can be found to union.
|
|
750
766
|
"""
|
|
751
767
|
actual_schema_source = self.get_target_schema(source_layer)
|
|
752
768
|
actual_schema_target = self.get_target_schema(target_layer)
|
|
753
769
|
fqn = f"`{self.db}`.`{actual_schema_target}`.`{target_table_name}`"
|
|
754
|
-
print(f"--- Executing Pattern:
|
|
770
|
+
print(f"--- Executing Pattern: union_sources_to_table (Target: {target_table_name}) ---")
|
|
755
771
|
self._drop_table_if_exists(fqn)
|
|
756
|
-
|
|
772
|
+
|
|
757
773
|
list_of_dfs = []
|
|
758
|
-
for base in
|
|
759
|
-
for
|
|
760
|
-
source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{
|
|
774
|
+
for base in source_base_names:
|
|
775
|
+
for partition in source_partitions:
|
|
776
|
+
source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{partition['label']}`"
|
|
761
777
|
try:
|
|
762
778
|
list_of_dfs.append(self.spark.table(source_fqn))
|
|
763
779
|
except Exception:
|
|
764
780
|
print(f" - WARNING: Missing table {source_fqn}")
|
|
765
781
|
|
|
766
|
-
if not list_of_dfs:
|
|
767
|
-
|
|
782
|
+
if not list_of_dfs:
|
|
783
|
+
raise ValueError("No sources found to union.")
|
|
784
|
+
|
|
768
785
|
print(f" -> [Union] Merging {len(list_of_dfs)} tables...")
|
|
769
786
|
unioned_df = reduce(lambda x, y: x.unionByName(y, allowMissingColumns=True), list_of_dfs).dropDuplicates()
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
df_final = self.process_schema(schema_dict, dataframes_in=input_dfs)
|
|
787
|
+
|
|
788
|
+
df_final = self.process_schema(schema_dict, dataframes_in={source_alias: unioned_df})
|
|
774
789
|
self._write_dataframe(df_final, fqn, target_table_name)
|
|
775
|
-
print("--- Pattern '
|
|
790
|
+
print("--- Pattern 'union_sources_to_table' completed. ---")
|
|
776
791
|
|
|
777
792
|
def optimize_table(self, target_layer, target_table_name, zorder_cols=None):
|
|
778
793
|
"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/semantic/__init__.py
RENAMED
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/semantic/semantic.py
RENAMED
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/requires.txt
RENAMED
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|