satisfactoscript 0.5.10__tar.gz → 0.6.1__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/PKG-INFO +1 -1
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/pyproject.toml +1 -1
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/core.py +89 -47
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/README.md +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/setup.cfg +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/agentic/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/agentic/agent.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/config.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/loaders.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/registry.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/registry.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/semantic/__init__.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/semantic/semantic.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/utils.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/SOURCES.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/requires.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/top_level.txt +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_config.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_connect_patch.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_env_detection.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_join.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_username.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_dummy.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_loaders.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_registry.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_registry_import_paths.py +0 -0
- {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_utils_safe_columns.py +0 -0
|
@@ -502,13 +502,40 @@ class SatisfactoEngine:
|
|
|
502
502
|
|
|
503
503
|
def _drop_table_if_exists(self, fqn):
|
|
504
504
|
"""
|
|
505
|
-
Drops a table using
|
|
506
|
-
|
|
505
|
+
Drops a table if it exists, using two strategies in order:
|
|
506
|
+
1. Spark SQL DROP TABLE IF EXISTS (works natively on Databricks).
|
|
507
|
+
2. Databricks SDK REST API (works locally when gRPC DDL fails with UserContext).
|
|
508
|
+
|
|
507
509
|
Args:
|
|
508
|
-
fqn (str): The Fully Qualified Name of the table.
|
|
510
|
+
fqn (str): The Fully Qualified Name of the table (backtick-quoted).
|
|
509
511
|
"""
|
|
510
512
|
print(f" -> [Cleanup] Dropping table if exists: {fqn}")
|
|
511
|
-
|
|
513
|
+
|
|
514
|
+
# Strategy 1: Spark SQL (works natively on Databricks)
|
|
515
|
+
try:
|
|
516
|
+
self.spark.sql(f"DROP TABLE IF EXISTS {fqn}")
|
|
517
|
+
return
|
|
518
|
+
except Exception:
|
|
519
|
+
pass
|
|
520
|
+
|
|
521
|
+
# Strategy 2: Databricks SDK REST API (works locally when gRPC DDL fails)
|
|
522
|
+
try:
|
|
523
|
+
from databricks.sdk import WorkspaceClient
|
|
524
|
+
host = os.getenv("DATABRICKS_HOST")
|
|
525
|
+
token = os.getenv("DATABRICKS_TOKEN")
|
|
526
|
+
if host and token:
|
|
527
|
+
clean_fqn = fqn.replace("`", "")
|
|
528
|
+
w = WorkspaceClient(host=host, token=token)
|
|
529
|
+
try:
|
|
530
|
+
w.tables.delete(clean_fqn)
|
|
531
|
+
except Exception:
|
|
532
|
+
# Table does not exist — equivalent to IF EXISTS, not an error
|
|
533
|
+
pass
|
|
534
|
+
return
|
|
535
|
+
except Exception:
|
|
536
|
+
pass
|
|
537
|
+
|
|
538
|
+
print(f" -> [Cleanup] WARNING: Could not drop {fqn} via SQL or SDK. Continuing anyway.")
|
|
512
539
|
|
|
513
540
|
def _write_dataframe(self, df, fqn, label):
|
|
514
541
|
"""
|
|
@@ -686,37 +713,50 @@ class SatisfactoEngine:
|
|
|
686
713
|
|
|
687
714
|
return df_main
|
|
688
715
|
|
|
689
|
-
def
|
|
716
|
+
def run_process_and_split(self, schema_dict, split_values, target_layer, target_base_name, split_column):
|
|
690
717
|
"""
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
718
|
+
Processes the schema and splits the resulting DataFrame into multiple Delta tables,
|
|
719
|
+
one per value in split_values, filtering on split_column.
|
|
720
|
+
|
|
694
721
|
Args:
|
|
695
722
|
schema_dict (dict): The pipeline dictionary schema.
|
|
696
|
-
|
|
723
|
+
split_values (list of dict): Each item must have:
|
|
724
|
+
- 'value' (str): The column value to filter on.
|
|
725
|
+
- 'label' (str): The suffix appended to target_base_name for the table name.
|
|
697
726
|
target_layer (str): The target database layer (e.g., 'silver').
|
|
698
727
|
target_base_name (str): The base name for the target tables.
|
|
699
728
|
split_column (str): The column used to split the data.
|
|
700
729
|
"""
|
|
701
|
-
print(f"--- Executing Pattern:
|
|
730
|
+
print(f"--- Executing Pattern: process_and_split (Base: {target_base_name}, Column: {split_column}) ---")
|
|
702
731
|
df_full = self.process_schema(schema_dict)
|
|
703
732
|
actual_schema = self.get_target_schema(target_layer)
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
733
|
+
|
|
734
|
+
cached = False
|
|
735
|
+
try:
|
|
736
|
+
df_full.cache()
|
|
737
|
+
cached = True
|
|
738
|
+
except Exception:
|
|
739
|
+
print(" -> [Cache] WARNING: cache() failed (UserContext gRPC issue). Continuing without caching.")
|
|
740
|
+
|
|
741
|
+
for item in split_values:
|
|
742
|
+
fqn = f"`{self.db}`.`{actual_schema}`.`{target_base_name}_{item['label']}`"
|
|
708
743
|
self._drop_table_if_exists(fqn)
|
|
709
|
-
print(f" -> Processing
|
|
710
|
-
|
|
711
|
-
self._write_dataframe(
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
744
|
+
print(f" -> Processing split value: {item['label']} ({split_column} = '{item['value']}')")
|
|
745
|
+
df_slice = df_full.filter(F.col(split_column) == item["value"])
|
|
746
|
+
self._write_dataframe(df_slice, fqn, item["label"])
|
|
747
|
+
|
|
748
|
+
if cached:
|
|
749
|
+
try:
|
|
750
|
+
df_full.unpersist()
|
|
751
|
+
except Exception:
|
|
752
|
+
pass
|
|
715
753
|
|
|
716
|
-
|
|
754
|
+
print("--- Pattern 'process_and_split' completed. ---")
|
|
755
|
+
|
|
756
|
+
def run_process_to_table(self, schema_dict, target_layer, target_table_name):
|
|
717
757
|
"""
|
|
718
|
-
|
|
719
|
-
|
|
758
|
+
Processes a schema and writes the result to a single Delta table.
|
|
759
|
+
|
|
720
760
|
Args:
|
|
721
761
|
schema_dict (dict): The pipeline dictionary schema.
|
|
722
762
|
target_layer (str): The target database layer (e.g., 'gold').
|
|
@@ -724,55 +764,57 @@ class SatisfactoEngine:
|
|
|
724
764
|
"""
|
|
725
765
|
actual_schema = self.get_target_schema(target_layer)
|
|
726
766
|
fqn = f"`{self.db}`.`{actual_schema}`.`{target_table_name}`"
|
|
727
|
-
print(f"--- Executing Pattern:
|
|
767
|
+
print(f"--- Executing Pattern: process_to_table (Target: {target_table_name}) ---")
|
|
728
768
|
self._drop_table_if_exists(fqn)
|
|
729
769
|
df = self.process_schema(schema_dict)
|
|
730
770
|
self._write_dataframe(df, fqn, target_table_name)
|
|
731
|
-
print("--- Pattern '
|
|
771
|
+
print("--- Pattern 'process_to_table' completed. ---")
|
|
732
772
|
|
|
733
|
-
def
|
|
773
|
+
def run_union_sources_to_table(self, schema_dict, source_partitions, source_layer, target_layer, target_table_name, source_base_names, source_alias):
|
|
734
774
|
"""
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
775
|
+
Unions source tables (one per partition label per base name), processes the
|
|
776
|
+
combined DataFrame through the schema, and writes the result to a single Delta table.
|
|
777
|
+
|
|
778
|
+
Source table names are resolved as: {db}.{source_layer}.{base_name}_{partition['label']}
|
|
779
|
+
|
|
738
780
|
Args:
|
|
739
781
|
schema_dict (dict): The pipeline dictionary schema.
|
|
740
|
-
|
|
782
|
+
source_partitions (list of dict): Each item must have a 'label' key (str) used
|
|
783
|
+
as the suffix when resolving source table names.
|
|
741
784
|
source_layer (str): The database layer containing the source tables.
|
|
742
785
|
target_layer (str): The database layer to write the output to.
|
|
743
786
|
target_table_name (str): The name of the target output table.
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
787
|
+
source_base_names (list of str): Base names of the source tables to union.
|
|
788
|
+
source_alias (str): The key under which the unioned DataFrame is injected
|
|
789
|
+
into the schema processing (replaces the table alias in schema_dict).
|
|
790
|
+
|
|
748
791
|
Raises:
|
|
749
|
-
ValueError: If no source tables can be found to
|
|
792
|
+
ValueError: If no source tables can be found to union.
|
|
750
793
|
"""
|
|
751
794
|
actual_schema_source = self.get_target_schema(source_layer)
|
|
752
795
|
actual_schema_target = self.get_target_schema(target_layer)
|
|
753
796
|
fqn = f"`{self.db}`.`{actual_schema_target}`.`{target_table_name}`"
|
|
754
|
-
print(f"--- Executing Pattern:
|
|
797
|
+
print(f"--- Executing Pattern: union_sources_to_table (Target: {target_table_name}) ---")
|
|
755
798
|
self._drop_table_if_exists(fqn)
|
|
756
|
-
|
|
799
|
+
|
|
757
800
|
list_of_dfs = []
|
|
758
|
-
for base in
|
|
759
|
-
for
|
|
760
|
-
source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{
|
|
801
|
+
for base in source_base_names:
|
|
802
|
+
for partition in source_partitions:
|
|
803
|
+
source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{partition['label']}`"
|
|
761
804
|
try:
|
|
762
805
|
list_of_dfs.append(self.spark.table(source_fqn))
|
|
763
806
|
except Exception:
|
|
764
807
|
print(f" - WARNING: Missing table {source_fqn}")
|
|
765
808
|
|
|
766
|
-
if not list_of_dfs:
|
|
767
|
-
|
|
809
|
+
if not list_of_dfs:
|
|
810
|
+
raise ValueError("No sources found to union.")
|
|
811
|
+
|
|
768
812
|
print(f" -> [Union] Merging {len(list_of_dfs)} tables...")
|
|
769
813
|
unioned_df = reduce(lambda x, y: x.unionByName(y, allowMissingColumns=True), list_of_dfs).dropDuplicates()
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
df_final = self.process_schema(schema_dict, dataframes_in=input_dfs)
|
|
814
|
+
|
|
815
|
+
df_final = self.process_schema(schema_dict, dataframes_in={source_alias: unioned_df})
|
|
774
816
|
self._write_dataframe(df_final, fqn, target_table_name)
|
|
775
|
-
print("--- Pattern '
|
|
817
|
+
print("--- Pattern 'union_sources_to_table' completed. ---")
|
|
776
818
|
|
|
777
819
|
def optimize_table(self, target_layer, target_table_name, zorder_cols=None):
|
|
778
820
|
"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/semantic/__init__.py
RENAMED
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/semantic/semantic.py
RENAMED
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/requires.txt
RENAMED
|
File without changes
|
{satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|