satisfactoscript 0.5.10__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/PKG-INFO +1 -1
  2. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/pyproject.toml +1 -1
  3. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/core.py +89 -47
  4. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
  5. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/README.md +0 -0
  6. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/setup.cfg +0 -0
  7. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/__init__.py +0 -0
  8. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/agentic/__init__.py +0 -0
  9. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/agentic/agent.py +0 -0
  10. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/__init__.py +0 -0
  11. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/config.py +0 -0
  12. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/loaders.py +0 -0
  13. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/core/registry.py +0 -0
  14. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/registry.py +0 -0
  15. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/semantic/__init__.py +0 -0
  16. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/semantic/semantic.py +0 -0
  17. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript/utils.py +0 -0
  18. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/SOURCES.txt +0 -0
  19. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
  20. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/requires.txt +0 -0
  21. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/src/satisfactoscript.egg-info/top_level.txt +0 -0
  22. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_config.py +0 -0
  23. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core.py +0 -0
  24. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_connect_patch.py +0 -0
  25. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_env_detection.py +0 -0
  26. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_join.py +0 -0
  27. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_core_username.py +0 -0
  28. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_dummy.py +0 -0
  29. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_loaders.py +0 -0
  30. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_registry.py +0 -0
  31. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_registry_import_paths.py +0 -0
  32. {satisfactoscript-0.5.10 → satisfactoscript-0.6.1}/tests/test_utils_safe_columns.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.10
3
+ Version: 0.6.1
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "satisfactoscript"
7
- version = "0.5.10"
7
+ version = "0.6.1"
8
8
  description = "An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse."
9
9
  readme = "README.md"
10
10
  authors = [
@@ -502,13 +502,40 @@ class SatisfactoEngine:
502
502
 
503
503
  def _drop_table_if_exists(self, fqn):
504
504
  """
505
- Drops a table using SQL directly if it exists.
506
-
505
+ Drops a table if it exists, using two strategies in order:
506
+ 1. Spark SQL DROP TABLE IF EXISTS (works natively on Databricks).
507
+ 2. Databricks SDK REST API (works locally when gRPC DDL fails with UserContext).
508
+
507
509
  Args:
508
- fqn (str): The Fully Qualified Name of the table.
510
+ fqn (str): The Fully Qualified Name of the table (backtick-quoted).
509
511
  """
510
512
  print(f" -> [Cleanup] Dropping table if exists: {fqn}")
511
- self.spark.sql(f"DROP TABLE IF EXISTS {fqn}")
513
+
514
+ # Strategy 1: Spark SQL (works natively on Databricks)
515
+ try:
516
+ self.spark.sql(f"DROP TABLE IF EXISTS {fqn}")
517
+ return
518
+ except Exception:
519
+ pass
520
+
521
+ # Strategy 2: Databricks SDK REST API (works locally when gRPC DDL fails)
522
+ try:
523
+ from databricks.sdk import WorkspaceClient
524
+ host = os.getenv("DATABRICKS_HOST")
525
+ token = os.getenv("DATABRICKS_TOKEN")
526
+ if host and token:
527
+ clean_fqn = fqn.replace("`", "")
528
+ w = WorkspaceClient(host=host, token=token)
529
+ try:
530
+ w.tables.delete(clean_fqn)
531
+ except Exception:
532
+ # Table does not exist — equivalent to IF EXISTS, not an error
533
+ pass
534
+ return
535
+ except Exception:
536
+ pass
537
+
538
+ print(f" -> [Cleanup] WARNING: Could not drop {fqn} via SQL or SDK. Continuing anyway.")
512
539
 
513
540
  def _write_dataframe(self, df, fqn, label):
514
541
  """
@@ -686,37 +713,50 @@ class SatisfactoEngine:
686
713
 
687
714
  return df_main
688
715
 
689
- def run_split_to_org(self, schema_dict, org_list, target_layer, target_base_name, split_column="sales_org_code"):
716
+ def run_process_and_split(self, schema_dict, split_values, target_layer, target_base_name, split_column):
690
717
  """
691
- Executes a pattern where the processed DataFrame is split based on an organization
692
- column and written to multiple Delta tables.
693
-
718
+ Processes the schema and splits the resulting DataFrame into multiple Delta tables,
719
+ one per value in split_values, filtering on split_column.
720
+
694
721
  Args:
695
722
  schema_dict (dict): The pipeline dictionary schema.
696
- org_list (list of dict): List of organizations containing 'org_code' and 'label'.
723
+ split_values (list of dict): Each item must have:
724
+ - 'value' (str): The column value to filter on.
725
+ - 'label' (str): The suffix appended to target_base_name for the table name.
697
726
  target_layer (str): The target database layer (e.g., 'silver').
698
727
  target_base_name (str): The base name for the target tables.
699
728
  split_column (str): The column used to split the data.
700
729
  """
701
- print(f"--- Executing Pattern: split_to_org (Base: {target_base_name}) ---")
730
+ print(f"--- Executing Pattern: process_and_split (Base: {target_base_name}, Column: {split_column}) ---")
702
731
  df_full = self.process_schema(schema_dict)
703
732
  actual_schema = self.get_target_schema(target_layer)
704
- df_full.cache()
705
-
706
- for org in org_list:
707
- fqn = f"`{self.db}`.`{actual_schema}`.`{target_base_name}_{org['label']}`"
733
+
734
+ cached = False
735
+ try:
736
+ df_full.cache()
737
+ cached = True
738
+ except Exception:
739
+ print(" -> [Cache] WARNING: cache() failed (UserContext gRPC issue). Continuing without caching.")
740
+
741
+ for item in split_values:
742
+ fqn = f"`{self.db}`.`{actual_schema}`.`{target_base_name}_{item['label']}`"
708
743
  self._drop_table_if_exists(fqn)
709
- print(f" -> Processing Org: {org['label']}")
710
- df_org = df_full.filter(F.col(split_column) == org["org_code"])
711
- self._write_dataframe(df_org, fqn, org["label"])
712
-
713
- df_full.unpersist()
714
- print("--- Pattern 'split_to_org' completed. ---")
744
+ print(f" -> Processing split value: {item['label']} ({split_column} = '{item['value']}')")
745
+ df_slice = df_full.filter(F.col(split_column) == item["value"])
746
+ self._write_dataframe(df_slice, fqn, item["label"])
747
+
748
+ if cached:
749
+ try:
750
+ df_full.unpersist()
751
+ except Exception:
752
+ pass
715
753
 
716
- def run_follow_schema(self, schema_dict, target_layer, target_table_name):
754
+ print("--- Pattern 'process_and_split' completed. ---")
755
+
756
+ def run_process_to_table(self, schema_dict, target_layer, target_table_name):
717
757
  """
718
- Standard execution pattern: Process a schema and write to a single target table.
719
-
758
+ Processes a schema and writes the result to a single Delta table.
759
+
720
760
  Args:
721
761
  schema_dict (dict): The pipeline dictionary schema.
722
762
  target_layer (str): The target database layer (e.g., 'gold').
@@ -724,55 +764,57 @@ class SatisfactoEngine:
724
764
  """
725
765
  actual_schema = self.get_target_schema(target_layer)
726
766
  fqn = f"`{self.db}`.`{actual_schema}`.`{target_table_name}`"
727
- print(f"--- Executing Pattern: follow_schema (Target: {target_table_name}) ---")
767
+ print(f"--- Executing Pattern: process_to_table (Target: {target_table_name}) ---")
728
768
  self._drop_table_if_exists(fqn)
729
769
  df = self.process_schema(schema_dict)
730
770
  self._write_dataframe(df, fqn, target_table_name)
731
- print("--- Pattern 'follow_schema' completed. ---")
771
+ print("--- Pattern 'process_to_table' completed. ---")
732
772
 
733
- def run_unify_and_process(self, schema_dict, org_list, source_layer, target_layer, target_table_name, unified_source_base_names, unified_temp_view_key):
773
+ def run_union_sources_to_table(self, schema_dict, source_partitions, source_layer, target_layer, target_table_name, source_base_names, source_alias):
734
774
  """
735
- Executes a pattern where data from multiple organizations is first unified (unioned)
736
- and then passed through the schema processing.
737
-
775
+ Unions source tables (one per partition label per base name), processes the
776
+ combined DataFrame through the schema, and writes the result to a single Delta table.
777
+
778
+ Source table names are resolved as: {db}.{source_layer}.{base_name}_{partition['label']}
779
+
738
780
  Args:
739
781
  schema_dict (dict): The pipeline dictionary schema.
740
- org_list (list of dict): List of organizations defining expected source tables.
782
+ source_partitions (list of dict): Each item must have a 'label' key (str) used
783
+ as the suffix when resolving source table names.
741
784
  source_layer (str): The database layer containing the source tables.
742
785
  target_layer (str): The database layer to write the output to.
743
786
  target_table_name (str): The name of the target output table.
744
- unified_source_base_names (list of str): Base names of the source tables to unify.
745
- unified_temp_view_key (str): The alias/key to use for the unified DataFrame
746
- in the schema logic.
747
-
787
+ source_base_names (list of str): Base names of the source tables to union.
788
+ source_alias (str): The key under which the unioned DataFrame is injected
789
+ into the schema processing (replaces the table alias in schema_dict).
790
+
748
791
  Raises:
749
- ValueError: If no source tables can be found to unify.
792
+ ValueError: If no source tables can be found to union.
750
793
  """
751
794
  actual_schema_source = self.get_target_schema(source_layer)
752
795
  actual_schema_target = self.get_target_schema(target_layer)
753
796
  fqn = f"`{self.db}`.`{actual_schema_target}`.`{target_table_name}`"
754
- print(f"--- Executing Pattern: unify_and_process (Target: {target_table_name}) ---")
797
+ print(f"--- Executing Pattern: union_sources_to_table (Target: {target_table_name}) ---")
755
798
  self._drop_table_if_exists(fqn)
756
-
799
+
757
800
  list_of_dfs = []
758
- for base in unified_source_base_names:
759
- for org in org_list:
760
- source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{org['label']}`"
801
+ for base in source_base_names:
802
+ for partition in source_partitions:
803
+ source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{partition['label']}`"
761
804
  try:
762
805
  list_of_dfs.append(self.spark.table(source_fqn))
763
806
  except Exception:
764
807
  print(f" - WARNING: Missing table {source_fqn}")
765
808
 
766
- if not list_of_dfs: raise ValueError("No sources found.")
767
-
809
+ if not list_of_dfs:
810
+ raise ValueError("No sources found to union.")
811
+
768
812
  print(f" -> [Union] Merging {len(list_of_dfs)} tables...")
769
813
  unioned_df = reduce(lambda x, y: x.unionByName(y, allowMissingColumns=True), list_of_dfs).dropDuplicates()
770
-
771
- input_dfs = {unified_temp_view_key: unioned_df}
772
-
773
- df_final = self.process_schema(schema_dict, dataframes_in=input_dfs)
814
+
815
+ df_final = self.process_schema(schema_dict, dataframes_in={source_alias: unioned_df})
774
816
  self._write_dataframe(df_final, fqn, target_table_name)
775
- print("--- Pattern 'unify_and_process' completed. ---")
817
+ print("--- Pattern 'union_sources_to_table' completed. ---")
776
818
 
777
819
  def optimize_table(self, target_layer, target_table_name, zorder_cols=None):
778
820
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.10
3
+ Version: 0.6.1
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3