satisfactoscript 0.5.10__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/PKG-INFO +1 -1
  2. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/pyproject.toml +1 -1
  3. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/core.py +58 -43
  4. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
  5. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/README.md +0 -0
  6. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/setup.cfg +0 -0
  7. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/__init__.py +0 -0
  8. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/agentic/__init__.py +0 -0
  9. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/agentic/agent.py +0 -0
  10. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/__init__.py +0 -0
  11. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/config.py +0 -0
  12. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/loaders.py +0 -0
  13. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/core/registry.py +0 -0
  14. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/registry.py +0 -0
  15. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/semantic/__init__.py +0 -0
  16. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/semantic/semantic.py +0 -0
  17. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript/utils.py +0 -0
  18. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/SOURCES.txt +0 -0
  19. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
  20. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/requires.txt +0 -0
  21. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/src/satisfactoscript.egg-info/top_level.txt +0 -0
  22. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_config.py +0 -0
  23. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core.py +0 -0
  24. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_connect_patch.py +0 -0
  25. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_env_detection.py +0 -0
  26. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_join.py +0 -0
  27. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_core_username.py +0 -0
  28. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_dummy.py +0 -0
  29. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_loaders.py +0 -0
  30. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_registry.py +0 -0
  31. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_registry_import_paths.py +0 -0
  32. {satisfactoscript-0.5.10 → satisfactoscript-0.6.0}/tests/test_utils_safe_columns.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.10
3
+ Version: 0.6.0
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "satisfactoscript"
7
- version = "0.5.10"
7
+ version = "0.6.0"
8
8
  description = "An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse."
9
9
  readme = "README.md"
10
10
  authors = [
@@ -686,37 +686,50 @@ class SatisfactoEngine:
686
686
 
687
687
  return df_main
688
688
 
689
- def run_split_to_org(self, schema_dict, org_list, target_layer, target_base_name, split_column="sales_org_code"):
689
+ def run_process_and_split(self, schema_dict, split_values, target_layer, target_base_name, split_column):
690
690
  """
691
- Executes a pattern where the processed DataFrame is split based on an organization
692
- column and written to multiple Delta tables.
693
-
691
+ Processes the schema and splits the resulting DataFrame into multiple Delta tables,
692
+ one per value in split_values, filtering on split_column.
693
+
694
694
  Args:
695
695
  schema_dict (dict): The pipeline dictionary schema.
696
- org_list (list of dict): List of organizations containing 'org_code' and 'label'.
696
+ split_values (list of dict): Each item must have:
697
+ - 'value' (str): The column value to filter on.
698
+ - 'label' (str): The suffix appended to target_base_name for the table name.
697
699
  target_layer (str): The target database layer (e.g., 'silver').
698
700
  target_base_name (str): The base name for the target tables.
699
701
  split_column (str): The column used to split the data.
700
702
  """
701
- print(f"--- Executing Pattern: split_to_org (Base: {target_base_name}) ---")
703
+ print(f"--- Executing Pattern: process_and_split (Base: {target_base_name}, Column: {split_column}) ---")
702
704
  df_full = self.process_schema(schema_dict)
703
705
  actual_schema = self.get_target_schema(target_layer)
704
- df_full.cache()
705
-
706
- for org in org_list:
707
- fqn = f"`{self.db}`.`{actual_schema}`.`{target_base_name}_{org['label']}`"
706
+
707
+ cached = False
708
+ try:
709
+ df_full.cache()
710
+ cached = True
711
+ except Exception:
712
+ print(" -> [Cache] WARNING: cache() failed (UserContext gRPC issue). Continuing without caching.")
713
+
714
+ for item in split_values:
715
+ fqn = f"`{self.db}`.`{actual_schema}`.`{target_base_name}_{item['label']}`"
708
716
  self._drop_table_if_exists(fqn)
709
- print(f" -> Processing Org: {org['label']}")
710
- df_org = df_full.filter(F.col(split_column) == org["org_code"])
711
- self._write_dataframe(df_org, fqn, org["label"])
712
-
713
- df_full.unpersist()
714
- print("--- Pattern 'split_to_org' completed. ---")
717
+ print(f" -> Processing split value: {item['label']} ({split_column} = '{item['value']}')")
718
+ df_slice = df_full.filter(F.col(split_column) == item["value"])
719
+ self._write_dataframe(df_slice, fqn, item["label"])
720
+
721
+ if cached:
722
+ try:
723
+ df_full.unpersist()
724
+ except Exception:
725
+ pass
726
+
727
+ print("--- Pattern 'process_and_split' completed. ---")
715
728
 
716
- def run_follow_schema(self, schema_dict, target_layer, target_table_name):
729
+ def run_process_to_table(self, schema_dict, target_layer, target_table_name):
717
730
  """
718
- Standard execution pattern: Process a schema and write to a single target table.
719
-
731
+ Processes a schema and writes the result to a single Delta table.
732
+
720
733
  Args:
721
734
  schema_dict (dict): The pipeline dictionary schema.
722
735
  target_layer (str): The target database layer (e.g., 'gold').
@@ -724,55 +737,57 @@ class SatisfactoEngine:
724
737
  """
725
738
  actual_schema = self.get_target_schema(target_layer)
726
739
  fqn = f"`{self.db}`.`{actual_schema}`.`{target_table_name}`"
727
- print(f"--- Executing Pattern: follow_schema (Target: {target_table_name}) ---")
740
+ print(f"--- Executing Pattern: process_to_table (Target: {target_table_name}) ---")
728
741
  self._drop_table_if_exists(fqn)
729
742
  df = self.process_schema(schema_dict)
730
743
  self._write_dataframe(df, fqn, target_table_name)
731
- print("--- Pattern 'follow_schema' completed. ---")
744
+ print("--- Pattern 'process_to_table' completed. ---")
732
745
 
733
- def run_unify_and_process(self, schema_dict, org_list, source_layer, target_layer, target_table_name, unified_source_base_names, unified_temp_view_key):
746
+ def run_union_sources_to_table(self, schema_dict, source_partitions, source_layer, target_layer, target_table_name, source_base_names, source_alias):
734
747
  """
735
- Executes a pattern where data from multiple organizations is first unified (unioned)
736
- and then passed through the schema processing.
737
-
748
+ Unions source tables (one per partition label per base name), processes the
749
+ combined DataFrame through the schema, and writes the result to a single Delta table.
750
+
751
+ Source table names are resolved as: {db}.{source_layer}.{base_name}_{partition['label']}
752
+
738
753
  Args:
739
754
  schema_dict (dict): The pipeline dictionary schema.
740
- org_list (list of dict): List of organizations defining expected source tables.
755
+ source_partitions (list of dict): Each item must have a 'label' key (str) used
756
+ as the suffix when resolving source table names.
741
757
  source_layer (str): The database layer containing the source tables.
742
758
  target_layer (str): The database layer to write the output to.
743
759
  target_table_name (str): The name of the target output table.
744
- unified_source_base_names (list of str): Base names of the source tables to unify.
745
- unified_temp_view_key (str): The alias/key to use for the unified DataFrame
746
- in the schema logic.
747
-
760
+ source_base_names (list of str): Base names of the source tables to union.
761
+ source_alias (str): The key under which the unioned DataFrame is injected
762
+ into the schema processing (replaces the table alias in schema_dict).
763
+
748
764
  Raises:
749
- ValueError: If no source tables can be found to unify.
765
+ ValueError: If no source tables can be found to union.
750
766
  """
751
767
  actual_schema_source = self.get_target_schema(source_layer)
752
768
  actual_schema_target = self.get_target_schema(target_layer)
753
769
  fqn = f"`{self.db}`.`{actual_schema_target}`.`{target_table_name}`"
754
- print(f"--- Executing Pattern: unify_and_process (Target: {target_table_name}) ---")
770
+ print(f"--- Executing Pattern: union_sources_to_table (Target: {target_table_name}) ---")
755
771
  self._drop_table_if_exists(fqn)
756
-
772
+
757
773
  list_of_dfs = []
758
- for base in unified_source_base_names:
759
- for org in org_list:
760
- source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{org['label']}`"
774
+ for base in source_base_names:
775
+ for partition in source_partitions:
776
+ source_fqn = f"`{self.db}`.`{actual_schema_source}`.`{base}_{partition['label']}`"
761
777
  try:
762
778
  list_of_dfs.append(self.spark.table(source_fqn))
763
779
  except Exception:
764
780
  print(f" - WARNING: Missing table {source_fqn}")
765
781
 
766
- if not list_of_dfs: raise ValueError("No sources found.")
767
-
782
+ if not list_of_dfs:
783
+ raise ValueError("No sources found to union.")
784
+
768
785
  print(f" -> [Union] Merging {len(list_of_dfs)} tables...")
769
786
  unioned_df = reduce(lambda x, y: x.unionByName(y, allowMissingColumns=True), list_of_dfs).dropDuplicates()
770
-
771
- input_dfs = {unified_temp_view_key: unioned_df}
772
-
773
- df_final = self.process_schema(schema_dict, dataframes_in=input_dfs)
787
+
788
+ df_final = self.process_schema(schema_dict, dataframes_in={source_alias: unioned_df})
774
789
  self._write_dataframe(df_final, fqn, target_table_name)
775
- print("--- Pattern 'unify_and_process' completed. ---")
790
+ print("--- Pattern 'union_sources_to_table' completed. ---")
776
791
 
777
792
  def optimize_table(self, target_layer, target_table_name, zorder_cols=None):
778
793
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.10
3
+ Version: 0.6.0
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3