cloe-nessy 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,6 +34,7 @@ class LoggerMixin:
34
34
  logger_name=f"Console:{self.__class__.__name__}",
35
35
  logging_level=level if level is not None else logging_settings.log_level_console,
36
36
  log_format=log_format if log_format is not None else logging_settings.log_format_console,
37
+ datefmt="%Y-%m-%d %H:%M:%S",
37
38
  )
38
39
  return logger
39
40
 
@@ -18,7 +18,7 @@ class UnityCatalogAdapter(LoggerMixin):
18
18
  """Initializes the UnityCatalogAdapter class."""
19
19
  self._spark = spark or SessionManager.get_spark_session()
20
20
  self._console_logger = self.get_console_logger()
21
- self._catalogs = self.get_catalogs()
21
+ self._catalogs: list[Catalog] | None = None
22
22
 
23
23
  def _execute_sql(self, query):
24
24
  """Execute a SQL query and return a DataFrame.
@@ -57,6 +57,8 @@ class UnityCatalogAdapter(LoggerMixin):
57
57
  Returns:
58
58
  The Catalog with the specified name.
59
59
  """
60
+ if self._catalogs is None:
61
+ self._catalogs: list[Catalog] = self.get_catalogs()
60
62
  for catalog in self._catalogs:
61
63
  if catalog.name == name:
62
64
  return catalog
@@ -106,6 +106,8 @@ class WriteDeltaMergeAction(PipelineAction):
106
106
  raise ValueError("Data is required for the merge operation.")
107
107
  if context.table_metadata is None and table_identifier is None:
108
108
  raise ValueError("Table metadata or a table identifier are required for the merge operation.")
109
+ if context.table_metadata is not None and table_identifier is not None:
110
+ raise ValueError("Provide either table metadata or a table identifier, not both.")
109
111
 
110
112
  if table_identifier is not None:
111
113
  context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
@@ -56,13 +56,13 @@ class Pipeline(LoggerMixin):
56
56
  step.context.table_metadata = self.steps[step._table_metadata_ref].result.table_metadata
57
57
 
58
58
  try:
59
- self._console_logger.info(f"Starting execution of step: {step.name}")
59
+ self._console_logger.info(f">>> Starting execution of step: {step.name}")
60
60
  step.run()
61
61
  except Exception as err:
62
- self._console_logger.error(f"Execution of step {step.name} failed with error: {str(err)}")
62
+ self._console_logger.error(f">>> Execution of step {step.name} failed with error: {str(err)}")
63
63
  raise err
64
64
  else:
65
- self._console_logger.info(f"Execution of step {step.name} succeeded.")
65
+ self._console_logger.info(f">>> Execution of step {step.name} succeeded.")
66
66
 
67
67
  def _get_ready_to_run_steps(self, remaining_steps: list[str], g: nx.DiGraph) -> set[str]:
68
68
  """Identifies and returns the steps that are ready to run.
@@ -67,6 +67,7 @@ class PipelineParsingService:
67
67
  try:
68
68
  secrets_repl_yaml_str = PipelineParsingService._replace_secret_refs(yaml_str)
69
69
  fixed_yaml_str = PipelineParsingService._fix_yaml_str_with_templates(secrets_repl_yaml_str)
70
+ PipelineParsingService._validate_yaml(fixed_yaml_str)
70
71
  config = yaml.safe_load(fixed_yaml_str)
71
72
  except yaml.YAMLError as e:
72
73
  console_logger.error("Failed to parse YAML. Check for syntax errors or unquoted template variables.")
@@ -78,9 +79,49 @@ class PipelineParsingService:
78
79
  pipeline_config = PipelineConfig.metadata_to_instance(config)
79
80
  steps = PipelineParsingService._get_steps(pipeline_config.steps, pipeline_config.env)
80
81
  pipeline = Pipeline(name=pipeline_config.name, steps=steps) # type: ignore
81
- console_logger.info("Pipeline [ '%s' ] parsed successfully with %d steps.", pipeline.name, len(pipeline.steps))
82
+ console_logger.info(
83
+ "Pipeline [ '%s' ] parsed successfully with %d steps.",
84
+ pipeline.name,
85
+ len(pipeline.steps),
86
+ )
82
87
  return pipeline
83
88
 
89
+ @staticmethod
90
+ def _validate_yaml(yaml_str: str) -> None:
91
+ """Validates YAML string including checking for duplicate keys.
92
+
93
+ Args:
94
+ yaml_str: The string to parse and validate as YAML.
95
+
96
+ Raises:
97
+ yaml.YAMLError: If the YAML is invalid.
98
+ ValueError: If duplicate keys are found.
99
+ """
100
+
101
+ def no_duplicates_constructor(loader, node, deep=False):
102
+ """Check for duplicate keys in YAML mappings."""
103
+ mapping = {}
104
+ for key_node, value_node in node.value:
105
+ key = loader.construct_object(key_node, deep=deep)
106
+ if key in mapping:
107
+ raise ValueError(f"Duplicate key found in YAML: '{key}'")
108
+ value = loader.construct_object(value_node, deep=deep)
109
+ mapping[key] = value
110
+ return mapping
111
+
112
+ class DuplicateCheckLoader(yaml.SafeLoader):
113
+ pass
114
+
115
+ DuplicateCheckLoader.add_constructor(
116
+ yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
117
+ no_duplicates_constructor,
118
+ )
119
+
120
+ try:
121
+ yaml.load(yaml_str, Loader=DuplicateCheckLoader)
122
+ except yaml.YAMLError as e:
123
+ raise yaml.YAMLError(f"Invalid YAML: {e}") from e
124
+
84
125
  @staticmethod
85
126
  def _get_steps(
86
127
  step_configs: OrderedDict[str, PipelineStepConfig],
@@ -36,6 +36,7 @@ class PipelineStep:
36
36
  _table_metadata_ref: str | None = None
37
37
 
38
38
  def __post_init__(self) -> None:
39
+ self._validate_step_name()
39
40
  if not isinstance(self.action, PipelineAction):
40
41
  raise ValueError("action must be a PipelineAction subclass.")
41
42
  if self._context_ref:
@@ -47,6 +48,22 @@ class PipelineStep:
47
48
  if isinstance(val, PipelineStep):
48
49
  self._predecessors.add(val.name)
49
50
 
51
+ def _validate_step_name(self) -> None:
52
+ """Validates that the step name does not contain forbidden characters.
53
+
54
+ These characters have led to parsing issues when using joins and unions
55
+ in the past.
56
+
57
+ Raises:
58
+ ValueError: If the step name contains brackets, parentheses, or braces.
59
+ """
60
+ invalid_chars = ["[", "]", "(", ")", "{", "}"]
61
+ for char in invalid_chars:
62
+ if char in self.name:
63
+ raise ValueError(
64
+ f"Invalid step name '{self.name}': step names cannot contain brackets or parentheses ({', '.join(invalid_chars)})"
65
+ )
66
+
50
67
  def run(self) -> None:
51
68
  """Execute the action on the context."""
52
69
  self.result = self.action.run(context=self.context, **self.options)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloe-nessy
3
- Version: 1.0.10
3
+ Version: 1.0.12
4
4
  Summary: Your friendly datalake monster.
5
5
  Project-URL: homepage, https://initions.com/
6
6
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Database
15
15
  Requires-Python: <3.14,>=3.11
16
16
  Requires-Dist: azure-identity<2.0.0,>=1.19.0
17
- Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
17
+ Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.11
18
18
  Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
19
19
  Requires-Dist: fsspec<2026.1.1,>=2026.1.0
20
20
  Requires-Dist: httpx<1.0.0,>=0.27.2
@@ -45,7 +45,7 @@ cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=
45
45
  cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=B7PwPHKrsJL0ZxBT-H9wWSy0gn7shqNDJ0AbrpMHyMg,10135
46
46
  cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
47
47
  cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
48
- cloe_nessy/logging/logger_mixin.py,sha256=xA12LIVn4yUEoaKmm7nGC1-U3ddSo_HiL3I5MfkvEwU,7409
48
+ cloe_nessy/logging/logger_mixin.py,sha256=Ws3sirAi7CVyWWc_ClQs0wzdMIJ1LNNrMMkFlO1V0dw,7450
49
49
  cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
50
50
  cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
51
51
  cloe_nessy/models/column.py,sha256=W4V1Ls1d60VyZ1Ko9Yu9eSipcMbxSzKicn0aloHPiR0,2027
@@ -56,7 +56,7 @@ cloe_nessy/models/table.py,sha256=-MQLwKthcO4JE99lbikJL1cnt27aEZmt3o96R9CtoTU,12
56
56
  cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
57
57
  cloe_nessy/models/volume.py,sha256=BPEy6DW0Yig-mFQiNgm9P5JoO6Vcvtxcipnk2-H8Q_k,2456
58
58
  cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
59
- cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1syLxjT5Wzo4uog1hFSEs76M,12651
59
+ cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=75QNiYgvWGG2zhxslNyABIZRuKtgTP7DlYrF5PdfsWQ,12757
60
60
  cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
61
  cloe_nessy/models/mixins/read_instance_mixin.py,sha256=HT42qor6IltR-BTfe_DYa5ylntE7qbArs-3cSjo4KXU,4649
62
62
  cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
@@ -66,14 +66,14 @@ cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4N
66
66
  cloe_nessy/object_manager/table_manager.py,sha256=M4JWgga7A8MHll5QJ42TUU5X4eUrQzhbwaZtVBgoQPY,13902
67
67
  cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
68
68
  cloe_nessy/pipeline/__init__.py,sha256=BUzL4HJaCXWmK7OgKaxdwK72JrrdzfzIvyxOGtM28U0,417
69
- cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
69
+ cloe_nessy/pipeline/pipeline.py,sha256=-JcdRL8PV1Leo0984Z2jOJ4EL2nuMwnqX4rIwu4JxEg,10831
70
70
  cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6IloVd2Fj31Sg,1944
71
71
  cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z98qxbFIsMao,7929
72
72
  cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
73
73
  cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
74
- cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=qVDckjEqv4qYD0jUABs4BSbgYC3w-A_SpqBmqr7qCxk,9978
74
+ cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=XJu2v4qzSBMuLRZs35CIabLPQHY3zV42Vurf4VuN9v0,11363
75
75
  cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=OjdYDQW19yXjdcqs7bDMlyWmv0cQz7Qn4I2cakBgN2E,13139
76
- cloe_nessy/pipeline/pipeline_step.py,sha256=oTnlvRpB0fbOBQXbPe1URstA5fv-97igCHt_41fKCAk,2082
76
+ cloe_nessy/pipeline/pipeline_step.py,sha256=ui3hDOAuuX-j62gg6o8RogHzCjZHNsTYWpLZwO76VhE,2753
77
77
  cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
78
78
  cloe_nessy/pipeline/actions/read_api.py,sha256=YMOWPCyxitU5v6HHH_AZqpbHQenPU4-WlaOrb-NsIIk,16245
79
79
  cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=MK67NIB5qXTDwCC0EFwHYoOkelFnwY6Z4REpC5BlFb4,6359
@@ -101,7 +101,7 @@ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h
101
101
  cloe_nessy/pipeline/actions/transform_with_column.py,sha256=5EG_H4MTiQVCfgkjfyof4qqw9xB3MJh-v0lsoYlbVCU,4967
102
102
  cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=rYK_V08D8w4CBClRBGwj18amhZOVmP6VTTO-xqy7We8,4849
103
103
  cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
104
- cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=5Svi9Z7xg2Ey0WPq4MzS509vMDOM7mDGUa5OBaWbjkc,6438
104
+ cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=ndJf9n3VP8YhTSVLgyURnbtOmC5i3YfVQWtfRjBV1j0,6613
105
105
  cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
106
106
  cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
107
107
  cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
@@ -113,6 +113,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
113
113
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
115
115
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
116
- cloe_nessy-1.0.10.dist-info/METADATA,sha256=NaIr91k5JZzPptDRnVq28odbhx2bhVxDuixXGbL3QIc,3290
117
- cloe_nessy-1.0.10.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
118
- cloe_nessy-1.0.10.dist-info/RECORD,,
116
+ cloe_nessy-1.0.12.dist-info/METADATA,sha256=W3Gs63IM-gG61xJVlCgs7R3FB9VcI94VRdaFfElczUg,3291
117
+ cloe_nessy-1.0.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
118
+ cloe_nessy-1.0.12.dist-info/RECORD,,