cloe-nessy 1.0.9__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,7 +39,10 @@ class DeltaLoader(ABC, LoggerMixin):
39
39
  or f"{self.table_identifier.split('.')[0]}.{self.table_identifier.split('.')[1]}.metadata_delta_load"
40
40
  )
41
41
  table_manager = TableManager()
42
- table_manager.create_table(table=DeltaLoaderMetadataTable(identifier=self.metadata_table_identifier))
42
+ table_manager.create_table(
43
+ table=DeltaLoaderMetadataTable(identifier=self.metadata_table_identifier),
44
+ ignore_if_exists=True,
45
+ )
43
46
 
44
47
  @abstractmethod
45
48
  def read_data(
@@ -18,7 +18,7 @@ class UnityCatalogAdapter(LoggerMixin):
18
18
  """Initializes the UnityCatalogAdapter class."""
19
19
  self._spark = spark or SessionManager.get_spark_session()
20
20
  self._console_logger = self.get_console_logger()
21
- self._catalogs = self.get_catalogs()
21
+ self._catalogs: list[Catalog] | None = None
22
22
 
23
23
  def _execute_sql(self, query):
24
24
  """Execute a SQL query and return a DataFrame.
@@ -57,6 +57,8 @@ class UnityCatalogAdapter(LoggerMixin):
57
57
  Returns:
58
58
  The Catalog with the specified name.
59
59
  """
60
+ if self._catalogs is None:
61
+ self._catalogs: list[Catalog] = self.get_catalogs()
60
62
  for catalog in self._catalogs:
61
63
  if catalog.name == name:
62
64
  return catalog
@@ -106,6 +106,8 @@ class WriteDeltaMergeAction(PipelineAction):
106
106
  raise ValueError("Data is required for the merge operation.")
107
107
  if context.table_metadata is None and table_identifier is None:
108
108
  raise ValueError("Table metadata or a table identifier are required for the merge operation.")
109
+ if context.table_metadata is not None and table_identifier is not None:
110
+ raise ValueError("Provide either table metadata or a table identifier, not both.")
109
111
 
110
112
  if table_identifier is not None:
111
113
  context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
@@ -64,15 +64,64 @@ class PipelineParsingService:
64
64
  if not yaml_str:
65
65
  raise ValueError("YAML content is empty.")
66
66
 
67
- secrets_repl_yaml_str = PipelineParsingService._replace_secret_refs(yaml_str)
68
- fixed_yaml_str = PipelineParsingService._fix_yaml_str_with_templates(secrets_repl_yaml_str)
69
- config = yaml.safe_load(fixed_yaml_str)
67
+ try:
68
+ secrets_repl_yaml_str = PipelineParsingService._replace_secret_refs(yaml_str)
69
+ fixed_yaml_str = PipelineParsingService._fix_yaml_str_with_templates(secrets_repl_yaml_str)
70
+ PipelineParsingService._validate_yaml(fixed_yaml_str)
71
+ config = yaml.safe_load(fixed_yaml_str)
72
+ except yaml.YAMLError as e:
73
+ console_logger.error("Failed to parse YAML. Check for syntax errors or unquoted template variables.")
74
+ console_logger.error("Original error: %s", str(e))
75
+ raise ValueError(
76
+ f"YAML parsing failed. Ensure all template variables like {{env:VAR}} are "
77
+ f"properly formatted and YAML syntax is correct. Error: {e}"
78
+ ) from e
70
79
  pipeline_config = PipelineConfig.metadata_to_instance(config)
71
80
  steps = PipelineParsingService._get_steps(pipeline_config.steps, pipeline_config.env)
72
81
  pipeline = Pipeline(name=pipeline_config.name, steps=steps) # type: ignore
73
- console_logger.info("Pipeline [ '%s' ] parsed successfully with %d steps.", pipeline.name, len(pipeline.steps))
82
+ console_logger.info(
83
+ "Pipeline [ '%s' ] parsed successfully with %d steps.",
84
+ pipeline.name,
85
+ len(pipeline.steps),
86
+ )
74
87
  return pipeline
75
88
 
89
+ @staticmethod
90
+ def _validate_yaml(yaml_str: str) -> None:
91
+ """Validates YAML string including checking for duplicate keys.
92
+
93
+ Args:
94
+ yaml_str: A string that can be parsed in YAML format.
95
+
96
+ Raises:
97
+ yaml.YAMLError: If the YAML is invalid.
98
+ ValueError: If duplicate keys are found.
99
+ """
100
+
101
+ def no_duplicates_constructor(loader, node, deep=False):
102
+ """Check for duplicate keys in YAML mappings."""
103
+ mapping = {}
104
+ for key_node, value_node in node.value:
105
+ key = loader.construct_object(key_node, deep=deep)
106
+ if key in mapping:
107
+ raise ValueError(f"Duplicate key found in YAML: '{key}'")
108
+ value = loader.construct_object(value_node, deep=deep)
109
+ mapping[key] = value
110
+ return mapping
111
+
112
+ class DuplicateCheckLoader(yaml.SafeLoader):
113
+ pass
114
+
115
+ DuplicateCheckLoader.add_constructor(
116
+ yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
117
+ no_duplicates_constructor,
118
+ )
119
+
120
+ try:
121
+ yaml.load(yaml_str, Loader=DuplicateCheckLoader)
122
+ except yaml.YAMLError as e:
123
+ raise yaml.YAMLError(f"Invalid YAML: {e}") from e
124
+
76
125
  @staticmethod
77
126
  def _get_steps(
78
127
  step_configs: OrderedDict[str, PipelineStepConfig],
@@ -211,11 +260,24 @@ class PipelineParsingService:
211
260
 
212
261
  @staticmethod
213
262
  def _fix_yaml_str_with_templates(yaml_str: str) -> str:
214
- """Fixes unquoted {{env:...}} templates (with optional prefix/suffix) before yaml.safe_load. Skips values already quoted with ' or "."""
215
- unquoted_template = re.compile(r"(:)\s*(?!['\"])([^\s#]*\{\{env:[^}]+\}\}[^\s#]*)", re.MULTILINE)
263
+ """Fixes unquoted template patterns before yaml.safe_load.
264
+
265
+ Handles:
266
+ - {{env:...}} - environment variables
267
+ - ((step:...)) - step references
268
+
269
+ Skips values already quoted with ' or ".
270
+ """
271
+ # Match any unquoted template pattern (env or step)
272
+ # This regex matches: colon + optional whitespace + unquoted value containing template
273
+ unquoted_template = re.compile(
274
+ r"(:)\s*(?!['\"])([^\s#]*(?:\{\{env:[^}]+\}\}|\(\(step:[^)]+\)\))[^\s#]*)", re.MULTILINE
275
+ )
216
276
 
217
277
  def replacer(match):
218
278
  colon, template = match.groups()
219
- return f'{colon} "{template}"'
279
+ # Escape any double quotes in the template value
280
+ escaped_template = template.replace('"', '\\"')
281
+ return f'{colon} "{escaped_template}"'
220
282
 
221
283
  return unquoted_template.sub(replacer, yaml_str)
@@ -36,6 +36,7 @@ class PipelineStep:
36
36
  _table_metadata_ref: str | None = None
37
37
 
38
38
  def __post_init__(self) -> None:
39
+ self._validate_step_name()
39
40
  if not isinstance(self.action, PipelineAction):
40
41
  raise ValueError("action must be a PipelineAction subclass.")
41
42
  if self._context_ref:
@@ -47,6 +48,22 @@ class PipelineStep:
47
48
  if isinstance(val, PipelineStep):
48
49
  self._predecessors.add(val.name)
49
50
 
51
+ def _validate_step_name(self) -> None:
52
+ """Validates that the step name does not contain forbidden characters.
53
+
54
+ These characters have led to parsing issues when using joins and unions
55
+ in the past.
56
+
57
+ Raises:
58
+ ValueError: If step name contains brackets, parentheses, or braces.
59
+ """
60
+ invalid_chars = ["[", "]", "(", ")", "{", "}"]
61
+ for char in invalid_chars:
62
+ if char in self.name:
63
+ raise ValueError(
64
+ f"Invalid step name '{self.name}': step names cannot contain brackets or parentheses ({', '.join(invalid_chars)})"
65
+ )
66
+
50
67
  def run(self) -> None:
51
68
  """Execute the action on the context."""
52
69
  self.result = self.action.run(context=self.context, **self.options)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloe-nessy
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: Your friendly datalake monster.
5
5
  Project-URL: homepage, https://initions.com/
6
6
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -21,7 +21,7 @@ cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=w4nrS6IcPPN7UBFBws
21
21
  cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
23
23
  cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=W3P3gwtkBW3pWbUEOK6agvJfj7QivXhEr_s0dwxhJxQ,1369
24
- cloe_nessy/integration/delta_loader/delta_loader.py,sha256=RiHCsCc5_70KVYT2tiZFbuJyb6xPKNtD1ExEtwLO-1c,6946
24
+ cloe_nessy/integration/delta_loader/delta_loader.py,sha256=2uSsfFpL3R_Qovx-bOYnhWh3x48r3Otp9xFv-gyoZy8,7004
25
25
  cloe_nessy/integration/delta_loader/delta_loader_factory.py,sha256=vB1cL6-Nc3SkLH1xtazMbMF1MnNYq8-g3GHZzRE3QmE,2251
26
26
  cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py,sha256=G_EWUY76ZlbsPZB9LCGlOLVezk7DK6peYXEgt7-sTQE,1683
27
27
  cloe_nessy/integration/delta_loader/strategies/__init__.py,sha256=1o5fRWenL5KnUg1hf7kmTuTpG9pbMxchiQTub52Qvwo,255
@@ -56,7 +56,7 @@ cloe_nessy/models/table.py,sha256=-MQLwKthcO4JE99lbikJL1cnt27aEZmt3o96R9CtoTU,12
56
56
  cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
57
57
  cloe_nessy/models/volume.py,sha256=BPEy6DW0Yig-mFQiNgm9P5JoO6Vcvtxcipnk2-H8Q_k,2456
58
58
  cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
59
- cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1syLxjT5Wzo4uog1hFSEs76M,12651
59
+ cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=75QNiYgvWGG2zhxslNyABIZRuKtgTP7DlYrF5PdfsWQ,12757
60
60
  cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
61
  cloe_nessy/models/mixins/read_instance_mixin.py,sha256=HT42qor6IltR-BTfe_DYa5ylntE7qbArs-3cSjo4KXU,4649
62
62
  cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
@@ -71,9 +71,9 @@ cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6Ilo
71
71
  cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z98qxbFIsMao,7929
72
72
  cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
73
73
  cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
74
- cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=sMnUDAL0PBLbKupC_TWxBT1X9-Y9I5dPSqAxAkWyweA,9102
74
+ cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=XJu2v4qzSBMuLRZs35CIabLPQHY3zV42Vurf4VuN9v0,11363
75
75
  cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=OjdYDQW19yXjdcqs7bDMlyWmv0cQz7Qn4I2cakBgN2E,13139
76
- cloe_nessy/pipeline/pipeline_step.py,sha256=oTnlvRpB0fbOBQXbPe1URstA5fv-97igCHt_41fKCAk,2082
76
+ cloe_nessy/pipeline/pipeline_step.py,sha256=ui3hDOAuuX-j62gg6o8RogHzCjZHNsTYWpLZwO76VhE,2753
77
77
  cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
78
78
  cloe_nessy/pipeline/actions/read_api.py,sha256=YMOWPCyxitU5v6HHH_AZqpbHQenPU4-WlaOrb-NsIIk,16245
79
79
  cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=MK67NIB5qXTDwCC0EFwHYoOkelFnwY6Z4REpC5BlFb4,6359
@@ -101,7 +101,7 @@ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h
101
101
  cloe_nessy/pipeline/actions/transform_with_column.py,sha256=5EG_H4MTiQVCfgkjfyof4qqw9xB3MJh-v0lsoYlbVCU,4967
102
102
  cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=rYK_V08D8w4CBClRBGwj18amhZOVmP6VTTO-xqy7We8,4849
103
103
  cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
104
- cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=5Svi9Z7xg2Ey0WPq4MzS509vMDOM7mDGUa5OBaWbjkc,6438
104
+ cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=ndJf9n3VP8YhTSVLgyURnbtOmC5i3YfVQWtfRjBV1j0,6613
105
105
  cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
106
106
  cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
107
107
  cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
@@ -113,6 +113,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
113
113
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
115
115
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
116
- cloe_nessy-1.0.9.dist-info/METADATA,sha256=imtWtMJfxQToZnS30gWkb_DW3XIahzqkbVjYk3cajVA,3289
117
- cloe_nessy-1.0.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
118
- cloe_nessy-1.0.9.dist-info/RECORD,,
116
+ cloe_nessy-1.0.11.dist-info/METADATA,sha256=cvl6CZwjaIn9-vbsnDsX3znHZmlzFga_DE8W_qntpek,3290
117
+ cloe_nessy-1.0.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
118
+ cloe_nessy-1.0.11.dist-info/RECORD,,