cloe-nessy 1.0.9__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/integration/delta_loader/delta_loader.py +4 -1
- cloe_nessy/models/adapter/unity_catalog_adapter.py +3 -1
- cloe_nessy/pipeline/actions/write_delta_merge.py +2 -0
- cloe_nessy/pipeline/pipeline_parsing_service.py +69 -7
- cloe_nessy/pipeline/pipeline_step.py +17 -0
- {cloe_nessy-1.0.9.dist-info → cloe_nessy-1.0.11.dist-info}/METADATA +1 -1
- {cloe_nessy-1.0.9.dist-info → cloe_nessy-1.0.11.dist-info}/RECORD +8 -8
- {cloe_nessy-1.0.9.dist-info → cloe_nessy-1.0.11.dist-info}/WHEEL +0 -0
|
@@ -39,7 +39,10 @@ class DeltaLoader(ABC, LoggerMixin):
|
|
|
39
39
|
or f"{self.table_identifier.split('.')[0]}.{self.table_identifier.split('.')[1]}.metadata_delta_load"
|
|
40
40
|
)
|
|
41
41
|
table_manager = TableManager()
|
|
42
|
-
table_manager.create_table(
|
|
42
|
+
table_manager.create_table(
|
|
43
|
+
table=DeltaLoaderMetadataTable(identifier=self.metadata_table_identifier),
|
|
44
|
+
ignore_if_exists=True,
|
|
45
|
+
)
|
|
43
46
|
|
|
44
47
|
@abstractmethod
|
|
45
48
|
def read_data(
|
|
@@ -18,7 +18,7 @@ class UnityCatalogAdapter(LoggerMixin):
|
|
|
18
18
|
"""Initializes the UnityCatalogAdapter class."""
|
|
19
19
|
self._spark = spark or SessionManager.get_spark_session()
|
|
20
20
|
self._console_logger = self.get_console_logger()
|
|
21
|
-
self._catalogs =
|
|
21
|
+
self._catalogs: list[Catalog] | None = None
|
|
22
22
|
|
|
23
23
|
def _execute_sql(self, query):
|
|
24
24
|
"""Execute a SQL query and return a DataFrame.
|
|
@@ -57,6 +57,8 @@ class UnityCatalogAdapter(LoggerMixin):
|
|
|
57
57
|
Returns:
|
|
58
58
|
The Catalog with the specified name.
|
|
59
59
|
"""
|
|
60
|
+
if self._catalogs is None:
|
|
61
|
+
self._catalogs: list[Catalog] = self.get_catalogs()
|
|
60
62
|
for catalog in self._catalogs:
|
|
61
63
|
if catalog.name == name:
|
|
62
64
|
return catalog
|
|
@@ -106,6 +106,8 @@ class WriteDeltaMergeAction(PipelineAction):
|
|
|
106
106
|
raise ValueError("Data is required for the merge operation.")
|
|
107
107
|
if context.table_metadata is None and table_identifier is None:
|
|
108
108
|
raise ValueError("Table metadata or a table identifier are required for the merge operation.")
|
|
109
|
+
if context.table_metadata is not None and table_identifier is not None:
|
|
110
|
+
raise ValueError("Provide either table metadata or a table identifier, not both.")
|
|
109
111
|
|
|
110
112
|
if table_identifier is not None:
|
|
111
113
|
context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
|
|
@@ -64,15 +64,64 @@ class PipelineParsingService:
|
|
|
64
64
|
if not yaml_str:
|
|
65
65
|
raise ValueError("YAML content is empty.")
|
|
66
66
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
67
|
+
try:
|
|
68
|
+
secrets_repl_yaml_str = PipelineParsingService._replace_secret_refs(yaml_str)
|
|
69
|
+
fixed_yaml_str = PipelineParsingService._fix_yaml_str_with_templates(secrets_repl_yaml_str)
|
|
70
|
+
PipelineParsingService._validate_yaml(fixed_yaml_str)
|
|
71
|
+
config = yaml.safe_load(fixed_yaml_str)
|
|
72
|
+
except yaml.YAMLError as e:
|
|
73
|
+
console_logger.error("Failed to parse YAML. Check for syntax errors or unquoted template variables.")
|
|
74
|
+
console_logger.error("Original error: %s", str(e))
|
|
75
|
+
raise ValueError(
|
|
76
|
+
f"YAML parsing failed. Ensure all template variables like {{env:VAR}} are "
|
|
77
|
+
f"properly formatted and YAML syntax is correct. Error: {e}"
|
|
78
|
+
) from e
|
|
70
79
|
pipeline_config = PipelineConfig.metadata_to_instance(config)
|
|
71
80
|
steps = PipelineParsingService._get_steps(pipeline_config.steps, pipeline_config.env)
|
|
72
81
|
pipeline = Pipeline(name=pipeline_config.name, steps=steps) # type: ignore
|
|
73
|
-
console_logger.info(
|
|
82
|
+
console_logger.info(
|
|
83
|
+
"Pipeline [ '%s' ] parsed successfully with %d steps.",
|
|
84
|
+
pipeline.name,
|
|
85
|
+
len(pipeline.steps),
|
|
86
|
+
)
|
|
74
87
|
return pipeline
|
|
75
88
|
|
|
89
|
+
@staticmethod
|
|
90
|
+
def _validate_yaml(yaml_str: str) -> None:
|
|
91
|
+
"""Validates YAML string including checking for duplicate keys.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
yaml_str: A string that can be parsed in YAML format.
|
|
95
|
+
|
|
96
|
+
Raises:
|
|
97
|
+
yaml.YAMLError: If the YAML is invalid.
|
|
98
|
+
ValueError: If duplicate keys are found.
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
def no_duplicates_constructor(loader, node, deep=False):
|
|
102
|
+
"""Check for duplicate keys in YAML mappings."""
|
|
103
|
+
mapping = {}
|
|
104
|
+
for key_node, value_node in node.value:
|
|
105
|
+
key = loader.construct_object(key_node, deep=deep)
|
|
106
|
+
if key in mapping:
|
|
107
|
+
raise ValueError(f"Duplicate key found in YAML: '{key}'")
|
|
108
|
+
value = loader.construct_object(value_node, deep=deep)
|
|
109
|
+
mapping[key] = value
|
|
110
|
+
return mapping
|
|
111
|
+
|
|
112
|
+
class DuplicateCheckLoader(yaml.SafeLoader):
|
|
113
|
+
pass
|
|
114
|
+
|
|
115
|
+
DuplicateCheckLoader.add_constructor(
|
|
116
|
+
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
|
|
117
|
+
no_duplicates_constructor,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
try:
|
|
121
|
+
yaml.load(yaml_str, Loader=DuplicateCheckLoader)
|
|
122
|
+
except yaml.YAMLError as e:
|
|
123
|
+
raise yaml.YAMLError(f"Invalid YAML: {e}") from e
|
|
124
|
+
|
|
76
125
|
@staticmethod
|
|
77
126
|
def _get_steps(
|
|
78
127
|
step_configs: OrderedDict[str, PipelineStepConfig],
|
|
@@ -211,11 +260,24 @@ class PipelineParsingService:
|
|
|
211
260
|
|
|
212
261
|
@staticmethod
|
|
213
262
|
def _fix_yaml_str_with_templates(yaml_str: str) -> str:
|
|
214
|
-
"""Fixes unquoted
|
|
215
|
-
|
|
263
|
+
"""Fixes unquoted template patterns before yaml.safe_load.
|
|
264
|
+
|
|
265
|
+
Handles:
|
|
266
|
+
- {{env:...}} - environment variables
|
|
267
|
+
- ((step:...)) - step references
|
|
268
|
+
|
|
269
|
+
Skips values already quoted with ' or ".
|
|
270
|
+
"""
|
|
271
|
+
# Match any unquoted template pattern (env or step)
|
|
272
|
+
# This regex matches: colon + optional whitespace + unquoted value containing template
|
|
273
|
+
unquoted_template = re.compile(
|
|
274
|
+
r"(:)\s*(?!['\"])([^\s#]*(?:\{\{env:[^}]+\}\}|\(\(step:[^)]+\)\))[^\s#]*)", re.MULTILINE
|
|
275
|
+
)
|
|
216
276
|
|
|
217
277
|
def replacer(match):
|
|
218
278
|
colon, template = match.groups()
|
|
219
|
-
|
|
279
|
+
# Escape any double quotes in the template value
|
|
280
|
+
escaped_template = template.replace('"', '\\"')
|
|
281
|
+
return f'{colon} "{escaped_template}"'
|
|
220
282
|
|
|
221
283
|
return unquoted_template.sub(replacer, yaml_str)
|
|
@@ -36,6 +36,7 @@ class PipelineStep:
|
|
|
36
36
|
_table_metadata_ref: str | None = None
|
|
37
37
|
|
|
38
38
|
def __post_init__(self) -> None:
|
|
39
|
+
self._validate_step_name()
|
|
39
40
|
if not isinstance(self.action, PipelineAction):
|
|
40
41
|
raise ValueError("action must be a PipelineAction subclass.")
|
|
41
42
|
if self._context_ref:
|
|
@@ -47,6 +48,22 @@ class PipelineStep:
|
|
|
47
48
|
if isinstance(val, PipelineStep):
|
|
48
49
|
self._predecessors.add(val.name)
|
|
49
50
|
|
|
51
|
+
def _validate_step_name(self) -> None:
|
|
52
|
+
"""Validates that the step name does not contain forbidden characters.
|
|
53
|
+
|
|
54
|
+
These characters have led to parsing issues when using joins and unions
|
|
55
|
+
in the past.
|
|
56
|
+
|
|
57
|
+
Raises:
|
|
58
|
+
ValueError: If step name contains brackets, parentheses, or braces.
|
|
59
|
+
"""
|
|
60
|
+
invalid_chars = ["[", "]", "(", ")", "{", "}"]
|
|
61
|
+
for char in invalid_chars:
|
|
62
|
+
if char in self.name:
|
|
63
|
+
raise ValueError(
|
|
64
|
+
f"Invalid step name '{self.name}': step names cannot contain brackets or parentheses ({', '.join(invalid_chars)})"
|
|
65
|
+
)
|
|
66
|
+
|
|
50
67
|
def run(self) -> None:
|
|
51
68
|
"""Execute the action on the context."""
|
|
52
69
|
self.result = self.action.run(context=self.context, **self.options)
|
|
@@ -21,7 +21,7 @@ cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=w4nrS6IcPPN7UBFBws
|
|
|
21
21
|
cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
|
|
23
23
|
cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=W3P3gwtkBW3pWbUEOK6agvJfj7QivXhEr_s0dwxhJxQ,1369
|
|
24
|
-
cloe_nessy/integration/delta_loader/delta_loader.py,sha256=
|
|
24
|
+
cloe_nessy/integration/delta_loader/delta_loader.py,sha256=2uSsfFpL3R_Qovx-bOYnhWh3x48r3Otp9xFv-gyoZy8,7004
|
|
25
25
|
cloe_nessy/integration/delta_loader/delta_loader_factory.py,sha256=vB1cL6-Nc3SkLH1xtazMbMF1MnNYq8-g3GHZzRE3QmE,2251
|
|
26
26
|
cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py,sha256=G_EWUY76ZlbsPZB9LCGlOLVezk7DK6peYXEgt7-sTQE,1683
|
|
27
27
|
cloe_nessy/integration/delta_loader/strategies/__init__.py,sha256=1o5fRWenL5KnUg1hf7kmTuTpG9pbMxchiQTub52Qvwo,255
|
|
@@ -56,7 +56,7 @@ cloe_nessy/models/table.py,sha256=-MQLwKthcO4JE99lbikJL1cnt27aEZmt3o96R9CtoTU,12
|
|
|
56
56
|
cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
|
|
57
57
|
cloe_nessy/models/volume.py,sha256=BPEy6DW0Yig-mFQiNgm9P5JoO6Vcvtxcipnk2-H8Q_k,2456
|
|
58
58
|
cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
|
|
59
|
-
cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=
|
|
59
|
+
cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=75QNiYgvWGG2zhxslNyABIZRuKtgTP7DlYrF5PdfsWQ,12757
|
|
60
60
|
cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
61
|
cloe_nessy/models/mixins/read_instance_mixin.py,sha256=HT42qor6IltR-BTfe_DYa5ylntE7qbArs-3cSjo4KXU,4649
|
|
62
62
|
cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
|
|
@@ -71,9 +71,9 @@ cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6Ilo
|
|
|
71
71
|
cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z98qxbFIsMao,7929
|
|
72
72
|
cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
|
|
73
73
|
cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
|
|
74
|
-
cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=
|
|
74
|
+
cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=XJu2v4qzSBMuLRZs35CIabLPQHY3zV42Vurf4VuN9v0,11363
|
|
75
75
|
cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=OjdYDQW19yXjdcqs7bDMlyWmv0cQz7Qn4I2cakBgN2E,13139
|
|
76
|
-
cloe_nessy/pipeline/pipeline_step.py,sha256=
|
|
76
|
+
cloe_nessy/pipeline/pipeline_step.py,sha256=ui3hDOAuuX-j62gg6o8RogHzCjZHNsTYWpLZwO76VhE,2753
|
|
77
77
|
cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
|
|
78
78
|
cloe_nessy/pipeline/actions/read_api.py,sha256=YMOWPCyxitU5v6HHH_AZqpbHQenPU4-WlaOrb-NsIIk,16245
|
|
79
79
|
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=MK67NIB5qXTDwCC0EFwHYoOkelFnwY6Z4REpC5BlFb4,6359
|
|
@@ -101,7 +101,7 @@ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h
|
|
|
101
101
|
cloe_nessy/pipeline/actions/transform_with_column.py,sha256=5EG_H4MTiQVCfgkjfyof4qqw9xB3MJh-v0lsoYlbVCU,4967
|
|
102
102
|
cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=rYK_V08D8w4CBClRBGwj18amhZOVmP6VTTO-xqy7We8,4849
|
|
103
103
|
cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
|
|
104
|
-
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=
|
|
104
|
+
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=ndJf9n3VP8YhTSVLgyURnbtOmC5i3YfVQWtfRjBV1j0,6613
|
|
105
105
|
cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
|
|
106
106
|
cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
|
|
107
107
|
cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
|
|
@@ -113,6 +113,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
|
|
|
113
113
|
cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
114
|
cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
|
|
115
115
|
cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
|
|
116
|
-
cloe_nessy-1.0.
|
|
117
|
-
cloe_nessy-1.0.
|
|
118
|
-
cloe_nessy-1.0.
|
|
116
|
+
cloe_nessy-1.0.11.dist-info/METADATA,sha256=cvl6CZwjaIn9-vbsnDsX3znHZmlzFga_DE8W_qntpek,3290
|
|
117
|
+
cloe_nessy-1.0.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
118
|
+
cloe_nessy-1.0.11.dist-info/RECORD,,
|
|
File without changes
|