cloe-nessy 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/logging/logger_mixin.py +1 -0
- cloe_nessy/models/adapter/unity_catalog_adapter.py +3 -1
- cloe_nessy/pipeline/actions/write_delta_merge.py +2 -0
- cloe_nessy/pipeline/pipeline.py +3 -3
- cloe_nessy/pipeline/pipeline_parsing_service.py +42 -1
- cloe_nessy/pipeline/pipeline_step.py +17 -0
- {cloe_nessy-1.0.10.dist-info → cloe_nessy-1.0.12.dist-info}/METADATA +2 -2
- {cloe_nessy-1.0.10.dist-info → cloe_nessy-1.0.12.dist-info}/RECORD +9 -9
- {cloe_nessy-1.0.10.dist-info → cloe_nessy-1.0.12.dist-info}/WHEEL +0 -0
|
@@ -34,6 +34,7 @@ class LoggerMixin:
|
|
|
34
34
|
logger_name=f"Console:{self.__class__.__name__}",
|
|
35
35
|
logging_level=level if level is not None else logging_settings.log_level_console,
|
|
36
36
|
log_format=log_format if log_format is not None else logging_settings.log_format_console,
|
|
37
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
37
38
|
)
|
|
38
39
|
return logger
|
|
39
40
|
|
|
@@ -18,7 +18,7 @@ class UnityCatalogAdapter(LoggerMixin):
|
|
|
18
18
|
"""Initializes the UnityCatalogAdapter class."""
|
|
19
19
|
self._spark = spark or SessionManager.get_spark_session()
|
|
20
20
|
self._console_logger = self.get_console_logger()
|
|
21
|
-
self._catalogs = self.get_catalogs()
|
|
21
|
+
self._catalogs: list[Catalog] | None = None
|
|
22
22
|
|
|
23
23
|
def _execute_sql(self, query):
|
|
24
24
|
"""Execute a SQL query and return a DataFrame.
|
|
@@ -57,6 +57,8 @@ class UnityCatalogAdapter(LoggerMixin):
|
|
|
57
57
|
Returns:
|
|
58
58
|
The Catalog with the specified name.
|
|
59
59
|
"""
|
|
60
|
+
if self._catalogs is None:
|
|
61
|
+
self._catalogs: list[Catalog] = self.get_catalogs()
|
|
60
62
|
for catalog in self._catalogs:
|
|
61
63
|
if catalog.name == name:
|
|
62
64
|
return catalog
|
|
@@ -106,6 +106,8 @@ class WriteDeltaMergeAction(PipelineAction):
|
|
|
106
106
|
raise ValueError("Data is required for the merge operation.")
|
|
107
107
|
if context.table_metadata is None and table_identifier is None:
|
|
108
108
|
raise ValueError("Table metadata or a table identifier are required for the merge operation.")
|
|
109
|
+
if context.table_metadata is not None and table_identifier is not None:
|
|
110
|
+
raise ValueError("Provide either table metadata or a table identifier, not both.")
|
|
109
111
|
|
|
110
112
|
if table_identifier is not None:
|
|
111
113
|
context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
|
cloe_nessy/pipeline/pipeline.py
CHANGED
|
@@ -56,13 +56,13 @@ class Pipeline(LoggerMixin):
|
|
|
56
56
|
step.context.table_metadata = self.steps[step._table_metadata_ref].result.table_metadata
|
|
57
57
|
|
|
58
58
|
try:
|
|
59
|
-
self._console_logger.info(f"Starting execution of step: {step.name}")
|
|
59
|
+
self._console_logger.info(f">>> Starting execution of step: {step.name}")
|
|
60
60
|
step.run()
|
|
61
61
|
except Exception as err:
|
|
62
|
-
self._console_logger.error(f"Execution of step {step.name} failed with error: {str(err)}")
|
|
62
|
+
self._console_logger.error(f">>> Execution of step {step.name} failed with error: {str(err)}")
|
|
63
63
|
raise err
|
|
64
64
|
else:
|
|
65
|
-
self._console_logger.info(f"Execution of step {step.name} succeeded.")
|
|
65
|
+
self._console_logger.info(f">>> Execution of step {step.name} succeeded.")
|
|
66
66
|
|
|
67
67
|
def _get_ready_to_run_steps(self, remaining_steps: list[str], g: nx.DiGraph) -> set[str]:
|
|
68
68
|
"""Identifies and returns the steps that are ready to run.
|
|
@@ -67,6 +67,7 @@ class PipelineParsingService:
|
|
|
67
67
|
try:
|
|
68
68
|
secrets_repl_yaml_str = PipelineParsingService._replace_secret_refs(yaml_str)
|
|
69
69
|
fixed_yaml_str = PipelineParsingService._fix_yaml_str_with_templates(secrets_repl_yaml_str)
|
|
70
|
+
PipelineParsingService._validate_yaml(fixed_yaml_str)
|
|
70
71
|
config = yaml.safe_load(fixed_yaml_str)
|
|
71
72
|
except yaml.YAMLError as e:
|
|
72
73
|
console_logger.error("Failed to parse YAML. Check for syntax errors or unquoted template variables.")
|
|
@@ -78,9 +79,49 @@ class PipelineParsingService:
|
|
|
78
79
|
pipeline_config = PipelineConfig.metadata_to_instance(config)
|
|
79
80
|
steps = PipelineParsingService._get_steps(pipeline_config.steps, pipeline_config.env)
|
|
80
81
|
pipeline = Pipeline(name=pipeline_config.name, steps=steps) # type: ignore
|
|
81
|
-
console_logger.info(
|
|
82
|
+
console_logger.info(
|
|
83
|
+
"Pipeline [ '%s' ] parsed successfully with %d steps.",
|
|
84
|
+
pipeline.name,
|
|
85
|
+
len(pipeline.steps),
|
|
86
|
+
)
|
|
82
87
|
return pipeline
|
|
83
88
|
|
|
89
|
+
@staticmethod
|
|
90
|
+
def _validate_yaml(yaml_str: str) -> None:
|
|
91
|
+
"""Validates YAML string including checking for duplicate keys.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
yaml_str: A string that can be parsed in YAML format.
|
|
95
|
+
|
|
96
|
+
Raises:
|
|
97
|
+
yaml.YAMLError: If the YAML is invalid.
|
|
98
|
+
ValueError: If duplicate keys are found.
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
def no_duplicates_constructor(loader, node, deep=False):
|
|
102
|
+
"""Check for duplicate keys in YAML mappings."""
|
|
103
|
+
mapping = {}
|
|
104
|
+
for key_node, value_node in node.value:
|
|
105
|
+
key = loader.construct_object(key_node, deep=deep)
|
|
106
|
+
if key in mapping:
|
|
107
|
+
raise ValueError(f"Duplicate key found in YAML: '{key}'")
|
|
108
|
+
value = loader.construct_object(value_node, deep=deep)
|
|
109
|
+
mapping[key] = value
|
|
110
|
+
return mapping
|
|
111
|
+
|
|
112
|
+
class DuplicateCheckLoader(yaml.SafeLoader):
|
|
113
|
+
pass
|
|
114
|
+
|
|
115
|
+
DuplicateCheckLoader.add_constructor(
|
|
116
|
+
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
|
|
117
|
+
no_duplicates_constructor,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
try:
|
|
121
|
+
yaml.load(yaml_str, Loader=DuplicateCheckLoader)
|
|
122
|
+
except yaml.YAMLError as e:
|
|
123
|
+
raise yaml.YAMLError(f"Invalid YAML: {e}") from e
|
|
124
|
+
|
|
84
125
|
@staticmethod
|
|
85
126
|
def _get_steps(
|
|
86
127
|
step_configs: OrderedDict[str, PipelineStepConfig],
|
|
@@ -36,6 +36,7 @@ class PipelineStep:
|
|
|
36
36
|
_table_metadata_ref: str | None = None
|
|
37
37
|
|
|
38
38
|
def __post_init__(self) -> None:
|
|
39
|
+
self._validate_step_name()
|
|
39
40
|
if not isinstance(self.action, PipelineAction):
|
|
40
41
|
raise ValueError("action must be a PipelineAction subclass.")
|
|
41
42
|
if self._context_ref:
|
|
@@ -47,6 +48,22 @@ class PipelineStep:
|
|
|
47
48
|
if isinstance(val, PipelineStep):
|
|
48
49
|
self._predecessors.add(val.name)
|
|
49
50
|
|
|
51
|
+
def _validate_step_name(self) -> None:
|
|
52
|
+
"""Validates that the step name does not contain forbidden characters.
|
|
53
|
+
|
|
54
|
+
These characters have led to parsing issues when using joins and unions
|
|
55
|
+
in the past.
|
|
56
|
+
|
|
57
|
+
Raises:
|
|
58
|
+
ValueError: If step name contains brackets, parentheses, or braces.
|
|
59
|
+
"""
|
|
60
|
+
invalid_chars = ["[", "]", "(", ")", "{", "}"]
|
|
61
|
+
for char in invalid_chars:
|
|
62
|
+
if char in self.name:
|
|
63
|
+
raise ValueError(
|
|
64
|
+
f"Invalid step name '{self.name}': step names cannot contain brackets or parentheses ({', '.join(invalid_chars)})"
|
|
65
|
+
)
|
|
66
|
+
|
|
50
67
|
def run(self) -> None:
|
|
51
68
|
"""Execute the action on the context."""
|
|
52
69
|
self.result = self.action.run(context=self.context, **self.options)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloe-nessy
|
|
3
|
-
Version: 1.0.10
|
|
3
|
+
Version: 1.0.12
|
|
4
4
|
Summary: Your friendly datalake monster.
|
|
5
5
|
Project-URL: homepage, https://initions.com/
|
|
6
6
|
Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
|
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3
|
|
|
14
14
|
Classifier: Topic :: Database
|
|
15
15
|
Requires-Python: <3.14,>=3.11
|
|
16
16
|
Requires-Dist: azure-identity<2.0.0,>=1.19.0
|
|
17
|
-
Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.
|
|
17
|
+
Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.11
|
|
18
18
|
Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
|
|
19
19
|
Requires-Dist: fsspec<2026.1.1,>=2026.1.0
|
|
20
20
|
Requires-Dist: httpx<1.0.0,>=0.27.2
|
|
@@ -45,7 +45,7 @@ cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=
|
|
|
45
45
|
cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=B7PwPHKrsJL0ZxBT-H9wWSy0gn7shqNDJ0AbrpMHyMg,10135
|
|
46
46
|
cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
|
|
47
47
|
cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
|
|
48
|
-
cloe_nessy/logging/logger_mixin.py,sha256=
|
|
48
|
+
cloe_nessy/logging/logger_mixin.py,sha256=Ws3sirAi7CVyWWc_ClQs0wzdMIJ1LNNrMMkFlO1V0dw,7450
|
|
49
49
|
cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
|
|
50
50
|
cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
|
|
51
51
|
cloe_nessy/models/column.py,sha256=W4V1Ls1d60VyZ1Ko9Yu9eSipcMbxSzKicn0aloHPiR0,2027
|
|
@@ -56,7 +56,7 @@ cloe_nessy/models/table.py,sha256=-MQLwKthcO4JE99lbikJL1cnt27aEZmt3o96R9CtoTU,12
|
|
|
56
56
|
cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
|
|
57
57
|
cloe_nessy/models/volume.py,sha256=BPEy6DW0Yig-mFQiNgm9P5JoO6Vcvtxcipnk2-H8Q_k,2456
|
|
58
58
|
cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
|
|
59
|
-
cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=
|
|
59
|
+
cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=75QNiYgvWGG2zhxslNyABIZRuKtgTP7DlYrF5PdfsWQ,12757
|
|
60
60
|
cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
61
|
cloe_nessy/models/mixins/read_instance_mixin.py,sha256=HT42qor6IltR-BTfe_DYa5ylntE7qbArs-3cSjo4KXU,4649
|
|
62
62
|
cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
|
|
@@ -66,14 +66,14 @@ cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4N
|
|
|
66
66
|
cloe_nessy/object_manager/table_manager.py,sha256=M4JWgga7A8MHll5QJ42TUU5X4eUrQzhbwaZtVBgoQPY,13902
|
|
67
67
|
cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
|
|
68
68
|
cloe_nessy/pipeline/__init__.py,sha256=BUzL4HJaCXWmK7OgKaxdwK72JrrdzfzIvyxOGtM28U0,417
|
|
69
|
-
cloe_nessy/pipeline/pipeline.py,sha256
|
|
69
|
+
cloe_nessy/pipeline/pipeline.py,sha256=-JcdRL8PV1Leo0984Z2jOJ4EL2nuMwnqX4rIwu4JxEg,10831
|
|
70
70
|
cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6IloVd2Fj31Sg,1944
|
|
71
71
|
cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z98qxbFIsMao,7929
|
|
72
72
|
cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
|
|
73
73
|
cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
|
|
74
|
-
cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=
|
|
74
|
+
cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=XJu2v4qzSBMuLRZs35CIabLPQHY3zV42Vurf4VuN9v0,11363
|
|
75
75
|
cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=OjdYDQW19yXjdcqs7bDMlyWmv0cQz7Qn4I2cakBgN2E,13139
|
|
76
|
-
cloe_nessy/pipeline/pipeline_step.py,sha256=
|
|
76
|
+
cloe_nessy/pipeline/pipeline_step.py,sha256=ui3hDOAuuX-j62gg6o8RogHzCjZHNsTYWpLZwO76VhE,2753
|
|
77
77
|
cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
|
|
78
78
|
cloe_nessy/pipeline/actions/read_api.py,sha256=YMOWPCyxitU5v6HHH_AZqpbHQenPU4-WlaOrb-NsIIk,16245
|
|
79
79
|
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=MK67NIB5qXTDwCC0EFwHYoOkelFnwY6Z4REpC5BlFb4,6359
|
|
@@ -101,7 +101,7 @@ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h
|
|
|
101
101
|
cloe_nessy/pipeline/actions/transform_with_column.py,sha256=5EG_H4MTiQVCfgkjfyof4qqw9xB3MJh-v0lsoYlbVCU,4967
|
|
102
102
|
cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=rYK_V08D8w4CBClRBGwj18amhZOVmP6VTTO-xqy7We8,4849
|
|
103
103
|
cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
|
|
104
|
-
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=
|
|
104
|
+
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=ndJf9n3VP8YhTSVLgyURnbtOmC5i3YfVQWtfRjBV1j0,6613
|
|
105
105
|
cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
|
|
106
106
|
cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
|
|
107
107
|
cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
|
|
@@ -113,6 +113,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
|
|
|
113
113
|
cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
114
|
cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
|
|
115
115
|
cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
|
|
116
|
-
cloe_nessy-1.0.
|
|
117
|
-
cloe_nessy-1.0.10.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
118
|
-
cloe_nessy-1.0.10.dist-info/RECORD,,
|
|
116
|
+
cloe_nessy-1.0.12.dist-info/METADATA,sha256=W3Gs63IM-gG61xJVlCgs7R3FB9VcI94VRdaFfElczUg,3291
|
|
117
|
+
cloe_nessy-1.0.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
118
|
+
cloe_nessy-1.0.12.dist-info/RECORD,,
|
|
File without changes
|