cloe-nessy 0.3.13.2b0__py3-none-any.whl → 0.3.13.4b0__py3-none-any.whl
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +5 -1
- cloe_nessy/object_manager/table_manager.py +27 -5
- cloe_nessy/pipeline/actions/read_metadata_yaml.py +61 -33
- cloe_nessy/pipeline/actions/write_delta_merge.py +1 -0
- {cloe_nessy-0.3.13.2b0.dist-info → cloe_nessy-0.3.13.4b0.dist-info}/METADATA +17 -17
- {cloe_nessy-0.3.13.2b0.dist-info → cloe_nessy-0.3.13.4b0.dist-info}/RECORD +7 -8
- {cloe_nessy-0.3.13.2b0.dist-info → cloe_nessy-0.3.13.4b0.dist-info}/WHEEL +1 -2
- cloe_nessy-0.3.13.2b0.dist-info/top_level.txt +0 -1
|
@@ -196,7 +196,11 @@ class DeltaMergeWriter(BaseDeltaWriter):
|
|
|
196
196
|
|
|
197
197
|
config = DeltaMergeConfig(dataframe_columns=data_frame.columns, **kwargs)
|
|
198
198
|
|
|
199
|
-
delta_table = self.table_manager.get_delta_table(
|
|
199
|
+
delta_table = self.table_manager.get_delta_table(
|
|
200
|
+
table=table,
|
|
201
|
+
location=storage_path,
|
|
202
|
+
spark=data_frame.sparkSession,
|
|
203
|
+
)
|
|
200
204
|
|
|
201
205
|
match_conditions = self._build_match_conditions(data_frame, config)
|
|
202
206
|
|
|
@@ -186,6 +186,9 @@ class TableManager(LoggerMixin):
|
|
|
186
186
|
def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
|
|
187
187
|
"""Get the DeltaTable object from the Table objects location or a location string.
|
|
188
188
|
|
|
189
|
+
For managed tables, uses the table identifier to access the DeltaTable.
|
|
190
|
+
For external tables or when a location is provided, uses the storage path.
|
|
191
|
+
|
|
189
192
|
Args:
|
|
190
193
|
table: A Table object representing the Delta table.
|
|
191
194
|
location: A string representing the table location.
|
|
@@ -195,15 +198,34 @@ class TableManager(LoggerMixin):
|
|
|
195
198
|
The DeltaTable object corresponding to the given Table object or location string.
|
|
196
199
|
|
|
197
200
|
Raises:
|
|
198
|
-
ValueError: If neither table nor location is provided
|
|
201
|
+
ValueError: If neither table nor location is provided.
|
|
199
202
|
"""
|
|
200
|
-
if
|
|
201
|
-
|
|
203
|
+
if table is None and location is None:
|
|
204
|
+
self._console_logger.error("Invalid parameters: both table and location are None")
|
|
205
|
+
raise ValueError("Either table or location must be provided.")
|
|
206
|
+
|
|
207
|
+
spark_session = spark or self._spark
|
|
208
|
+
|
|
209
|
+
if table is not None and location is not None:
|
|
210
|
+
self._console_logger.info(
|
|
211
|
+
f"Both table ({table.identifier}) and location ({location}) provided. Using table object as priority."
|
|
212
|
+
)
|
|
202
213
|
|
|
203
214
|
if table is not None:
|
|
204
|
-
|
|
215
|
+
if table.is_external is False:
|
|
216
|
+
self._console_logger.info(f"Getting DeltaTable object for managed table: {table.identifier}")
|
|
217
|
+
return DeltaTable.forName(spark_session, table.identifier)
|
|
218
|
+
|
|
219
|
+
table_location = str(table.storage_path)
|
|
220
|
+
self._console_logger.info(f"Getting DeltaTable object for external table location: {table_location}")
|
|
221
|
+
return DeltaTable.forPath(spark_session, table_location)
|
|
222
|
+
|
|
223
|
+
self._console_logger.info(f"No table object provided, using location: {location}")
|
|
224
|
+
if location is None:
|
|
225
|
+
self._console_logger.error("Location is None - this should not happen!")
|
|
226
|
+
raise ValueError("Location cannot be None when no table object is provided")
|
|
205
227
|
self._console_logger.info(f"Getting DeltaTable object for location: {location}")
|
|
206
|
-
return DeltaTable.forPath(
|
|
228
|
+
return DeltaTable.forPath(spark_session, str(location))
|
|
207
229
|
|
|
208
230
|
def table_exists(self, table: Table | None = None, table_identifier: str | None = None) -> bool:
|
|
209
231
|
"""Checks if a table exists in the catalog.
|
|
@@ -1,66 +1,94 @@
|
|
|
1
|
-
import
|
|
1
|
+
from pathlib import Path
|
|
2
2
|
from typing import Any
|
|
3
3
|
|
|
4
|
-
from ...models import
|
|
4
|
+
from ...models import Table
|
|
5
5
|
from ..pipeline_action import PipelineAction
|
|
6
6
|
from ..pipeline_context import PipelineContext
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ReadMetadataYAMLAction(PipelineAction):
|
|
10
|
-
"""Reads
|
|
10
|
+
"""Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.
|
|
11
11
|
|
|
12
12
|
Example:
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
13
|
+
=== "Managed Table"
|
|
14
|
+
```yaml
|
|
15
|
+
Read Table Metadata:
|
|
16
|
+
action: READ_METADATA_YAML_ACTION
|
|
17
|
+
options:
|
|
18
|
+
file_path: metadata/schemas/bronze/sales_table.yml
|
|
19
|
+
catalog_name: production
|
|
20
|
+
schema_name: sales_data
|
|
21
|
+
```
|
|
22
|
+
=== "External Table"
|
|
23
|
+
```yaml
|
|
24
|
+
Read Table Metadata:
|
|
25
|
+
action: READ_METADATA_YAML_ACTION
|
|
26
|
+
options:
|
|
27
|
+
file_path: metadata/schemas/bronze/sales_table.yml
|
|
28
|
+
catalog_name: production
|
|
29
|
+
schema_name: sales_data
|
|
30
|
+
storage_path: abfs://external_storage/sales_data/sales_table
|
|
31
|
+
```
|
|
21
32
|
"""
|
|
22
33
|
|
|
23
34
|
name: str = "READ_METADATA_YAML_ACTION"
|
|
24
35
|
|
|
25
|
-
@staticmethod
|
|
26
36
|
def run(
|
|
37
|
+
self,
|
|
27
38
|
context: PipelineContext,
|
|
28
39
|
*,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
40
|
+
file_path: str | None = None,
|
|
41
|
+
catalog_name: str | None = None,
|
|
42
|
+
schema_name: str | None = None,
|
|
43
|
+
storage_path: str | None = None,
|
|
32
44
|
**_: Any,
|
|
33
45
|
) -> PipelineContext:
|
|
34
|
-
"""Reads
|
|
46
|
+
"""Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.
|
|
35
47
|
|
|
36
48
|
Args:
|
|
37
49
|
context: The context in which this Action is executed.
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
50
|
+
file_path: The path to the file that defines the table.
|
|
51
|
+
catalog_name: The name of the catalog for the table.
|
|
52
|
+
schema_name: The name of the schema for the table.
|
|
53
|
+
storage_path: The storage path for the table, if applicable. If not
|
|
54
|
+
provided, the table will be considered a managed table.
|
|
41
55
|
|
|
42
56
|
Raises:
|
|
43
|
-
ValueError: If any issues occur while reading the
|
|
44
|
-
missing file, or missing
|
|
57
|
+
ValueError: If any issues occur while reading the table metadata, such as an invalid table,
|
|
58
|
+
missing file, missing path, or missing catalog/schema names.
|
|
45
59
|
|
|
46
60
|
Returns:
|
|
47
61
|
The context after the execution of this Action, containing the table metadata.
|
|
48
62
|
"""
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
63
|
+
missing_params = []
|
|
64
|
+
if not file_path:
|
|
65
|
+
missing_params.append("file_path")
|
|
66
|
+
if not catalog_name:
|
|
67
|
+
missing_params.append("catalog_name")
|
|
68
|
+
if not schema_name:
|
|
69
|
+
missing_params.append("schema_name")
|
|
55
70
|
|
|
56
|
-
|
|
71
|
+
if missing_params:
|
|
72
|
+
raise ValueError(
|
|
73
|
+
f"Missing required parameters: {', '.join(missing_params)}. Please specify all required parameters."
|
|
74
|
+
)
|
|
57
75
|
|
|
58
|
-
|
|
76
|
+
final_file_path = Path(file_path) if file_path else Path()
|
|
77
|
+
|
|
78
|
+
table, errors = Table.read_instance_from_file(
|
|
79
|
+
final_file_path,
|
|
80
|
+
catalog_name=catalog_name,
|
|
81
|
+
schema_name=schema_name,
|
|
82
|
+
)
|
|
59
83
|
if errors:
|
|
60
|
-
raise ValueError(f"Errors while reading
|
|
61
|
-
if not
|
|
62
|
-
raise ValueError("No
|
|
84
|
+
raise ValueError(f"Errors while reading table metadata: {errors}")
|
|
85
|
+
if not table:
|
|
86
|
+
raise ValueError("No table found in metadata.")
|
|
63
87
|
|
|
64
|
-
table
|
|
88
|
+
if not table.storage_path and storage_path:
|
|
89
|
+
self._console_logger.info(f"Setting storage path for table [ '{table.name}' ] to [ '{storage_path}' ]")
|
|
90
|
+
table.storage_path = storage_path
|
|
91
|
+
table.is_external = True
|
|
65
92
|
|
|
93
|
+
self._console_logger.info(f"Table [ '{table.name}' ] metadata read successfully from [ '{file_path}' ]")
|
|
66
94
|
return context.from_existing(table_metadata=table)
|
|
@@ -98,6 +98,7 @@ class WriteDeltaMergeAction(PipelineAction):
|
|
|
98
98
|
|
|
99
99
|
delta_merge_writer.write(
|
|
100
100
|
table_identifier=context.table_metadata.identifier,
|
|
101
|
+
table=context.table_metadata,
|
|
101
102
|
storage_path=str(context.table_metadata.storage_path),
|
|
102
103
|
data_frame=context.data,
|
|
103
104
|
key_columns=key_columns,
|
|
@@ -1,36 +1,36 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloe-nessy
|
|
3
|
-
Version: 0.3.13.
|
|
3
|
+
Version: 0.3.13.4b0
|
|
4
4
|
Summary: Your friendly datalake monster.
|
|
5
|
+
Project-URL: homepage, https://initions.com/
|
|
5
6
|
Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
|
|
6
7
|
License: MIT
|
|
7
|
-
Project-URL: homepage, https://initions.com/
|
|
8
8
|
Classifier: Development Status :: 5 - Production/Stable
|
|
9
9
|
Classifier: Environment :: Console
|
|
10
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
11
10
|
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
12
|
Classifier: Operating System :: OS Independent
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
14
|
Classifier: Topic :: Database
|
|
15
15
|
Requires-Python: <3.13,>=3.11
|
|
16
|
-
Description-Content-Type: text/markdown
|
|
17
|
-
Requires-Dist: pydantic<3.0.0,>=2.7.2
|
|
18
|
-
Requires-Dist: pyyaml<7.0.0,>=6.0.1
|
|
19
|
-
Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
|
|
20
|
-
Requires-Dist: jinja2<4.0.0,>=3.1.4
|
|
21
|
-
Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
|
|
22
|
-
Requires-Dist: openpyxl<4.0.0,>=3.1.5
|
|
23
|
-
Requires-Dist: requests<3.0.0,>=2.32.3
|
|
24
|
-
Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
|
|
25
|
-
Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
|
|
26
16
|
Requires-Dist: azure-identity<2.0.0,>=1.19.0
|
|
27
|
-
Requires-Dist:
|
|
17
|
+
Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
|
|
28
18
|
Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
|
|
29
|
-
Requires-Dist:
|
|
19
|
+
Requires-Dist: fsspec<2025.6.0,>=2025.5.1
|
|
20
|
+
Requires-Dist: httpx<1.0.0,>=0.27.2
|
|
21
|
+
Requires-Dist: jinja2<4.0.0,>=3.1.4
|
|
30
22
|
Requires-Dist: matplotlib<4.0.0,>=3.9.2
|
|
23
|
+
Requires-Dist: networkx<4.0,>=3.3
|
|
24
|
+
Requires-Dist: openpyxl<4.0.0,>=3.1.5
|
|
25
|
+
Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
|
|
26
|
+
Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
|
|
27
|
+
Requires-Dist: pydantic<3.0.0,>=2.7.2
|
|
28
|
+
Requires-Dist: pyyaml<7.0.0,>=6.0.1
|
|
29
|
+
Requires-Dist: requests<3.0.0,>=2.32.3
|
|
31
30
|
Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
|
|
32
|
-
Requires-Dist:
|
|
33
|
-
Requires-Dist:
|
|
31
|
+
Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
|
|
32
|
+
Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
34
|
|
|
35
35
|
# cloe-nessy
|
|
36
36
|
|
|
@@ -30,7 +30,7 @@ cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70T
|
|
|
30
30
|
cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
|
|
31
31
|
cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
|
|
32
32
|
cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
|
|
33
|
-
cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=
|
|
33
|
+
cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=zhqPIPfAJTzSLFgBUCwFesUW7CcF1zCPRU-N_8yYjok,10172
|
|
34
34
|
cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
|
|
35
35
|
cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
|
|
36
36
|
cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
|
|
@@ -53,7 +53,7 @@ cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowS
|
|
|
53
53
|
cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
|
|
54
54
|
cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
|
|
55
55
|
cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
|
|
56
|
-
cloe_nessy/object_manager/table_manager.py,sha256=
|
|
56
|
+
cloe_nessy/object_manager/table_manager.py,sha256=suHx56TYXagaJ2dVkvTP7vwSI4xgTqXNkHYBbYh2pd4,13913
|
|
57
57
|
cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
|
|
58
58
|
cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
|
|
59
59
|
cloe_nessy/pipeline/pipeline.py,sha256=-1tJVs9rZf8CcwieH4IP7mqJZ6mL7bQUZ56TNKt8eO8,11154
|
|
@@ -67,7 +67,7 @@ cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nu
|
|
|
67
67
|
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=oXbqbc6BfR82dSIGclwzWiTN8EVmpFjNIYLKm4qOU50,2754
|
|
68
68
|
cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
|
|
69
69
|
cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
|
|
70
|
-
cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=
|
|
70
|
+
cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
|
|
71
71
|
cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
|
|
72
72
|
cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=-CEdcXb7Fz5DQNitGlJ8EVBE_LzxfsInyCIO-D7b4iY,3042
|
|
73
73
|
cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
|
|
@@ -86,7 +86,7 @@ cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm
|
|
|
86
86
|
cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
|
|
87
87
|
cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
|
|
88
88
|
cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
|
|
89
|
-
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=
|
|
89
|
+
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=gh3oD0ZGjDq0hw56NiRimK4HHCruDofqqdzFFgYLve8,5085
|
|
90
90
|
cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
|
|
91
91
|
cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
|
|
92
92
|
cloe_nessy/session/session_manager.py,sha256=f4OeeyGD3becDQGkdDbck3jVH9ulOCBWjW6Jaj_MIrc,7765
|
|
@@ -94,7 +94,6 @@ cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEv
|
|
|
94
94
|
cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
|
|
95
95
|
cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
96
96
|
cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
|
|
97
|
-
cloe_nessy-0.3.13.
|
|
98
|
-
cloe_nessy-0.3.13.
|
|
99
|
-
cloe_nessy-0.3.13.
|
|
100
|
-
cloe_nessy-0.3.13.2b0.dist-info/RECORD,,
|
|
97
|
+
cloe_nessy-0.3.13.4b0.dist-info/METADATA,sha256=Z9Bby2T96l0nm453ZwbiAq3YDP0uau5GfPORfD9KE7E,3294
|
|
98
|
+
cloe_nessy-0.3.13.4b0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
99
|
+
cloe_nessy-0.3.13.4b0.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
cloe_nessy
|