cloe-nessy 0.3.13.2b0__py3-none-any.whl → 0.3.13.4b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -196,7 +196,11 @@ class DeltaMergeWriter(BaseDeltaWriter):
 
         config = DeltaMergeConfig(dataframe_columns=data_frame.columns, **kwargs)
 
-        delta_table = self.table_manager.get_delta_table(location=storage_path, spark=data_frame.sparkSession)
+        delta_table = self.table_manager.get_delta_table(
+            table=table,
+            location=storage_path,
+            spark=data_frame.sparkSession,
+        )
 
         match_conditions = self._build_match_conditions(data_frame, config)
 
@@ -186,6 +186,9 @@ class TableManager(LoggerMixin):
     def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
         """Get the DeltaTable object from the Table objects location or a location string.
 
+        For managed tables, uses the table identifier to access the DeltaTable.
+        For external tables or when a location is provided, uses the storage path.
+
         Args:
             table: A Table object representing the Delta table.
             location: A string representing the table location.
@@ -195,15 +198,34 @@ class TableManager(LoggerMixin):
             The DeltaTable object corresponding to the given Table object or location string.
 
         Raises:
-            ValueError: If neither table nor location is provided, or if both are provided.
+            ValueError: If neither table nor location is provided.
         """
-        if (table is None and location is None) or (table is not None and location is not None):
-            raise ValueError("Either table or location must be provided, but not both.")
+        if table is None and location is None:
+            self._console_logger.error("Invalid parameters: both table and location are None")
+            raise ValueError("Either table or location must be provided.")
+
+        spark_session = spark or self._spark
+
+        if table is not None and location is not None:
+            self._console_logger.info(
+                f"Both table ({table.identifier}) and location ({location}) provided. Using table object as priority."
+            )
 
         if table is not None:
-            location = str(table.storage_path)
+            if table.is_external is False:
+                self._console_logger.info(f"Getting DeltaTable object for managed table: {table.identifier}")
+                return DeltaTable.forName(spark_session, table.identifier)
+
+            table_location = str(table.storage_path)
+            self._console_logger.info(f"Getting DeltaTable object for external table location: {table_location}")
+            return DeltaTable.forPath(spark_session, table_location)
+
+        self._console_logger.info(f"No table object provided, using location: {location}")
+        if location is None:
+            self._console_logger.error("Location is None - this should not happen!")
+            raise ValueError("Location cannot be None when no table object is provided")
         self._console_logger.info(f"Getting DeltaTable object for location: {location}")
-        return DeltaTable.forPath(spark or self._spark, str(location))
+        return DeltaTable.forPath(spark_session, str(location))
 
     def table_exists(self, table: Table | None = None, table_identifier: str | None = None) -> bool:
         """Checks if a table exists in the catalog.
@@ -1,66 +1,94 @@
-import pathlib
+from pathlib import Path
 from typing import Any
 
-from ...models import Schema
+from ...models import Table
 from ..pipeline_action import PipelineAction
 from ..pipeline_context import PipelineContext
 
 
 class ReadMetadataYAMLAction(PipelineAction):
-    """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
+    """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.
 
     Example:
-        ```yaml
-        Read Schema Metadata:
-            action: READ_METADATA_YAML_ACTION
-            options:
-                path: excel_file_folder/excel_files_june/
-                file_name: sales_schema.yml
-                table_name: sales
-        ```
+        === "Managed Table"
+            ```yaml
+            Read Table Metadata:
+                action: READ_METADATA_YAML_ACTION
+                options:
+                    file_path: metadata/schemas/bronze/sales_table.yml
+                    catalog_name: production
+                    schema_name: sales_data
+            ```
+        === "External Table"
+            ```yaml
+            Read Table Metadata:
+                action: READ_METADATA_YAML_ACTION
+                options:
+                    file_path: metadata/schemas/bronze/sales_table.yml
+                    catalog_name: production
+                    schema_name: sales_data
+                    storage_path: abfs://external_storage/sales_data/sales_table
+            ```
     """
 
     name: str = "READ_METADATA_YAML_ACTION"
 
-    @staticmethod
     def run(
+        self,
         context: PipelineContext,
         *,
-        path: str | None = None,
-        file_name: str | None = None,
-        table_name: str | None = None,
+        file_path: str | None = None,
+        catalog_name: str | None = None,
+        schema_name: str | None = None,
+        storage_path: str | None = None,
         **_: Any,
     ) -> PipelineContext:
-        """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
+        """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.
 
         Args:
             context: The context in which this Action is executed.
-            path: The path to the data contract directory.
-            file_name: The name of the file that defines the schema.
-            table_name: The name of the table for which to retrieve metadata.
+            file_path: The path to the file that defines the table.
+            catalog_name: The name of the catalog for the table.
+            schema_name: The name of the schema for the table.
+            storage_path: The storage path for the table, if applicable. If not
+                provided, the table will be considered a managed table.
 
         Raises:
-            ValueError: If any issues occur while reading the schema, such as an invalid schema,
-                missing file, or missing path.
+            ValueError: If any issues occur while reading the table metadata, such as an invalid table,
+                missing file, missing path, or missing catalog/schema names.
 
         Returns:
             The context after the execution of this Action, containing the table metadata.
         """
-        if not path:
-            raise ValueError("No path provided. Please specify path to schema metadata.")
-        if not file_name:
-            raise ValueError("No file_name provided. Please specify file name.")
-        if not table_name:
-            raise ValueError("No table_name provided. Please specify table name.")
+        missing_params = []
+        if not file_path:
+            missing_params.append("file_path")
+        if not catalog_name:
+            missing_params.append("catalog_name")
+        if not schema_name:
+            missing_params.append("schema_name")
 
-        path_obj = pathlib.Path(path)
+        if missing_params:
+            raise ValueError(
+                f"Missing required parameters: {', '.join(missing_params)}. Please specify all required parameters."
+            )
 
-        schema, errors = Schema.read_instance_from_file(path_obj / file_name)
+        final_file_path = Path(file_path) if file_path else Path()
+
+        table, errors = Table.read_instance_from_file(
+            final_file_path,
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+        )
         if errors:
-            raise ValueError(f"Errors while reading schema metadata: {errors}")
-        if not schema:
-            raise ValueError("No schema found in metadata.")
+            raise ValueError(f"Errors while reading table metadata: {errors}")
+        if not table:
+            raise ValueError("No table found in metadata.")
 
-        table = schema.get_table_by_name(table_name=table_name)
+        if not table.storage_path and storage_path:
+            self._console_logger.info(f"Setting storage path for table [ '{table.name}' ] to [ '{storage_path}' ]")
+            table.storage_path = storage_path
+            table.is_external = True
 
+        self._console_logger.info(f"Table [ '{table.name}' ] metadata read successfully from [ '{file_path}' ]")
         return context.from_existing(table_metadata=table)
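As a hedged sketch of the new call signature (the `PipelineContext` setup and pipeline wiring are assumed and not shown in this diff), the reworked action can be driven with the new options like this:

```python
# Hypothetical placeholder: `context` is an existing PipelineContext for the pipeline run.
action = ReadMetadataYAMLAction()

# Managed table: no storage_path, so the Table read from YAML stays managed.
context = action.run(
    context,
    file_path="metadata/schemas/bronze/sales_table.yml",
    catalog_name="production",
    schema_name="sales_data",
)

# External table: providing storage_path sets table.storage_path and is_external=True.
context = action.run(
    context,
    file_path="metadata/schemas/bronze/sales_table.yml",
    catalog_name="production",
    schema_name="sales_data",
    storage_path="abfs://external_storage/sales_data/sales_table",
)

table = context.table_metadata  # the parsed Table model
```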
@@ -98,6 +98,7 @@ class WriteDeltaMergeAction(PipelineAction):
 
         delta_merge_writer.write(
             table_identifier=context.table_metadata.identifier,
+            table=context.table_metadata,
             storage_path=str(context.table_metadata.storage_path),
             data_frame=context.data,
             key_columns=key_columns,
@@ -1,36 +1,36 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.13.2b0
+Version: 0.3.13.4b0
 Summary: Your friendly datalake monster.
+Project-URL: homepage, https://initions.com/
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
 License: MIT
-Project-URL: homepage, https://initions.com/
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Database
 Requires-Python: <3.13,>=3.11
-Description-Content-Type: text/markdown
-Requires-Dist: pydantic<3.0.0,>=2.7.2
-Requires-Dist: pyyaml<7.0.0,>=6.0.1
-Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
-Requires-Dist: jinja2<4.0.0,>=3.1.4
-Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
-Requires-Dist: openpyxl<4.0.0,>=3.1.5
-Requires-Dist: requests<3.0.0,>=2.32.3
-Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
-Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
 Requires-Dist: azure-identity<2.0.0,>=1.19.0
-Requires-Dist: httpx<1.0.0,>=0.27.2
+Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
 Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
-Requires-Dist: networkx<4.0,>=3.3
+Requires-Dist: fsspec<2025.6.0,>=2025.5.1
+Requires-Dist: httpx<1.0.0,>=0.27.2
+Requires-Dist: jinja2<4.0.0,>=3.1.4
 Requires-Dist: matplotlib<4.0.0,>=3.9.2
+Requires-Dist: networkx<4.0,>=3.3
+Requires-Dist: openpyxl<4.0.0,>=3.1.5
+Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
+Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
+Requires-Dist: pydantic<3.0.0,>=2.7.2
+Requires-Dist: pyyaml<7.0.0,>=6.0.1
+Requires-Dist: requests<3.0.0,>=2.32.3
 Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
-Requires-Dist: fsspec<2025.6.0,>=2025.5.1
-Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
+Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
+Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
+Description-Content-Type: text/markdown
 
 # cloe-nessy
 
@@ -30,7 +30,7 @@ cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70T
 cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
 cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
 cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
-cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
+cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=zhqPIPfAJTzSLFgBUCwFesUW7CcF1zCPRU-N_8yYjok,10172
 cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
 cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
 cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
@@ -53,7 +53,7 @@ cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowS
 cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
 cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
 cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
-cloe_nessy/object_manager/table_manager.py,sha256=0ERkDY1H0LfO632NF9QT85o3zfOO16EimcYTzFh9M-E,12763
+cloe_nessy/object_manager/table_manager.py,sha256=suHx56TYXagaJ2dVkvTP7vwSI4xgTqXNkHYBbYh2pd4,13913
 cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
 cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
 cloe_nessy/pipeline/pipeline.py,sha256=-1tJVs9rZf8CcwieH4IP7mqJZ6mL7bQUZ56TNKt8eO8,11154
@@ -67,7 +67,7 @@ cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nu
 cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=oXbqbc6BfR82dSIGclwzWiTN8EVmpFjNIYLKm4qOU50,2754
 cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
 cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
-cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
+cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
 cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
 cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=-CEdcXb7Fz5DQNitGlJ8EVBE_LzxfsInyCIO-D7b4iY,3042
 cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
@@ -86,7 +86,7 @@ cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm
 cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
 cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
 cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
-cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
+cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=gh3oD0ZGjDq0hw56NiRimK4HHCruDofqqdzFFgYLve8,5085
 cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
 cloe_nessy/session/session_manager.py,sha256=f4OeeyGD3becDQGkdDbck3jVH9ulOCBWjW6Jaj_MIrc,7765
@@ -94,7 +94,6 @@ cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEv
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.13.2b0.dist-info/METADATA,sha256=J4RdJEWMu175oGTxcpU2Pv8U0TEuk8R6qiozFK6x07E,3294
-cloe_nessy-0.3.13.2b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cloe_nessy-0.3.13.2b0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
-cloe_nessy-0.3.13.2b0.dist-info/RECORD,,
+cloe_nessy-0.3.13.4b0.dist-info/METADATA,sha256=Z9Bby2T96l0nm453ZwbiAq3YDP0uau5GfPORfD9KE7E,3294
+cloe_nessy-0.3.13.4b0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+cloe_nessy-0.3.13.4b0.dist-info/RECORD,,
@@ -1,5 +1,4 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: hatchling 1.27.0
 Root-Is-Purelib: true
 Tag: py3-none-any
-
@@ -1 +0,0 @@
-cloe_nessy