cloe-nessy 0.3.13.4b0__py3-none-any.whl → 0.3.14.1b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -196,11 +196,7 @@ class DeltaMergeWriter(BaseDeltaWriter):
196
196
 
197
197
  config = DeltaMergeConfig(dataframe_columns=data_frame.columns, **kwargs)
198
198
 
199
- delta_table = self.table_manager.get_delta_table(
200
- table=table,
201
- location=storage_path,
202
- spark=data_frame.sparkSession,
203
- )
199
+ delta_table = self.table_manager.get_delta_table(location=storage_path, spark=data_frame.sparkSession)
204
200
 
205
201
  match_conditions = self._build_match_conditions(data_frame, config)
206
202
 
@@ -43,7 +43,6 @@ class Schema(ReadInstancesMixin):
43
43
  raise FileNotFoundError("Schema file not found.")
44
44
 
45
45
  schema, schema_errors = super().read_instance_from_file(processed_instance_path)
46
- table_errors: list[ValidationErrorType] = []
47
46
  if schema:
48
47
  schema.storage_path = "" if not schema.storage_path else schema.storage_path
49
48
  tables, table_errors = Table.read_instances_from_directory(
@@ -186,9 +186,6 @@ class TableManager(LoggerMixin):
186
186
  def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
187
187
  """Get the DeltaTable object from the Table objects location or a location string.
188
188
 
189
- For managed tables, uses the table identifier to access the DeltaTable.
190
- For external tables or when a location is provided, uses the storage path.
191
-
192
189
  Args:
193
190
  table: A Table object representing the Delta table.
194
191
  location: A string representing the table location.
@@ -198,34 +195,15 @@ class TableManager(LoggerMixin):
198
195
  The DeltaTable object corresponding to the given Table object or location string.
199
196
 
200
197
  Raises:
201
- ValueError: If neither table nor location is provided.
198
+ ValueError: If neither table nor location is provided, or if both are provided.
202
199
  """
203
- if table is None and location is None:
204
- self._console_logger.error("Invalid parameters: both table and location are None")
205
- raise ValueError("Either table or location must be provided.")
206
-
207
- spark_session = spark or self._spark
208
-
209
- if table is not None and location is not None:
210
- self._console_logger.info(
211
- f"Both table ({table.identifier}) and location ({location}) provided. Using table object as priority."
212
- )
200
+ if (table is None and location is None) or (table is not None and location is not None):
201
+ raise ValueError("Either table or location must be provided, but not both.")
213
202
 
214
203
  if table is not None:
215
- if table.is_external is False:
216
- self._console_logger.info(f"Getting DeltaTable object for managed table: {table.identifier}")
217
- return DeltaTable.forName(spark_session, table.identifier)
218
-
219
- table_location = str(table.storage_path)
220
- self._console_logger.info(f"Getting DeltaTable object for external table location: {table_location}")
221
- return DeltaTable.forPath(spark_session, table_location)
222
-
223
- self._console_logger.info(f"No table object provided, using location: {location}")
224
- if location is None:
225
- self._console_logger.error("Location is None - this should not happen!")
226
- raise ValueError("Location cannot be None when no table object is provided")
204
+ location = str(table.storage_path)
227
205
  self._console_logger.info(f"Getting DeltaTable object for location: {location}")
228
- return DeltaTable.forPath(spark_session, str(location))
206
+ return DeltaTable.forPath(spark or self._spark, str(location))
229
207
 
230
208
  def table_exists(self, table: Table | None = None, table_identifier: str | None = None) -> bool:
231
209
  """Checks if a table exists in the catalog.
@@ -255,10 +233,9 @@ class TableManager(LoggerMixin):
255
233
  raise ValueError("Invalid table identifier format. Expected 'catalog.schema.table'.")
256
234
 
257
235
  query_result = self._spark.sql(
258
- # Using both upper and lower case to ensure compatibility with case changes in Databricks
259
236
  f"""
260
237
  SELECT 1 FROM {catalog}.information_schema.tables
261
- WHERE table_name in ('{table_name}', '{table_name.lower()}')
238
+ WHERE table_name = '{table_name}'
262
239
  AND table_schema = '{schema}'
263
240
  LIMIT 1""",
264
241
  )
@@ -1,94 +1,66 @@
1
- from pathlib import Path
1
+ import pathlib
2
2
  from typing import Any
3
3
 
4
- from ...models import Table
4
+ from ...models import Schema
5
5
  from ..pipeline_action import PipelineAction
6
6
  from ..pipeline_context import PipelineContext
7
7
 
8
8
 
9
9
  class ReadMetadataYAMLAction(PipelineAction):
10
- """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.
10
+ """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
11
11
 
12
12
  Example:
13
- === "Managed Table"
14
- ```yaml
15
- Read Table Metadata:
16
- action: READ_METADATA_YAML_ACTION
17
- options:
18
- file_path: metadata/schemas/bronze/sales_table.yml
19
- catalog_name: production
20
- schema_name: sales_data
21
- ```
22
- === "External Table"
23
- ```yaml
24
- Read Table Metadata:
25
- action: READ_METADATA_YAML_ACTION
26
- options:
27
- file_path: metadata/schemas/bronze/sales_table.yml
28
- catalog_name: production
29
- schema_name: sales_data
30
- storage_path: abfs://external_storage/sales_data/sales_table
31
- ```
13
+ ```yaml
14
+ Read Schema Metadata:
15
+ action: READ_METADATA_YAML_ACTION
16
+ options:
17
+ path: excel_file_folder/excel_files_june/
18
+ file_name: sales_schema.yml
19
+ table_name: sales
20
+ ```
32
21
  """
33
22
 
34
23
  name: str = "READ_METADATA_YAML_ACTION"
35
24
 
25
+ @staticmethod
36
26
  def run(
37
- self,
38
27
  context: PipelineContext,
39
28
  *,
40
- file_path: str | None = None,
41
- catalog_name: str | None = None,
42
- schema_name: str | None = None,
43
- storage_path: str | None = None,
29
+ path: str | None = None,
30
+ file_name: str | None = None,
31
+ table_name: str | None = None,
44
32
  **_: Any,
45
33
  ) -> PipelineContext:
46
- """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.
34
+ """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
47
35
 
48
36
  Args:
49
37
  context: The context in which this Action is executed.
50
- file_path: The path to the file that defines the table.
51
- catalog_name: The name of the catalog for the table.
52
- schema_name: The name of the schema for the table.
53
- storage_path: The storage path for the table, if applicable. If not
54
- provided, the table will be considered a managed table.
38
+ path: The path to the data contract directory.
39
+ file_name: The name of the file that defines the schema.
40
+ table_name: The name of the table for which to retrieve metadata.
55
41
 
56
42
  Raises:
57
- ValueError: If any issues occur while reading the table metadata, such as an invalid table,
58
- missing file, missing path, or missing catalog/schema names.
43
+ ValueError: If any issues occur while reading the schema, such as an invalid schema,
44
+ missing file, or missing path.
59
45
 
60
46
  Returns:
61
47
  The context after the execution of this Action, containing the table metadata.
62
48
  """
63
- missing_params = []
64
- if not file_path:
65
- missing_params.append("file_path")
66
- if not catalog_name:
67
- missing_params.append("catalog_name")
68
- if not schema_name:
69
- missing_params.append("schema_name")
49
+ if not path:
50
+ raise ValueError("No path provided. Please specify path to schema metadata.")
51
+ if not file_name:
52
+ raise ValueError("No file_name provided. Please specify file name.")
53
+ if not table_name:
54
+ raise ValueError("No table_name provided. Please specify table name.")
70
55
 
71
- if missing_params:
72
- raise ValueError(
73
- f"Missing required parameters: {', '.join(missing_params)}. Please specify all required parameters."
74
- )
56
+ path_obj = pathlib.Path(path)
75
57
 
76
- final_file_path = Path(file_path) if file_path else Path()
77
-
78
- table, errors = Table.read_instance_from_file(
79
- final_file_path,
80
- catalog_name=catalog_name,
81
- schema_name=schema_name,
82
- )
58
+ schema, errors = Schema.read_instance_from_file(path_obj / file_name)
83
59
  if errors:
84
- raise ValueError(f"Errors while reading table metadata: {errors}")
85
- if not table:
86
- raise ValueError("No table found in metadata.")
60
+ raise ValueError(f"Errors while reading schema metadata: {errors}")
61
+ if not schema:
62
+ raise ValueError("No schema found in metadata.")
87
63
 
88
- if not table.storage_path and storage_path:
89
- self._console_logger.info(f"Setting storage path for table [ '{table.name}' ] to [ '{storage_path}' ]")
90
- table.storage_path = storage_path
91
- table.is_external = True
64
+ table = schema.get_table_by_name(table_name=table_name)
92
65
 
93
- self._console_logger.info(f"Table [ '{table.name}' ] metadata read successfully from [ '{file_path}' ]")
94
66
  return context.from_existing(table_metadata=table)
@@ -98,7 +98,6 @@ class WriteDeltaMergeAction(PipelineAction):
98
98
 
99
99
  delta_merge_writer.write(
100
100
  table_identifier=context.table_metadata.identifier,
101
- table=context.table_metadata,
102
101
  storage_path=str(context.table_metadata.storage_path),
103
102
  data_frame=context.data,
104
103
  key_columns=key_columns,
@@ -60,8 +60,11 @@ class SessionManager:
60
60
  nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
61
61
  if nessy_spark_config:
62
62
  try:
63
- # Parse the JSON configuration from the environment variable
64
63
  config = json.loads(nessy_spark_config)
64
+ if "remote" in config:
65
+ builder = builder.remote(config["remote"])
66
+ del config["remote"]
67
+ # Parse the JSON configuration from the environment variable
65
68
  except json.JSONDecodeError as e:
66
69
  raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
67
70
 
@@ -1,36 +1,36 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloe-nessy
3
- Version: 0.3.13.4b0
3
+ Version: 0.3.14.1b0
4
4
  Summary: Your friendly datalake monster.
5
- Project-URL: homepage, https://initions.com/
6
5
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
7
6
  License: MIT
7
+ Project-URL: homepage, https://initions.com/
8
8
  Classifier: Development Status :: 5 - Production/Stable
9
9
  Classifier: Environment :: Console
10
- Classifier: Intended Audience :: Developers
11
10
  Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Intended Audience :: Developers
12
12
  Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Database
15
15
  Requires-Python: <3.13,>=3.11
16
- Requires-Dist: azure-identity<2.0.0,>=1.19.0
17
- Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
18
- Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
19
- Requires-Dist: fsspec<2025.6.0,>=2025.5.1
20
- Requires-Dist: httpx<1.0.0,>=0.27.2
21
- Requires-Dist: jinja2<4.0.0,>=3.1.4
22
- Requires-Dist: matplotlib<4.0.0,>=3.9.2
23
- Requires-Dist: networkx<4.0,>=3.3
24
- Requires-Dist: openpyxl<4.0.0,>=3.1.5
25
- Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
26
- Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
16
+ Description-Content-Type: text/markdown
27
17
  Requires-Dist: pydantic<3.0.0,>=2.7.2
28
18
  Requires-Dist: pyyaml<7.0.0,>=6.0.1
29
- Requires-Dist: requests<3.0.0,>=2.32.3
30
- Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
31
19
  Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
20
+ Requires-Dist: jinja2<4.0.0,>=3.1.4
21
+ Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
22
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
23
+ Requires-Dist: requests<3.0.0,>=2.32.3
32
24
  Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
33
- Description-Content-Type: text/markdown
25
+ Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
26
+ Requires-Dist: azure-identity<2.0.0,>=1.19.0
27
+ Requires-Dist: httpx<1.0.0,>=0.27.2
28
+ Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
29
+ Requires-Dist: networkx<4.0,>=3.3
30
+ Requires-Dist: matplotlib<4.0.0,>=3.9.2
31
+ Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
32
+ Requires-Dist: fsspec<2025.7.1,>=2025.7.0
33
+ Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
34
34
 
35
35
  # cloe-nessy
36
36
 
@@ -30,7 +30,7 @@ cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70T
30
30
  cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
31
31
  cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
32
32
  cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
33
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=zhqPIPfAJTzSLFgBUCwFesUW7CcF1zCPRU-N_8yYjok,10172
33
+ cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
34
34
  cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
35
35
  cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
36
36
  cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
@@ -41,7 +41,7 @@ cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,
41
41
  cloe_nessy/models/column.py,sha256=53fBwRnino72XKACsHZpN9QfCBqqSXyKLHZlM0huumg,1988
42
42
  cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
43
43
  cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
44
- cloe_nessy/models/schema.py,sha256=cNSrH7K4hLRrkg1E6fW6DUIBMZdR2A5B21POj5iQ4GA,3429
44
+ cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
45
45
  cloe_nessy/models/table.py,sha256=O9vcJ1XBIb6kA-NAI3SNpB5b7MGDo3p4wMJdonPaBfA,12076
46
46
  cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
47
47
  cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
@@ -53,7 +53,7 @@ cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowS
53
53
  cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
54
54
  cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
55
55
  cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
56
- cloe_nessy/object_manager/table_manager.py,sha256=suHx56TYXagaJ2dVkvTP7vwSI4xgTqXNkHYBbYh2pd4,13913
56
+ cloe_nessy/object_manager/table_manager.py,sha256=oYcYiZR0-JyoadcCcDelxfFb-ATeKDIZerYaZc-moiI,12634
57
57
  cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
58
58
  cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
59
59
  cloe_nessy/pipeline/pipeline.py,sha256=-1tJVs9rZf8CcwieH4IP7mqJZ6mL7bQUZ56TNKt8eO8,11154
@@ -67,7 +67,7 @@ cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nu
67
67
  cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=oXbqbc6BfR82dSIGclwzWiTN8EVmpFjNIYLKm4qOU50,2754
68
68
  cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
69
69
  cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
70
- cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
70
+ cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
71
71
  cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
72
72
  cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=-CEdcXb7Fz5DQNitGlJ8EVBE_LzxfsInyCIO-D7b4iY,3042
73
73
  cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
@@ -86,14 +86,15 @@ cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm
86
86
  cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
87
87
  cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
88
88
  cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
89
- cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=gh3oD0ZGjDq0hw56NiRimK4HHCruDofqqdzFFgYLve8,5085
89
+ cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
90
90
  cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
91
91
  cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
92
- cloe_nessy/session/session_manager.py,sha256=f4OeeyGD3becDQGkdDbck3jVH9ulOCBWjW6Jaj_MIrc,7765
92
+ cloe_nessy/session/session_manager.py,sha256=whWEXenVKan4xy99Y2vShEe4BDcq1viGLUNRW-PyyKo,7908
93
93
  cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
94
94
  cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
95
95
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
96
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
97
- cloe_nessy-0.3.13.4b0.dist-info/METADATA,sha256=Z9Bby2T96l0nm453ZwbiAq3YDP0uau5GfPORfD9KE7E,3294
98
- cloe_nessy-0.3.13.4b0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
99
- cloe_nessy-0.3.13.4b0.dist-info/RECORD,,
97
+ cloe_nessy-0.3.14.1b0.dist-info/METADATA,sha256=1GJtUcYEA_BhjoT-op3vn-lG5o19s3hFqgFGF9UXKJc,3294
98
+ cloe_nessy-0.3.14.1b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
99
+ cloe_nessy-0.3.14.1b0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
100
+ cloe_nessy-0.3.14.1b0.dist-info/RECORD,,
@@ -1,4 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ cloe_nessy