cloe-nessy 0.3.13.3b0__py3-none-any.whl → 0.3.14.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +1 -3
- cloe_nessy/models/schema.py +0 -1
- cloe_nessy/object_manager/table_manager.py +6 -29
- cloe_nessy/pipeline/actions/write_delta_merge.py +0 -1
- cloe_nessy/session/session_manager.py +4 -1
- {cloe_nessy-0.3.13.3b0.dist-info → cloe_nessy-0.3.14.1b0.dist-info}/METADATA +17 -17
- {cloe_nessy-0.3.13.3b0.dist-info → cloe_nessy-0.3.14.1b0.dist-info}/RECORD +9 -8
- {cloe_nessy-0.3.13.3b0.dist-info → cloe_nessy-0.3.14.1b0.dist-info}/WHEEL +2 -1
- cloe_nessy-0.3.14.1b0.dist-info/top_level.txt +1 -0

cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py
CHANGED
@@ -196,9 +196,7 @@ class DeltaMergeWriter(BaseDeltaWriter):

         config = DeltaMergeConfig(dataframe_columns=data_frame.columns, **kwargs)

-        delta_table = self.table_manager.get_delta_table(
-            table=table, location=storage_path, spark=data_frame.sparkSession
-        )
+        delta_table = self.table_manager.get_delta_table(location=storage_path, spark=data_frame.sparkSession)

         match_conditions = self._build_match_conditions(data_frame, config)

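The merge writer now resolves the Delta table by storage path alone, reusing the SparkSession attached to the incoming DataFrame. A minimal sketch of what that path-only lookup amounts to with delta-spark (the helper name is illustrative; assumes delta-spark is installed and storage_path already holds a Delta table):

```python
# Sketch only, not the package's code: resolve a Delta table purely by path,
# using the session that travels with the DataFrame.
from delta.tables import DeltaTable


def resolve_delta_table(data_frame, storage_path: str) -> DeltaTable:
    spark = data_frame.sparkSession  # DataFrame.sparkSession, PySpark >= 3.3
    return DeltaTable.forPath(spark, storage_path)  # no Table object or identifier needed
```
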
cloe_nessy/models/schema.py
CHANGED
@@ -43,7 +43,6 @@ class Schema(ReadInstancesMixin):
             raise FileNotFoundError("Schema file not found.")

         schema, schema_errors = super().read_instance_from_file(processed_instance_path)
-        table_errors: list[ValidationErrorType] = []
         if schema:
             schema.storage_path = "" if not schema.storage_path else schema.storage_path
             tables, table_errors = Table.read_instances_from_directory(

cloe_nessy/object_manager/table_manager.py
CHANGED
@@ -186,9 +186,6 @@ class TableManager(LoggerMixin):
     def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
         """Get the DeltaTable object from the Table objects location or a location string.

-        For managed tables, uses the table identifier to access the DeltaTable.
-        For external tables or when a location is provided, uses the storage path.
-
         Args:
             table: A Table object representing the Delta table.
             location: A string representing the table location.
@@ -198,34 +195,15 @@
             The DeltaTable object corresponding to the given Table object or location string.

         Raises:
-            ValueError: If neither table nor location is provided.
+            ValueError: If neither table nor location is provided, or if both are provided.
         """
-        if table is None and location is None:
-
-            raise ValueError("Either table or location must be provided.")
-
-        spark_session = spark or self._spark
-
-        if table is not None and location is not None:
-            self._console_logger.info(
-                f"Both table ({table.identifier}) and location ({location}) provided. Using table object as priority."
-            )
+        if (table is None and location is None) or (table is not None and location is not None):
+            raise ValueError("Either table or location must be provided, but not both.")

         if table is not None:
-
-            self._console_logger.info(f"Getting DeltaTable object for managed table: {table.identifier}")
-            return DeltaTable.forName(spark_session, table.identifier)
-
-            table_location = str(table.storage_path)
-            self._console_logger.info(f"Getting DeltaTable object for external table location: {table_location}")
-            return DeltaTable.forPath(spark_session, table_location)
-
-        self._console_logger.info(f"No table object provided, using location: {location}")
-        if location is None:
-            self._console_logger.error("Location is None - this should not happen!")
-            raise ValueError("Location cannot be None when no table object is provided")
+            location = str(table.storage_path)
         self._console_logger.info(f"Getting DeltaTable object for location: {location}")
-        return DeltaTable.forPath(
+        return DeltaTable.forPath(spark or self._spark, str(location))

     def table_exists(self, table: Table | None = None, table_identifier: str | None = None) -> bool:
         """Checks if a table exists in the catalog.
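
The refactor collapses the managed/external branching and the debug logging into one path-based lookup with mutually exclusive arguments. A hedged, standalone reconstruction of the new control flow (the real method lives on TableManager and logs through its console logger; Table is reduced here to the single attribute the flow uses):

```python
# Hedged reconstruction of the simplified get_delta_table flow.
from dataclasses import dataclass

from delta.tables import DeltaTable
from pyspark.sql import SparkSession


@dataclass
class Table:
    storage_path: str  # the only attribute this sketch needs


def get_delta_table(spark: SparkSession, table: Table | None = None, location: str | None = None) -> DeltaTable:
    # Exactly one of `table` / `location` may be supplied.
    if (table is None and location is None) or (table is not None and location is not None):
        raise ValueError("Either table or location must be provided, but not both.")
    if table is not None:
        location = str(table.storage_path)  # a Table now only contributes its storage path
    return DeltaTable.forPath(spark, str(location))  # always a path-based lookup
```
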
@@ -255,10 +233,9 @@
             raise ValueError("Invalid table identifier format. Expected 'catalog.schema.table'.")

         query_result = self._spark.sql(
-            # Using both upper and lower case to ensure compatibility with case changes in Databricks
             f"""
             SELECT 1 FROM {catalog}.information_schema.tables
-            WHERE table_name
+            WHERE table_name = '{table_name}'
             AND table_schema = '{schema}'
             LIMIT 1""",
         )
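
On the old side the removed `WHERE table_name` line is cut off in this extract, so the previous predicate is not fully visible; the new query filters on both table_name and table_schema. A hedged sketch of the corrected existence check, assuming a Unity-Catalog-style information_schema (how the package turns the query result into a boolean is not shown in the diff and is assumed here):

```python
# Sketch of the corrected existence check against information_schema.
from pyspark.sql import SparkSession


def table_exists(spark: SparkSession, table_identifier: str) -> bool:
    parts = table_identifier.split(".")
    if len(parts) != 3:
        raise ValueError("Invalid table identifier format. Expected 'catalog.schema.table'.")
    catalog, schema, table_name = parts
    query_result = spark.sql(
        f"""
        SELECT 1 FROM {catalog}.information_schema.tables
        WHERE table_name = '{table_name}'
        AND table_schema = '{schema}'
        LIMIT 1"""
    )
    return query_result.count() > 0  # assumed consumption of the result
```
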
cloe_nessy/pipeline/actions/write_delta_merge.py
CHANGED
@@ -98,7 +98,6 @@ class WriteDeltaMergeAction(PipelineAction):

         delta_merge_writer.write(
             table_identifier=context.table_metadata.identifier,
-            table=context.table_metadata,
             storage_path=str(context.table_metadata.storage_path),
             data_frame=context.data,
             key_columns=key_columns,

cloe_nessy/session/session_manager.py
CHANGED
@@ -60,8 +60,11 @@ class SessionManager:
         nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
         if nessy_spark_config:
             try:
-                # Parse the JSON configuration from the environment variable
                 config = json.loads(nessy_spark_config)
+                if "remote" in config:
+                    builder = builder.remote(config["remote"])
+                    del config["remote"]
+                # Parse the JSON configuration from the environment variable
             except json.JSONDecodeError as e:
                 raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e

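NESSY_SPARK_CONFIG may now carry a "remote" key holding a Spark Connect URL; it is routed to builder.remote() and stripped before the remaining options are handled. A hedged, self-contained sketch of that branch (endpoint and extra key are illustrative values; applying the leftover keys via builder.config() is an assumption about the surrounding code):

```python
# Sketch of consuming a "remote" entry from NESSY_SPARK_CONFIG (illustrative values).
import json
import os

from pyspark.sql import SparkSession

os.environ["NESSY_SPARK_CONFIG"] = json.dumps(
    {"remote": "sc://example-host:443", "spark.sql.shuffle.partitions": "8"}
)

builder = SparkSession.builder
config = json.loads(os.environ["NESSY_SPARK_CONFIG"])
if "remote" in config:
    builder = builder.remote(config["remote"])  # Spark Connect endpoint (PySpark >= 3.4)
    del config["remote"]
for key, value in config.items():  # assumed handling of the remaining options
    builder = builder.config(key, value)
# spark = builder.getOrCreate()  # would need a reachable endpoint to actually connect
```
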

{cloe_nessy-0.3.13.3b0.dist-info → cloe_nessy-0.3.14.1b0.dist-info}/METADATA
CHANGED
@@ -1,36 +1,36 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.13.3b0
+Version: 0.3.14.1b0
 Summary: Your friendly datalake monster.
-Project-URL: homepage, https://initions.com/
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
 License: MIT
+Project-URL: homepage, https://initions.com/
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
-Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: MIT License
+Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Database
 Requires-Python: <3.13,>=3.11
-
-Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
-Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
-Requires-Dist: fsspec<2025.6.0,>=2025.5.1
-Requires-Dist: httpx<1.0.0,>=0.27.2
-Requires-Dist: jinja2<4.0.0,>=3.1.4
-Requires-Dist: matplotlib<4.0.0,>=3.9.2
-Requires-Dist: networkx<4.0,>=3.3
-Requires-Dist: openpyxl<4.0.0,>=3.1.5
-Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
-Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
+Description-Content-Type: text/markdown
 Requires-Dist: pydantic<3.0.0,>=2.7.2
 Requires-Dist: pyyaml<7.0.0,>=6.0.1
-Requires-Dist: requests<3.0.0,>=2.32.3
-Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
 Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
+Requires-Dist: jinja2<4.0.0,>=3.1.4
+Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
+Requires-Dist: openpyxl<4.0.0,>=3.1.5
+Requires-Dist: requests<3.0.0,>=2.32.3
 Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
-
+Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
+Requires-Dist: azure-identity<2.0.0,>=1.19.0
+Requires-Dist: httpx<1.0.0,>=0.27.2
+Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
+Requires-Dist: networkx<4.0,>=3.3
+Requires-Dist: matplotlib<4.0.0,>=3.9.2
+Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
+Requires-Dist: fsspec<2025.7.1,>=2025.7.0
+Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8

 # cloe-nessy


{cloe_nessy-0.3.13.3b0.dist-info → cloe_nessy-0.3.14.1b0.dist-info}/RECORD
CHANGED
@@ -30,7 +30,7 @@ cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70T
 cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
 cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
 cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
-cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=
+cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
 cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
 cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
 cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
@@ -41,7 +41,7 @@ cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,
 cloe_nessy/models/column.py,sha256=53fBwRnino72XKACsHZpN9QfCBqqSXyKLHZlM0huumg,1988
 cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
 cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
-cloe_nessy/models/schema.py,sha256=
+cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
 cloe_nessy/models/table.py,sha256=O9vcJ1XBIb6kA-NAI3SNpB5b7MGDo3p4wMJdonPaBfA,12076
 cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
 cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
@@ -53,7 +53,7 @@ cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowS
 cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
 cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
 cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
-cloe_nessy/object_manager/table_manager.py,sha256=
+cloe_nessy/object_manager/table_manager.py,sha256=oYcYiZR0-JyoadcCcDelxfFb-ATeKDIZerYaZc-moiI,12634
 cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
 cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
 cloe_nessy/pipeline/pipeline.py,sha256=-1tJVs9rZf8CcwieH4IP7mqJZ6mL7bQUZ56TNKt8eO8,11154
@@ -86,14 +86,15 @@ cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm
 cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
 cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
 cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
-cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=
+cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
 cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
-cloe_nessy/session/session_manager.py,sha256=
+cloe_nessy/session/session_manager.py,sha256=whWEXenVKan4xy99Y2vShEe4BDcq1viGLUNRW-PyyKo,7908
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.
-cloe_nessy-0.3.
-cloe_nessy-0.3.
+cloe_nessy-0.3.14.1b0.dist-info/METADATA,sha256=1GJtUcYEA_BhjoT-op3vn-lG5o19s3hFqgFGF9UXKJc,3294
+cloe_nessy-0.3.14.1b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cloe_nessy-0.3.14.1b0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.14.1b0.dist-info/RECORD,,

cloe_nessy-0.3.14.1b0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+cloe_nessy