cloe-nessy 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/integration/delta_loader/delta_load_options.py +1 -1
- cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py +1 -1
- cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py +1 -1
- cloe_nessy/integration/reader/api_reader.py +5 -2
- cloe_nessy/integration/reader/excel_reader.py +0 -1
- cloe_nessy/integration/reader/file_reader.py +1 -1
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +9 -10
- cloe_nessy/logging/logger_mixin.py +4 -4
- cloe_nessy/models/mixins/read_instance_mixin.py +5 -1
- cloe_nessy/models/schema.py +2 -1
- cloe_nessy/models/table.py +9 -8
- cloe_nessy/models/volume.py +4 -4
- cloe_nessy/object_manager/table_manager.py +4 -3
- cloe_nessy/pipeline/actions/read_api.py +2 -2
- cloe_nessy/pipeline/actions/read_catalog_table.py +1 -7
- cloe_nessy/pipeline/actions/read_files.py +1 -1
- cloe_nessy/pipeline/actions/transform_deduplication.py +3 -3
- cloe_nessy/pipeline/actions/transform_regex_extract.py +1 -1
- cloe_nessy/pipeline/actions/write_catalog_table.py +1 -1
- cloe_nessy/pipeline/pipeline_plotting_service.py +2 -1
- cloe_nessy/session/session_manager.py +9 -12
- {cloe_nessy-1.0.3.dist-info → cloe_nessy-1.0.5.dist-info}/METADATA +2 -2
- {cloe_nessy-1.0.3.dist-info → cloe_nessy-1.0.5.dist-info}/RECORD +24 -24
- {cloe_nessy-1.0.3.dist-info → cloe_nessy-1.0.5.dist-info}/WHEEL +0 -0
cloe_nessy/integration/delta_loader/delta_load_options.py CHANGED

@@ -8,7 +8,7 @@ from pydantic import BaseModel
 class DeltaLoadOptions(BaseModel):
     """Options to configure the DeltaLoader.
 
-
+    Attributes:
         strategy: Delta load strategy to use.
         delta_load_identifier: Unique delta load identifier used to track the delta load metadata.
         strategy_options: Options used to configure the chosen delta load strategy.
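The fix above restores the `Attributes:` header that Google-style docstring parsers need. A minimal sketch of the documented options, using a pydantic stand-in model since the hunk does not show the real field types; the class name, field types, and all values here are assumptions:

```python
from pydantic import BaseModel


class DeltaLoadOptionsSketch(BaseModel):
    """Stand-in mirroring the documented attributes; types are assumptions."""

    strategy: str  # Delta load strategy to use
    delta_load_identifier: str  # unique id used to track the delta load metadata
    strategy_options: dict  # options for the chosen strategy


opts = DeltaLoadOptionsSketch(
    strategy="cdf",
    delta_load_identifier="orders_daily_load",
    strategy_options={"deduplication_columns": ["order_id"]},
)
```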
cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py CHANGED

@@ -11,7 +11,7 @@ from ..delta_loader import DeltaLoader
 class DeltaCDFConfig(BaseModel):
     """This class holds the config for the DeltaCDFLoader.
 
-
+    Attributes:
         deduplication_columns: A list of columns used for deduplication.
         from_commit_version: The starting commit version. If None, it starts from the first viable version.
         to_commit_version: The ending commit version. If None, it goes up to the latest version.
cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py CHANGED

@@ -13,7 +13,7 @@ from ..delta_loader import DeltaLoader
 class DeltaTimestampConfig(BaseModel):
     """This class holds the config for the DeltaTimestampLoader.
 
-
+    Attributes:
         timestamp_filter_cols: A list of columns used for timestamp filtering.
         from_timestamp: The starting timestamp. If None, it starts from the beginning.
         to_timestamp: The ending timestamp. If None, it goes up to the latest timestamp.
cloe_nessy/integration/reader/api_reader.py CHANGED

@@ -343,8 +343,9 @@ class APIReader(BaseReader):
             pagination_config: Configuration for pagination.
             max_retries: The maximum number of retries for the request.
             backoff_factor: Factor for exponential backoff between retries.
-
-
+            dynamic_requests: A list of RequestSet dictionaries for making multiple API requests dynamically.
+                Each RequestSet should contain 'endpoint', 'params', and optionally 'headers', 'data', 'json_body'.
+                When provided, the reader will execute all requests and combine the results.
 
         Returns:
             DataFrame: The Spark DataFrame containing the read data in the json_object column.
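Going only by the documented keys, a `dynamic_requests` value could look like the sketch below; the endpoints and parameter values are hypothetical:

```python
# Each dict is one RequestSet; keys follow the docstring above.
dynamic_requests = [
    {"endpoint": "/v1/orders", "params": {"page_size": 100}},
    {
        "endpoint": "/v1/customers",
        "params": {"region": "emea"},
        "headers": {"Accept": "application/json"},  # optional
        "json_body": None,  # optional
    },
]
# Per the docstring, the reader executes all requests and combines the results.
```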
@@ -457,6 +458,8 @@ class APIReader(BaseReader):
                 backoff_factor=backoff_factor,
             )
         else:
+            if not pagination_config:
+                raise ValueError("pagination_config must be provided for paginated requests.")
             response_data = APIReader._read_from_api_with_pagination(
                 api_client=api_client,
                 endpoint=endpoint,
cloe_nessy/integration/reader/excel_reader.py CHANGED

@@ -92,7 +92,6 @@ class ExcelDataFrameReader(BaseReader):
                 pyspark.pandas.read_excel and handed to TextFileReader.
             load_as_strings: If True, converts all columns to string type to avoid datatype conversion errors in Spark.
             add_metadata_column: If True, adds a metadata column containing the file location and sheet name.
-            **kwargs: Additional keyword arguments to maintain compatibility with the base class method.
         """
         if options is None:
             options = {}
cloe_nessy/integration/reader/file_reader.py CHANGED

@@ -152,7 +152,7 @@ class FileReader(BaseReader):
         """Reads specified location as a stream and returns streaming DataFrame.
 
         Arguments:
-            location
+            location: Location of files to read.
             format: Format of files to read.
             schema: Schema of the file.
             add_metadata_column: Whether to include __metadata column in the DataFrame.
cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py CHANGED

@@ -13,7 +13,7 @@ from .delta_writer_base import BaseDeltaWriter
 class DeltaMergeConfig(BaseModel):
     """Configuration for Merge options.
 
-
+    Attributes:
         dataframe_columns: The columns of the DataFrame.
         key_columns: List of column names that form the key for the merge
             operation.
@@ -117,25 +117,23 @@ class DeltaMergeConfig(BaseModel):
         return config
 
     @model_validator(mode="after")
-
-    def _validate_partition_pruning(cls, config: Self):
+    def _validate_partition_pruning(self) -> Self:
         """If partition_pruning is set, the partition by columns must be known."""
-        if
+        if self.use_partition_pruning is True and not self.partition_by:
             raise ValueError("Partition columns must be specified when using partition pruning.")
-        return
+        return self
 
     @model_validator(mode="after")
-
-    def _validate_cols_exist(cls, config: Any):
+    def _validate_cols_exist(self) -> Self:
         """If partition_pruning is set, the partition by columns must be known."""
-        if any(col not in
-            col not in
+        if any(col not in self.cols_to_merge for col in self.cols_to_update) or any(
+            col not in self.cols_to_merge for col in self.cols_to_insert
         ):
             raise ValueError(
                 "You specified column names for UPDATE or INSERT that either don't exist in the dataframe "
                 "or are explicitly excluded from the MERGE.",
             )
-        return
+        return self
 
 
 class DeltaMergeWriter(BaseDeltaWriter):
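The rewritten validators follow the Pydantic v2 convention: a `@model_validator(mode="after")` runs on the already-constructed instance, so it is written as an instance method that returns `self`. A self-contained sketch of the same pattern; the model and field names here are illustrative, not the package's:

```python
from typing import Self

from pydantic import BaseModel, model_validator


class PruningConfig(BaseModel):
    """Illustrative model demonstrating an "after" model validator."""

    use_partition_pruning: bool = False
    partition_by: list[str] = []

    @model_validator(mode="after")
    def _check_partition_columns(self) -> Self:
        # An "after" validator receives the validated instance and must return it.
        if self.use_partition_pruning and not self.partition_by:
            raise ValueError("Partition columns must be specified when using partition pruning.")
        return self


PruningConfig(use_partition_pruning=True, partition_by=["ds"])  # ok
# PruningConfig(use_partition_pruning=True)  # raises ValueError
```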
@@ -220,6 +218,7 @@ class DeltaMergeWriter(BaseDeltaWriter):
             kwargs: Passed to the
                 [`DeltaMergeConfig`][cloe_nessy.integration.writer.delta_writer.delta_merge_writer.DeltaMergeConfig].
                 Common kwargs include:
+
                 - key_columns: List of target column names to use as merge keys.
                 - column_mapping: Dict mapping target column names to source column names.
                 - when_matched_update: Whether to update matching records.
cloe_nessy/logging/logger_mixin.py CHANGED

@@ -1,5 +1,5 @@
 import logging
-from typing import cast
+from typing import Any, cast
 
 from cloe_logging import LoggerFactory
 
@@ -35,7 +35,7 @@ class LoggerMixin:
             logging_level=level if level is not None else logging_settings.log_level_console,
             log_format=log_format if log_format is not None else logging_settings.log_format_console,
         )
-        return
+        return logger
 
     def get_tabular_logger(
         self,
@@ -110,13 +110,13 @@ class LoggerMixin:
             log_type=log_type,
             test_connectivity=False,
         )
-        return
+        return logger
 
     @staticmethod
     def should_add_log_analytics_handler(
         logging_settings: LoggingSettings,
         add_log_analytics_logger: bool | None,
-        **kwargs,  # noqa: ARG004
+        **kwargs: Any,  # noqa: ARG004
     ) -> bool:
         """Determines if a LogAnalyticsHandler should be added to the logger.
 
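Both logger factories previously ended with a bare `return`, so every call site received `None` instead of the configured logger. A minimal illustration of the bug class, not the package's code; the logger name is hypothetical:

```python
import logging


def get_console_logger() -> logging.Logger:
    logger = logging.getLogger("nessy.console")
    # A bare `return` here would hand the caller None and break
    # chained call sites like `self.get_console_logger().info(...)`.
    return logger


get_console_logger().info("handlers configured")
```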
cloe_nessy/models/mixins/read_instance_mixin.py CHANGED

@@ -60,7 +60,7 @@ class ReadInstancesMixin(BaseModel):
     @classmethod
     def read_instances_from_directory(
         cls,
-        instance_path: pathlib.Path,
+        instance_path: str | pathlib.Path,
         fail_on_missing_subfolder: bool = True,
         **_: Any,  # allow subclasses to pass additional arguments
     ) -> tuple[list[Self], list[ValidationErrorType]]:
@@ -77,6 +77,10 @@ class ReadInstancesMixin(BaseModel):
         instances: list[Self] = []
         errors: list[ValidationErrorType] = []
 
+        # Convert to Path if string
+        if isinstance(instance_path, str):
+            instance_path = pathlib.Path(instance_path)
+
         if not instance_path.exists() or not instance_path.is_dir():
             if fail_on_missing_subfolder:
                 raise FileNotFoundError(f"Directory not found: {instance_path}")
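The widened signature plus the up-front `isinstance` check is a common normalize-at-the-boundary idiom: accept both `str` and `Path`, convert once, then work with `Path` everywhere. The same pattern in isolation; `list_yaml_files` is a hypothetical helper, not part of the package:

```python
import pathlib


def list_yaml_files(instance_path: str | pathlib.Path) -> list[pathlib.Path]:
    # Accept both str and Path, normalizing up front as the mixin now does.
    if isinstance(instance_path, str):
        instance_path = pathlib.Path(instance_path)
    if not instance_path.exists() or not instance_path.is_dir():
        raise FileNotFoundError(f"Directory not found: {instance_path}")
    return sorted(instance_path.glob("*.yaml"))
```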
cloe_nessy/models/schema.py CHANGED

@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Any, Self
+from typing import Any, Self, cast
 
 from pydantic import Field
 
@@ -43,6 +43,7 @@ class Schema(ReadInstancesMixin):
             raise FileNotFoundError("Schema file not found.")
 
         schema, schema_errors = super().read_instance_from_file(processed_instance_path)
+        schema = cast(Self | None, schema)
        table_errors: list[ValidationErrorType] = []
        if schema:
            schema.storage_path = "" if not schema.storage_path else schema.storage_path
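The added `cast` only narrows the static type of `schema` for the checker; it performs no runtime conversion or validation. A two-line reminder of that semantics:

```python
from typing import cast

value: object = "catalog.schema"
# cast() informs the type checker; at runtime it returns `value` unchanged.
name = cast(str, value)
assert name == "catalog.schema"
```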
cloe_nessy/models/table.py CHANGED

@@ -1,5 +1,6 @@
+import pathlib
 from pathlib import Path
-from typing import Any, Self
+from typing import Any, Self, cast
 
 import yaml
 from jinja2 import TemplateNotFound
@@ -96,16 +97,16 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
         return v
 
     @model_validator(mode="after")
-    def _validate_is_external(
+    def _validate_is_external(self) -> Self:
         """If is_external is set to True, storage_path has to be set."""
-        if
+        if self.is_external and self.storage_path is None:
             raise ValueError("is_external cannot be true while storage_path is None.")
-        return
+        return self
 
     @classmethod
-    def read_instances_from_directory(
+    def read_instances_from_directory(  # type: ignore[override]
         cls,
-        instance_path: str | Path,
+        instance_path: str | pathlib.Path,
         fail_on_missing_subfolder: bool = True,
         catalog_name: str | None = None,
         schema_name: str | None = None,
@@ -236,7 +237,7 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
     def get_create_statement(
         self,
         replace: bool = True,
-    ):
+    ) -> str:
         """Get the create statement for the Table.
 
         Args:
@@ -257,7 +258,7 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
             self._console_logger.error(f"Template [ {template_name} ] not found.")
             raise err
         render = template.render(table=self, replace=replace)
-        return render
+        return cast(str, render)
 
     def get_column_by_name(self, column_name: str) -> Column | None:
         """Get a column by name.
cloe_nessy/models/volume.py CHANGED

@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 
 from jinja2 import TemplateNotFound
 from pydantic import BaseModel, field_validator
@@ -25,7 +25,7 @@ class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
     @property
     def storage_identifier(self) -> str:
         """Return the storage identifier."""
-        return f"/Volumes/{self.catalog}/{self.
+        return f"/Volumes/{self.catalog}/{self.schema_name}/{self.name}/"
 
     @property
     def catalog(self) -> str:
@@ -55,7 +55,7 @@ class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
     def get_create_statement(
         self,
         if_not_exists: bool = True,
-    ):
+    ) -> str:
         """Get the create statement for the Volume.
 
         Args:
@@ -73,4 +73,4 @@ class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
             self._console_logger.error(f"Template [ {template_name} ] not found.")
             raise err
         render = template.render(volume=self, if_not_exists=if_not_exists)
-        return render
+        return cast(str, render)
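The fixed f-string produces a Unity-Catalog-style volume path of the shape `/Volumes/<catalog>/<schema_name>/<name>/`. A tiny sketch with hypothetical values:

```python
# Shape of the identifier produced by the fixed f-string;
# catalog, schema_name, and name values are hypothetical.
catalog, schema_name, name = "prod", "sales", "landing"
storage_identifier = f"/Volumes/{catalog}/{schema_name}/{name}/"
assert storage_identifier == "/Volumes/prod/sales/landing/"
```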
cloe_nessy/object_manager/table_manager.py CHANGED

@@ -1,6 +1,7 @@
 import functools
 import logging
 from dataclasses import dataclass, field
+from typing import Any
 
 from delta import DeltaTable  # type: ignore
 
@@ -23,7 +24,7 @@ class TableManagerLogs:
 )
 
 
-def table_log_decorator(operation: str):
+def table_log_decorator(operation: str) -> Any:
     """Creates a decorator that logs the start, failure (if any), and completion of a table operation.
 
     The created decorator wraps a function that performs an operation on a table. The decorator logs
@@ -35,7 +36,7 @@ def table_log_decorator(operation: str):
         operation: The name of the operation to be logged. This will be included in the log messages.
 
     Returns:
-
+        A decorator that can be used to wrap a function that performs an operation on a table.
 
     Example:
         ```python
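The completed Returns section describes a decorator factory that logs start, failure, and completion. A generic, simplified sketch of that pattern; this is not the package's implementation, and the operation name is hypothetical:

```python
import functools
import logging
from typing import Any

logging.basicConfig(level=logging.INFO)


def table_log_decorator(operation: str) -> Any:
    """Log start, failure (if any), and completion of a table operation."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            logging.info("starting %s", operation)
            try:
                result = func(*args, **kwargs)
            except Exception:
                logging.exception("%s failed", operation)
                raise
            logging.info("completed %s", operation)
            return result

        return wrapper

    return decorator


@table_log_decorator("create_table")
def create_table() -> None: ...


create_table()  # logs "starting create_table" then "completed create_table"
```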
@@ -183,7 +184,7 @@ class TableManager(LoggerMixin):
             SessionManager.get_utils().fs.rm(location, recurse=True)
             self._console_logger.info("... deleting physical data.")
 
-    def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
+    def get_delta_table(self, table: Table | None = None, location: str | None = None, spark: Any = None) -> DeltaTable:
         """Get the DeltaTable object from the Table objects location or a location string.
 
         For managed tables, uses the table identifier to access the DeltaTable.
cloe_nessy/pipeline/actions/read_api.py CHANGED

@@ -18,10 +18,10 @@ def process_auth(
     result: AuthBase | None = None
 
     if isinstance(auth, list):
-        auths = [process_auth(sub_auth) for sub_auth in auth]
+        auths = [process_auth(sub_auth) for sub_auth in auth]  # type: ignore[arg-type]
         result = ChainedAuth(*auths)
     elif isinstance(auth, dict):
-        match auth.get("type"):
+        match auth.get("type"):  # type: ignore[arg-type]
         case "basic":
             result = HTTPBasicAuth(auth["username"], auth["password"])
         case "secret_scope":
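`process_auth` dispatches on the `"type"` key with a `match` statement. A trimmed, standalone sketch of that dispatch, reproducing only the `"basic"` branch visible in the hunk and assuming the requests-style `HTTPBasicAuth` seen there; `pick_auth` is a hypothetical name:

```python
from requests.auth import AuthBase, HTTPBasicAuth


def pick_auth(auth: dict) -> AuthBase | None:
    # Same dispatch shape as process_auth; only the "basic" case is shown.
    match auth.get("type"):
        case "basic":
            return HTTPBasicAuth(auth["username"], auth["password"])
        case _:
            return None


assert isinstance(pick_auth({"type": "basic", "username": "u", "password": "p"}), HTTPBasicAuth)
```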
cloe_nessy/pipeline/actions/read_catalog_table.py CHANGED

@@ -65,8 +65,8 @@ class ReadCatalogTableAction(PipelineAction):
 
     name: str = "READ_CATALOG_TABLE"
 
-    @staticmethod
     def run(
+        self,
         context: PipelineContext,
         *,
         table_identifier: str | None = None,
@@ -90,12 +90,6 @@ class ReadCatalogTableAction(PipelineAction):
                 Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].
                 behavior, such as filters or reading modes.
             stream: If True, the action will read the table as a stream.
-            checkpoint_location: The location for storing
-                checkpoints if streaming is enabled.
-            trigger_dict: A dictionary specifying the trigger
-                configuration for the streaming query, such as processing time or
-                continuous processing.
-                behavior, such as filters or reading modes. Defaults to None.
 
         Raises:
             ValueError: If neither `table_identifier` nor `table_metadata.identifier` in the `context` is provided.
cloe_nessy/pipeline/actions/transform_deduplication.py CHANGED

@@ -71,7 +71,7 @@ class TransformDeduplication(PipelineAction):
             raise ValueError("The key_columns and order_by_columns cannot contain the same column")
 
         # check if the key_columns and order_by_columns are not null
-        df_nulls = context.data.filter(F.greatest(*[F.col(c).isNull() for c in key_columns + order_by_columns]) == 1)
+        df_nulls = context.data.filter(F.greatest(*[F.col(c).isNull() for c in key_columns + order_by_columns]) == 1)  # type: ignore[misc]
         if df_nulls.head(1):  # if the filteredDataFrame is not empty
             raise ValueError(
                 "The key_columns and order_by_columns cannot be null. Please check the quality of the provided columns (null handling)"
@@ -100,9 +100,9 @@ class TransformDeduplication(PipelineAction):
 
         # sort the order_by columns in the preferred order
         if descending:
-            order_by_list = [F.col(col_name).desc() for col_name in order_by_columns]
+            order_by_list = [F.col(col_name).desc() for col_name in order_by_columns]  # type: ignore[misc]
         else:
-            order_by_list = [F.col(col_name).asc() for col_name in order_by_columns]
+            order_by_list = [F.col(col_name).asc() for col_name in order_by_columns]  # type: ignore[misc]
 
         window_specification = (
             Window.partitionBy(key_columns)
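The hunk ends at the window specification, which is the heart of the action: rank rows per key in the preferred order and keep the first. A self-contained sketch of that deduplication pattern under the assumption that it ends in a `row_number` filter, as window-based dedup typically does; the column names and data are illustrative:

```python
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("a", 1, "old"), ("a", 2, "new"), ("b", 1, "only")],
    ["key", "version", "payload"],
)

# Rank rows per key, newest version first, and keep the top-ranked row.
window_specification = Window.partitionBy("key").orderBy(F.col("version").desc())
deduplicated = (
    df.withColumn("_rn", F.row_number().over(window_specification))
    .filter(F.col("_rn") == 1)
    .drop("_rn")
)
deduplicated.show()  # one row per key; the highest version wins
```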
cloe_nessy/pipeline/actions/transform_regex_extract.py CHANGED

@@ -148,7 +148,7 @@ class TransformRegexExtract(PipelineAction):
         # Check if extraction is null or empty string
         df = df.withColumn(
             match_info_column_name,
-            F.when((F.col(extract_column_name).isNull()) | (F.col(extract_column_name) == ""), False).otherwise(
+            F.when((F.col(extract_column_name).isNull()) | (F.col(extract_column_name) == ""), False).otherwise(  # type: ignore[misc]
                 True
             ),
         )
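The `when/otherwise` expression flags whether the regex extraction produced anything: `False` when the extracted value is null or empty, `True` otherwise. The same expression in isolation, with illustrative column names and data:

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("42",), ("",), (None,)], ["extracted"])

# Mirrors the when/otherwise expression in the hunk.
df = df.withColumn(
    "has_match",
    F.when(F.col("extracted").isNull() | (F.col("extracted") == ""), False).otherwise(True),
)
df.show()  # True, False, False
```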
cloe_nessy/pipeline/pipeline_plotting_service.py CHANGED

@@ -1,6 +1,7 @@
 """Pipeline plotting service for visualizing pipeline graphs using matplotlib."""
 
 import textwrap
+from typing import Any
 
 import matplotlib.patches as patches
 import matplotlib.pyplot as plt
@@ -25,7 +26,7 @@ class PipelinePlottingService(LoggerMixin):
         }
         self._console_logger = self.get_console_logger()
 
-    def plot_graph(self, pipeline, save_path: str | None = None):
+    def plot_graph(self, pipeline: Any, save_path: str | None = None):
         """Plot and save the pipeline graph as an image.
 
         Args:
cloe_nessy/session/session_manager.py CHANGED

@@ -234,22 +234,19 @@ class SessionManager(LoggerMixin):
         """Get the SparkSession builder based on the current environment."""
         if cls._env is None:
             cls._detect_env()
-        builders = {
-            cls.Environment.DATABRICKS_UI: SparkSession.builder,
-            cls.Environment.FABRIC_UI: SparkSession.builder,
-            cls.Environment.DATABRICKS_CONNECT: cls._get_databricks_connect_builder,
-            cls.Environment.OTHER_REMOTE_SPARK: SparkSession.builder,
-            cls.Environment.STANDALONE_SPARK: SparkSession.builder,
-        }
-        builder = builders.get(cls._env)
-        if builder is None:
-            raise ValueError(f"Unsupported environment: {cls._env}")
 
         match cls._env:
             case cls.Environment.DATABRICKS_CONNECT:
-                return
+                return cls._get_databricks_connect_builder()
+            case (
+                cls.Environment.DATABRICKS_UI
+                | cls.Environment.FABRIC_UI
+                | cls.Environment.OTHER_REMOTE_SPARK
+                | cls.Environment.STANDALONE_SPARK
+            ):
+                return SparkSession.builder
             case _:
-
+                raise ValueError(f"Unsupported environment: {cls._env}")
 
     @staticmethod
     def _get_databricks_connect_builder():
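The refactor replaces the redundant dict-then-match lookup with a single `match` over the environment enum; dotted names in `case` patterns are value patterns, so enum members can be matched directly and grouped with `|`. A standalone sketch of that dispatch shape, returning strings instead of real Spark builders:

```python
from enum import Enum, auto


class Environment(Enum):
    DATABRICKS_UI = auto()
    FABRIC_UI = auto()
    DATABRICKS_CONNECT = auto()
    OTHER_REMOTE_SPARK = auto()
    STANDALONE_SPARK = auto()


def pick_builder(env: Environment) -> str:
    # Dotted names in case patterns are matched by value, not captured,
    # which is what makes this enum dispatch work.
    match env:
        case Environment.DATABRICKS_CONNECT:
            return "databricks-connect builder"
        case (
            Environment.DATABRICKS_UI
            | Environment.FABRIC_UI
            | Environment.OTHER_REMOTE_SPARK
            | Environment.STANDALONE_SPARK
        ):
            return "SparkSession.builder"
        case _:
            raise ValueError(f"Unsupported environment: {env}")


assert pick_builder(Environment.FABRIC_UI) == "SparkSession.builder"
```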
{cloe_nessy-1.0.3.dist-info → cloe_nessy-1.0.5.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 1.0.3
+Version: 1.0.5
 Summary: Your friendly datalake monster.
 Project-URL: homepage, https://initions.com/
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -16,7 +16,7 @@ Requires-Python: <3.14,>=3.11
 Requires-Dist: azure-identity<2.0.0,>=1.19.0
 Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
 Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
-Requires-Dist: fsspec<
+Requires-Dist: fsspec<2026.1.1,>=2026.1.0
 Requires-Dist: httpx<1.0.0,>=0.27.2
 Requires-Dist: jinja2<4.0.0,>=3.1.4
 Requires-Dist: matplotlib<4.0.0,>=3.9.2
{cloe_nessy-1.0.3.dist-info → cloe_nessy-1.0.5.dist-info}/RECORD CHANGED

@@ -20,19 +20,19 @@ cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=Pni_JkKqfbKoEMOC
 cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=w4nrS6IcPPN7UBFBwszCfxgTI6xSE5BdY2WiqGYsFyI,3223
 cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
-cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=
+cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=W3P3gwtkBW3pWbUEOK6agvJfj7QivXhEr_s0dwxhJxQ,1369
 cloe_nessy/integration/delta_loader/delta_loader.py,sha256=WOl44Udvo6hZ5PVFgabpehs8tt5nl9AYyDnnYBba5Ck,6872
 cloe_nessy/integration/delta_loader/delta_loader_factory.py,sha256=vB1cL6-Nc3SkLH1xtazMbMF1MnNYq8-g3GHZzRE3QmE,2251
 cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py,sha256=G_EWUY76ZlbsPZB9LCGlOLVezk7DK6peYXEgt7-sTQE,1683
 cloe_nessy/integration/delta_loader/strategies/__init__.py,sha256=1o5fRWenL5KnUg1hf7kmTuTpG9pbMxchiQTub52Qvwo,255
-cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py,sha256=
-cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=
+cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py,sha256=sDilieWLmaw8JsjQcNRL4znmLZVvkJAzy0exjuTHUKk,16688
+cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=QV2smynYVfi3W7goKotPrGMPkahvIFEWT32LO56eWEI,6191
 cloe_nessy/integration/reader/__init__.py,sha256=NWQx-v6aKE8YOHhsxfeaZnMVq4KLKyRWXzUduf5aVsk,265
-cloe_nessy/integration/reader/api_reader.py,sha256=
+cloe_nessy/integration/reader/api_reader.py,sha256=H6alDoCuGsxuhxpuN-JbL9-eMDchE9ZMq9P0hnz7t1I,20007
 cloe_nessy/integration/reader/catalog_reader.py,sha256=DlnykmFjV_v8SCBh3qaCvf24QM-6TdMFVHx5Mqv7Nvs,4850
-cloe_nessy/integration/reader/excel_reader.py,sha256=
+cloe_nessy/integration/reader/excel_reader.py,sha256=dq_XbuQ_BcQwFuxld-12vzBA2F7hVhNkmMJvgAM-_R8,7937
 cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
-cloe_nessy/integration/reader/file_reader.py,sha256=
+cloe_nessy/integration/reader/file_reader.py,sha256=1pf3kVk8UMEf0JPQiwhMLIszl55aLYaEDwS2Fp_9TT8,8261
 cloe_nessy/integration/reader/reader.py,sha256=YHriYkzsBduBjfI2FnP03VEo15a8UCRZ_sXtre8eaEs,1041
 cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
 cloe_nessy/integration/writer/catalog_writer.py,sha256=dQeXmtfs7J6rP6Ye3OCvxBraFScFX_3SHs7Md58hEeM,5296
@@ -40,30 +40,30 @@ cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70T
 cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
 cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
 cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=nribgHmapp59v3Rw_AfJg0_BRYhP7x2IJIeE74Ia_6A,4748
-cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=
+cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=FvYNXK5k7tHC7Ek4j-q_oAQnuWyDnqWSDtkA9111wvk,13118
 cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
 cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=B7PwPHKrsJL0ZxBT-H9wWSy0gn7shqNDJ0AbrpMHyMg,10135
 cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
 cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
-cloe_nessy/logging/logger_mixin.py,sha256=
+cloe_nessy/logging/logger_mixin.py,sha256=xA12LIVn4yUEoaKmm7nGC1-U3ddSo_HiL3I5MfkvEwU,7409
 cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
 cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
 cloe_nessy/models/column.py,sha256=W4V1Ls1d60VyZ1Ko9Yu9eSipcMbxSzKicn0aloHPiR0,2027
 cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
 cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
-cloe_nessy/models/schema.py,sha256=
-cloe_nessy/models/table.py,sha256=
+cloe_nessy/models/schema.py,sha256=9RA31esHyj9saLeHvvYzK9wjK3GNnr15UO66NtSM368,3478
+cloe_nessy/models/table.py,sha256=XG6MazeamF53AV44L-MCnkKBceXbnVU76mCs9GTB5Lg,12171
 cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
-cloe_nessy/models/volume.py,sha256=
+cloe_nessy/models/volume.py,sha256=kfDDaCL6GzZsv6SshsyXO0NMe7a-yYv_GlLEWk6DOjo,2473
 cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
 cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1syLxjT5Wzo4uog1hFSEs76M,12651
 cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cloe_nessy/models/mixins/read_instance_mixin.py,sha256=
+cloe_nessy/models/mixins/read_instance_mixin.py,sha256=HT42qor6IltR-BTfe_DYa5ylntE7qbArs-3cSjo4KXU,4649
 cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
 cloe_nessy/models/templates/create_table.sql.j2,sha256=71JpUyUZ_ZYO2M0tfIrTXHR7JycypAGsELt2-2d3oO0,2479
 cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
 cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
-cloe_nessy/object_manager/table_manager.py,sha256=
+cloe_nessy/object_manager/table_manager.py,sha256=1LcwHvwRlHF8o4iiECg1gkAGVy5Wkpo1HBXsZlFGJXU,13907
 cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
 cloe_nessy/pipeline/__init__.py,sha256=BUzL4HJaCXWmK7OgKaxdwK72JrrdzfzIvyxOGtM28U0,417
 cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
@@ -72,20 +72,20 @@ cloe_nessy/pipeline/pipeline_builder.py,sha256=_BBl43two0pherkTXZ-Yrpt6XcLW8Q-Z9
 cloe_nessy/pipeline/pipeline_config.py,sha256=oVQ-IH4etTGZVVEnE-5iDPLYOtWpvDlltWFv1nevnqQ,3229
 cloe_nessy/pipeline/pipeline_context.py,sha256=eCOcjyE16rGRom3L85Gy_BbncfQD6i1x31yrWqZws-4,1881
 cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=eeC4RbGBILGN6zkbUyjH-qGgEMtOWV4Kv_VxrHbHMY0,9021
-cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=
+cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=OjdYDQW19yXjdcqs7bDMlyWmv0cQz7Qn4I2cakBgN2E,13139
 cloe_nessy/pipeline/pipeline_step.py,sha256=oTnlvRpB0fbOBQXbPe1URstA5fv-97igCHt_41fKCAk,2082
 cloe_nessy/pipeline/actions/__init__.py,sha256=FfAnSIl-0T6pnaWhClkDqV8nfTdvLvZZJdwycsZMLPw,2990
-cloe_nessy/pipeline/actions/read_api.py,sha256=
-cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=
+cloe_nessy/pipeline/actions/read_api.py,sha256=YMOWPCyxitU5v6HHH_AZqpbHQenPU4-WlaOrb-NsIIk,16245
+cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=MK67NIB5qXTDwCC0EFwHYoOkelFnwY6Z4REpC5BlFb4,6359
 cloe_nessy/pipeline/actions/read_excel.py,sha256=IG_VmDEt1TvGVEO0SY9Fm3awHNjfisR1_7DUmhC3NEE,7968
-cloe_nessy/pipeline/actions/read_files.py,sha256=
+cloe_nessy/pipeline/actions/read_files.py,sha256=o251vfM0S3JxAaEeRQHlBMIlRqhcLlX3fsaPWVKtkBg,7304
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
 cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
 cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
 cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
 cloe_nessy/pipeline/actions/transform_convert_timestamp.py,sha256=2SL078tBcOmytDbt-cR81jZbclwqELsUB4XDLjaCnNo,3579
 cloe_nessy/pipeline/actions/transform_decode.py,sha256=_TQc2GFcgdJvtt6BVrCe1xVnJiSHB_J6mEHH01xIKMY,4464
-cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=
+cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=wOsyiBZOFCFyS9xIPyO1Tie6uBWJuj7XFqlt-cDCEWg,4976
 cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD-bP0Se9vxlBF0K4AgQWs,1976
 cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1QeatjdEis0up4I7cOWBdyo,1446
 cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
@@ -93,13 +93,13 @@ cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpb
 cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=M5_wolJwzJpPTSrZq4yWV3TH7H6BGqbjJkJCwtqPlQo,8507
 cloe_nessy/pipeline/actions/transform_join.py,sha256=ez1M1wVc9khOZj1swMArJbBKXxEpjenUHrW1wL8H330,7200
 cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
-cloe_nessy/pipeline/actions/transform_regex_extract.py,sha256=
+cloe_nessy/pipeline/actions/transform_regex_extract.py,sha256=HCn2OzUOkxaiFg5GssVvsIwrBvAo-xb4sRu4lMgKMcE,6412
 cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
 cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
 cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
 cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h4dXKT7Wr2TDj4zB4k,2718
 cloe_nessy/pipeline/actions/transform_with_column.py,sha256=c-E1yYkeYmovbN1maT7ImpdQlW0nYvYsHCtDvfe4wt8,3357
-cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=
+cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=rb4UOzJhe4tU3GEhAerPKWlzxW87FftyeqgMrRGTnYk,4848
 cloe_nessy/pipeline/actions/write_delta_append.py,sha256=e1g4mDhwAZdKyt4Gb7ZzHcQrJ1duSl8qOn6ONizRsoM,2934
 cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=kZL2PTIwB6Mj4UKg5f9SvU1VaakuYfFoymlcLf-L7dA,6443
 cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
@@ -107,12 +107,12 @@ cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6W
 cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
 cloe_nessy/session/__init__.py,sha256=-MifkekjFu_3A9rWm30CGFQZ4yjruGaydNpbu3uq7Ww,155
 cloe_nessy/session/pyspark_compat.py,sha256=NrgSWAaWz3GgMNLmzpY4cPgneQytNQlOq_dWrD1MveE,444
-cloe_nessy/session/session_manager.py,sha256=
+cloe_nessy/session/session_manager.py,sha256=Bn-AKR0H8LvMRTqZIw2OPo-ta8C4_TNNdyl_RtftnZY,9541
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-1.0.
-cloe_nessy-1.0.
-cloe_nessy-1.0.
+cloe_nessy-1.0.5.dist-info/METADATA,sha256=NnYdhZTmeAouyXDAUTj4ELCOKUzNtDBkd_Rfvlf7ygM,3289
+cloe_nessy-1.0.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+cloe_nessy-1.0.5.dist-info/RECORD,,
{cloe_nessy-1.0.3.dist-info → cloe_nessy-1.0.5.dist-info}/WHEEL: File without changes