cloe-nessy 0.3.14.4b0__py3-none-any.whl → 0.3.14.6b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/pipeline/actions/transform_clean_column_names.py +2 -2
- cloe_nessy/session/session_manager.py +62 -32
- {cloe_nessy-0.3.14.4b0.dist-info → cloe_nessy-0.3.14.6b0.dist-info}/METADATA +1 -2
- {cloe_nessy-0.3.14.4b0.dist-info → cloe_nessy-0.3.14.6b0.dist-info}/RECORD +6 -6
- {cloe_nessy-0.3.14.4b0.dist-info → cloe_nessy-0.3.14.6b0.dist-info}/WHEEL +0 -0
- {cloe_nessy-0.3.14.4b0.dist-info → cloe_nessy-0.3.14.6b0.dist-info}/top_level.txt +0 -0
|
@@ -57,14 +57,14 @@ class TransformCleanColumnNamesAction(PipelineAction):
|
|
|
57
57
|
|
|
58
58
|
for c in context.data.schema:
|
|
59
59
|
old_name = c.name
|
|
60
|
-
new_name = re.sub(single_underscrore_at_beginning, "__", re.sub("\W", "_", old_name))
|
|
60
|
+
new_name = re.sub(single_underscrore_at_beginning, "__", re.sub(r"\W", "_", old_name))
|
|
61
61
|
with_columns_renamed[old_name] = new_name
|
|
62
62
|
|
|
63
63
|
if isinstance(c.dataType, (T.StructType | T.ArrayType | T.MapType)):
|
|
64
64
|
old_column_schema = c.dataType.json()
|
|
65
65
|
new_column_schema = re.sub(
|
|
66
66
|
r'(?<="name":")[^"]+',
|
|
67
|
-
lambda m: re.sub("\W", "_", str(m.group())),
|
|
67
|
+
lambda m: re.sub(r"\W", "_", str(m.group())),
|
|
68
68
|
old_column_schema,
|
|
69
69
|
)
|
|
70
70
|
if isinstance(c.dataType, T.StructType):
|
|
@@ -5,9 +5,15 @@ from typing import Any
|
|
|
5
5
|
|
|
6
6
|
from pyspark.sql import SparkSession
|
|
7
7
|
|
|
8
|
+
from ..logging import LoggerMixin
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
class SessionManager(LoggerMixin):
|
|
12
|
+
"""SessionManager is a singleton class that manages the SparkSession instance.
|
|
13
|
+
|
|
14
|
+
Logging can be configured via the nessy settings framework. The LoggerMixin provides
|
|
15
|
+
console logging capabilities with debug-level environment detection information.
|
|
16
|
+
"""
|
|
11
17
|
|
|
12
18
|
class Environment(Enum):
|
|
13
19
|
"""Enumeration of execution environments for Spark utilities.
|
|
@@ -60,11 +66,14 @@ class SessionManager:
|
|
|
60
66
|
nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
|
|
61
67
|
if nessy_spark_config:
|
|
62
68
|
try:
|
|
63
|
-
|
|
64
|
-
if "remote" in
|
|
65
|
-
builder = builder.remote(
|
|
66
|
-
del
|
|
67
|
-
|
|
69
|
+
env_config = json.loads(nessy_spark_config)
|
|
70
|
+
if "remote" in env_config:
|
|
71
|
+
builder = builder.remote(env_config["remote"])
|
|
72
|
+
del env_config["remote"]
|
|
73
|
+
if config is None:
|
|
74
|
+
config = env_config
|
|
75
|
+
else:
|
|
76
|
+
config.update(env_config)
|
|
68
77
|
except json.JSONDecodeError as e:
|
|
69
78
|
raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
|
|
70
79
|
|
|
@@ -152,58 +161,79 @@ class SessionManager:
|
|
|
152
161
|
RuntimeError: If the environment cannot be detected due to
|
|
153
162
|
import errors or other exceptions.
|
|
154
163
|
"""
|
|
164
|
+
# Create a temporary instance to access LoggerMixin methods
|
|
165
|
+
temp_instance = cls()
|
|
166
|
+
logger = temp_instance.get_console_logger()
|
|
167
|
+
|
|
155
168
|
if cls._env is not None:
|
|
169
|
+
logger.debug(f"Environment already detected: {cls._env}")
|
|
156
170
|
return cls._env
|
|
157
171
|
|
|
158
|
-
|
|
159
|
-
from databricks.sdk.dbutils import RemoteDbUtils # type: ignore
|
|
172
|
+
logger.debug("Starting environment detection...")
|
|
160
173
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
except (ImportError, NameError):
|
|
165
|
-
pass
|
|
174
|
+
# Debug: Print relevant environment variables
|
|
175
|
+
databricks_host = os.getenv("DATABRICKS_HOST")
|
|
176
|
+
nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
|
|
166
177
|
|
|
167
|
-
|
|
168
|
-
|
|
178
|
+
logger.debug(f"DATABRICKS_HOST = {databricks_host}")
|
|
179
|
+
logger.debug(f"NESSY_SPARK_CONFIG = {nessy_spark_config}")
|
|
169
180
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
181
|
+
if nessy_spark_config:
|
|
182
|
+
try:
|
|
183
|
+
config = json.loads(nessy_spark_config)
|
|
184
|
+
if "remote" in config:
|
|
185
|
+
logger.debug(f"Remote Spark configuration detected: {config['remote']}")
|
|
186
|
+
cls._env = cls.Environment.OTHER_REMOTE_SPARK
|
|
187
|
+
return cls.Environment.OTHER_REMOTE_SPARK
|
|
188
|
+
cls._env = cls.Environment.STANDALONE_SPARK
|
|
189
|
+
return cls.Environment.STANDALONE_SPARK
|
|
190
|
+
except json.JSONDecodeError as e:
|
|
191
|
+
logger.error(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}")
|
|
192
|
+
raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
|
|
174
193
|
|
|
194
|
+
logger.debug("Checking for Databricks UI...")
|
|
175
195
|
try:
|
|
176
196
|
from dbruntime.dbutils import DBUtils # type: ignore [import-not-found] # noqa: F401
|
|
177
197
|
|
|
198
|
+
logger.debug("✓ Detected DATABRICKS_UI via dbruntime.dbutils")
|
|
178
199
|
cls._env = cls.Environment.DATABRICKS_UI
|
|
179
200
|
return cls._env
|
|
180
201
|
except ImportError:
|
|
181
|
-
|
|
202
|
+
logger.debug("dbruntime.dbutils not available")
|
|
182
203
|
|
|
204
|
+
logger.debug("Checking for Databricks Connect...")
|
|
183
205
|
try:
|
|
184
|
-
from
|
|
185
|
-
|
|
186
|
-
)
|
|
206
|
+
from databricks.sdk.dbutils import RemoteDbUtils # type: ignore # noqa: F401
|
|
207
|
+
|
|
208
|
+
logger.debug("✓ Detected DATABRICKS_CONNECT via RemoteDbUtils instance")
|
|
209
|
+
cls._env = cls.Environment.DATABRICKS_CONNECT
|
|
210
|
+
return cls.Environment.DATABRICKS_CONNECT
|
|
187
211
|
|
|
188
|
-
cls._env = cls.Environment.OTHER_REMOTE_SPARK
|
|
189
|
-
return cls._env
|
|
190
212
|
except ImportError:
|
|
191
|
-
|
|
213
|
+
logger.debug("RemoteDbUtils not available")
|
|
192
214
|
|
|
215
|
+
logger.debug("Checking for Fabric UI...")
|
|
193
216
|
try:
|
|
194
|
-
from
|
|
217
|
+
from notebookutils import mssparkutils # type: ignore # noqa: F401
|
|
195
218
|
|
|
196
|
-
|
|
219
|
+
logger.debug("✓ Detected FABRIC_UI via notebookutils")
|
|
220
|
+
cls._env = cls.Environment.FABRIC_UI
|
|
197
221
|
return cls._env
|
|
198
222
|
except ImportError:
|
|
199
|
-
|
|
223
|
+
logger.debug("notebookutils not available")
|
|
200
224
|
|
|
201
|
-
|
|
225
|
+
logger.error("No environment could be detected")
|
|
226
|
+
raise RuntimeError(
|
|
227
|
+
"Cannot detect environment. This usually means you're not in a recognized Spark environment. "
|
|
228
|
+
"Ensure you're running in a supported environment (Databricks, Fabric, or with proper Spark "
|
|
229
|
+
"installation configured via NESSY_SPARK_CONFIG)."
|
|
230
|
+
)
|
|
202
231
|
|
|
203
232
|
@classmethod
|
|
204
233
|
def get_spark_builder(cls):
|
|
205
234
|
"""Get the SparkSession builder based on the current environment."""
|
|
206
|
-
cls.
|
|
235
|
+
if cls._env is None:
|
|
236
|
+
cls._detect_env()
|
|
207
237
|
builders = {
|
|
208
238
|
cls.Environment.DATABRICKS_UI: SparkSession.builder,
|
|
209
239
|
cls.Environment.FABRIC_UI: SparkSession.builder,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloe-nessy
|
|
3
|
-
Version: 0.3.14.4b0
|
|
3
|
+
Version: 0.3.14.6b0
|
|
4
4
|
Summary: Your friendly datalake monster.
|
|
5
5
|
Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
|
|
6
6
|
License: MIT
|
|
@@ -31,7 +31,6 @@ Requires-Dist: matplotlib<4.0.0,>=3.9.2
|
|
|
31
31
|
Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
|
|
32
32
|
Requires-Dist: fsspec<2025.7.1,>=2025.7.0
|
|
33
33
|
Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
|
|
34
|
-
Requires-Dist: delta-spark>=3.3.2
|
|
35
34
|
|
|
36
35
|
# cloe-nessy
|
|
37
36
|
|
|
@@ -69,7 +69,7 @@ cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSii
|
|
|
69
69
|
cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
|
|
70
70
|
cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
|
|
71
71
|
cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
|
|
72
|
-
cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256
|
|
72
|
+
cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
|
|
73
73
|
cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
|
|
74
74
|
cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
|
|
75
75
|
cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=E0ypz9qkHMSatNfnHekP-E6svQVL149M4PV02M03drg,5099
|
|
@@ -89,12 +89,12 @@ cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPex
|
|
|
89
89
|
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
|
|
90
90
|
cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
|
|
91
91
|
cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
|
|
92
|
-
cloe_nessy/session/session_manager.py,sha256=
|
|
92
|
+
cloe_nessy/session/session_manager.py,sha256=sgsN_U6c_IjYF1wSjd8Opj9FV1gjAvaqVy4ljFjk8AQ,9710
|
|
93
93
|
cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
|
|
94
94
|
cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
|
|
95
95
|
cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
96
96
|
cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
|
|
97
|
-
cloe_nessy-0.3.14.
|
|
98
|
-
cloe_nessy-0.3.14.
|
|
99
|
-
cloe_nessy-0.3.14.
|
|
100
|
-
cloe_nessy-0.3.14.
|
|
97
|
+
cloe_nessy-0.3.14.6b0.dist-info/METADATA,sha256=zz5Bu1hIh0bRbgHua_OOAWCDpnw3R0g_zBkubifTmJo,3294
|
|
98
|
+
cloe_nessy-0.3.14.6b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
99
|
+
cloe_nessy-0.3.14.6b0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
|
|
100
|
+
cloe_nessy-0.3.14.6b0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|