cloe-nessy 0.3.14.5b0__py3-none-any.whl → 0.3.14.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/pipeline/actions/transform_clean_column_names.py +2 -2
- cloe_nessy/session/session_manager.py +45 -30
- {cloe_nessy-0.3.14.5b0.dist-info → cloe_nessy-0.3.14.6.dist-info}/METADATA +1 -1
- {cloe_nessy-0.3.14.5b0.dist-info → cloe_nessy-0.3.14.6.dist-info}/RECORD +6 -6
- {cloe_nessy-0.3.14.5b0.dist-info → cloe_nessy-0.3.14.6.dist-info}/WHEEL +0 -0
- {cloe_nessy-0.3.14.5b0.dist-info → cloe_nessy-0.3.14.6.dist-info}/top_level.txt +0 -0
cloe_nessy/pipeline/actions/transform_clean_column_names.py

@@ -57,14 +57,14 @@ class TransformCleanColumnNamesAction(PipelineAction):
 
         for c in context.data.schema:
             old_name = c.name
-            new_name = re.sub(single_underscrore_at_beginning, "__", re.sub("\W", "_", old_name))
+            new_name = re.sub(single_underscrore_at_beginning, "__", re.sub(r"\W", "_", old_name))
             with_columns_renamed[old_name] = new_name
 
             if isinstance(c.dataType, (T.StructType | T.ArrayType | T.MapType)):
                 old_column_schema = c.dataType.json()
                 new_column_schema = re.sub(
                     r'(?<="name":")[^"]+',
-                    lambda m: re.sub("\W", "_", str(m.group())),
+                    lambda m: re.sub(r"\W", "_", str(m.group())),
                     old_column_schema,
                 )
                 if isinstance(c.dataType, T.StructType):
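The only change in this action is the `r` prefix on the two `\W` patterns. In Python, `"\W"` in a plain string literal is an invalid escape sequence: it has emitted a DeprecationWarning since Python 3.6, a SyntaxWarning since 3.12, and is slated to become a SyntaxError. The raw string hands the identical pattern to the regex engine without relying on that fallback. A minimal sketch of the renaming behavior, with a hypothetical column name:

```python
# Minimal sketch of the clean-column-names behavior; the column name is
# hypothetical and the surrounding action class is omitted.
import re

old_name = "order id (gross)"
# r"\W" matches any non-word character; the raw-string prefix avoids the
# invalid "\W" escape that Python 3.12+ flags with a SyntaxWarning.
new_name = re.sub(r"\W", "_", old_name)
print(new_name)  # order_id__gross_
```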
cloe_nessy/session/session_manager.py

@@ -5,9 +5,15 @@ from typing import Any
 
 from pyspark.sql import SparkSession
 
+from ..logging import LoggerMixin
 
-
-
+
+class SessionManager(LoggerMixin):
+    """SessionManager is a singleton class that manages the SparkSession instance.
+
+    Logging can be configured via the nessy settings framework. The LoggerMixin provides
+    console logging capabilities with debug-level environment detection information.
+    """
 
     class Environment(Enum):
         """Enumeration of execution environments for Spark utilities.
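`SessionManager` now inherits from the package's `LoggerMixin`, which supplies the `get_console_logger()` used throughout the environment-detection rewrite below. The mixin's implementation is not part of this diff; a hypothetical minimal sketch of such a mixin might look like this:

```python
# Hypothetical sketch only: the real cloe_nessy.logging.LoggerMixin is not
# shown in this diff and may be configured via the nessy settings framework.
import logging


class LoggerMixin:
    """Supplies a lazily configured console logger to any subclass."""

    def get_console_logger(self) -> logging.Logger:
        logger = logging.getLogger(type(self).__name__)
        if not logger.handlers:  # attach a handler only once per logger name
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")
            )
            logger.addHandler(handler)
            logger.setLevel(logging.DEBUG)
        return logger
```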
@@ -60,11 +66,14 @@ class SessionManager:
         nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
         if nessy_spark_config:
             try:
-
-                if "remote" in
-                    builder = builder.remote(
-                    del
-
+                env_config = json.loads(nessy_spark_config)
+                if "remote" in env_config:
+                    builder = builder.remote(env_config["remote"])
+                    del env_config["remote"]
+                if config is None:
+                    config = env_config
+                else:
+                    config.update(env_config)
             except json.JSONDecodeError as e:
                 raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
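The rewritten block parses `NESSY_SPARK_CONFIG` into a separate `env_config`, feeds the `remote` key to the Connect builder, and then merges whatever remains into any explicitly passed `config`, letting the environment variable win on conflicting keys via `dict.update`. A standalone illustration of those merge semantics, with made-up values:

```python
# Standalone illustration of the merge semantics added above; the JSON
# payload, the pre-existing config, and the connect URL are made up.
import json

nessy_spark_config = '{"remote": "sc://example-host:15002", "spark.sql.shuffle.partitions": "8"}'
config = {"spark.sql.shuffle.partitions": "200", "spark.app.name": "nessy"}

env_config = json.loads(nessy_spark_config)
if "remote" in env_config:
    remote = env_config["remote"]  # the real code passes this to builder.remote(...)
    del env_config["remote"]
if config is None:
    config = env_config
else:
    config.update(env_config)  # NESSY_SPARK_CONFIG overrides caller-supplied keys

print(config)
# {'spark.sql.shuffle.partitions': '8', 'spark.app.name': 'nessy'}
```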
@@ -152,62 +161,68 @@ class SessionManager:
             RuntimeError: If the environment cannot be detected due to
                 import errors or other exceptions.
         """
+        # Create a temporary instance to access LoggerMixin methods
+        temp_instance = cls()
+        logger = temp_instance.get_console_logger()
+
         if cls._env is not None:
-
+            logger.debug(f"Environment already detected: {cls._env}")
             return cls._env
 
-
+        logger.debug("Starting environment detection...")
 
         # Debug: Print relevant environment variables
         databricks_host = os.getenv("DATABRICKS_HOST")
         nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
 
-
-
+        logger.debug(f"DATABRICKS_HOST = {databricks_host}")
+        logger.debug(f"NESSY_SPARK_CONFIG = {nessy_spark_config}")
 
         if nessy_spark_config:
             try:
                 config = json.loads(nessy_spark_config)
                 if "remote" in config:
-
+                    logger.debug(f"Remote Spark configuration detected: {config['remote']}")
+                    cls._env = cls.Environment.OTHER_REMOTE_SPARK
                     return cls.Environment.OTHER_REMOTE_SPARK
+                cls._env = cls.Environment.STANDALONE_SPARK
                 return cls.Environment.STANDALONE_SPARK
             except json.JSONDecodeError as e:
-
+                logger.error(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}")
                 raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
 
-
+        logger.debug("Checking for Databricks UI...")
         try:
-            from
-
-            print("DEBUG: ✓ Detected DATABRICKS_CONNECT via RemoteDbUtils instance")
-            cls._env = cls.Environment.DATABRICKS_CONNECT
-            return cls.Environment.DATABRICKS_CONNECT
+            from dbruntime.dbutils import DBUtils  # type: ignore [import-not-found] # noqa: F401
 
+            logger.debug("✓ Detected DATABRICKS_UI via dbruntime.dbutils")
+            cls._env = cls.Environment.DATABRICKS_UI
+            return cls._env
         except ImportError:
-
+            logger.debug("dbruntime.dbutils not available")
 
-
+        logger.debug("Checking for Databricks Connect...")
         try:
-            from
+            from databricks.sdk.dbutils import RemoteDbUtils  # type: ignore # noqa: F401
+
+            logger.debug("✓ Detected DATABRICKS_CONNECT via RemoteDbUtils instance")
+            cls._env = cls.Environment.DATABRICKS_CONNECT
+            return cls.Environment.DATABRICKS_CONNECT
 
-            print("DEBUG: ✓ Detected DATABRICKS_UI via dbruntime.dbutils")
-            cls._env = cls.Environment.DATABRICKS_UI
-            return cls._env
         except ImportError:
-
+            logger.debug("RemoteDbUtils not available")
 
-
+        logger.debug("Checking for Fabric UI...")
         try:
             from notebookutils import mssparkutils  # type: ignore # noqa: F401
 
-
+            logger.debug("✓ Detected FABRIC_UI via notebookutils")
             cls._env = cls.Environment.FABRIC_UI
             return cls._env
         except ImportError:
-
+            logger.debug("notebookutils not available")
 
-
+        logger.error("No environment could be detected")
         raise RuntimeError(
             "Cannot detect environment. This usually means you're not in a recognized Spark environment. "
             "Ensure you're running in a supported environment (Databricks, Fabric, or with proper Spark "
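Beyond swapping the `print("DEBUG: ...")` statements for `logger.debug(...)` calls, the rewrite reorders the probes: `dbruntime.dbutils` (importable only on a Databricks cluster) is now tried before `databricks.sdk.dbutils` (importable anywhere the SDK is installed), and the two `NESSY_SPARK_CONFIG` early-return branches now cache their result in `cls._env` before returning. The detection itself is a chain of try-import probes; a generic sketch of that pattern, where only the probe order and labels are taken from the diff:

```python
# Generic sketch of the try-import probe chain used above; the helper
# itself is illustrative, not the package's actual method.
import importlib

PROBES = [
    ("dbruntime.dbutils", "DATABRICKS_UI"),           # only importable on a cluster
    ("databricks.sdk.dbutils", "DATABRICKS_CONNECT"),  # importable wherever the SDK is installed
    ("notebookutils", "FABRIC_UI"),
]


def detect_environment() -> str:
    """Return the label of the first module that imports successfully."""
    for module_name, label in PROBES:
        try:
            importlib.import_module(module_name)
            return label
        except ImportError:
            continue
    raise RuntimeError("Cannot detect environment.")
```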
{cloe_nessy-0.3.14.5b0.dist-info → cloe_nessy-0.3.14.6.dist-info}/RECORD

@@ -69,7 +69,7 @@ cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSii
 cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
 cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
-cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256
+cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
 cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
 cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
 cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=E0ypz9qkHMSatNfnHekP-E6svQVL149M4PV02M03drg,5099
@@ -89,12 +89,12 @@ cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPex
 cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
 cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
-cloe_nessy/session/session_manager.py,sha256=
+cloe_nessy/session/session_manager.py,sha256=sgsN_U6c_IjYF1wSjd8Opj9FV1gjAvaqVy4ljFjk8AQ,9710
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.14.
-cloe_nessy-0.3.14.
-cloe_nessy-0.3.14.
-cloe_nessy-0.3.14.
+cloe_nessy-0.3.14.6.dist-info/METADATA,sha256=7Oe0EDtFCqMLA_y9D0oMqJ-93yy1azqYtKbr-rec9So,3292
+cloe_nessy-0.3.14.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cloe_nessy-0.3.14.6.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.14.6.dist-info/RECORD,,
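For reference, each RECORD row follows the wheel spec (PEP 376 and the binary distribution format): `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64-encoded SHA-256 of the file contents with trailing `=` padding stripped. RECORD cannot hash itself, hence the bare `,,` on its own entry. A short sketch of how such a row is derived, using illustrative file contents:

```python
# Sketch of deriving a wheel RECORD row; the path and file contents are
# illustrative, not taken from the actual wheel.
import base64
import hashlib


def record_row(path: str, data: bytes) -> str:
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"


print(record_row("cloe_nessy/session/__init__.py", b"from .session_manager import SessionManager\n"))
```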