cloe-nessy 0.3.14.4b0__py3-none-any.whl → 0.3.14.6b0__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
cloe_nessy/pipeline/actions/transform_clean_column_names.py

@@ -57,14 +57,14 @@ class TransformCleanColumnNamesAction(PipelineAction):
 
         for c in context.data.schema:
             old_name = c.name
-            new_name = re.sub(single_underscrore_at_beginning, "__", re.sub("\W", "_", old_name))
+            new_name = re.sub(single_underscrore_at_beginning, "__", re.sub(r"\W", "_", old_name))
             with_columns_renamed[old_name] = new_name
 
             if isinstance(c.dataType, (T.StructType | T.ArrayType | T.MapType)):
                 old_column_schema = c.dataType.json()
                 new_column_schema = re.sub(
                     r'(?<="name":")[^"]+',
-                    lambda m: re.sub("\W", "_", str(m.group())),
+                    lambda m: re.sub(r"\W", "_", str(m.group())),
                     old_column_schema,
                 )
                 if isinstance(c.dataType, T.StructType):
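The only functional content of this hunk is the added `r` prefix on the two `"\W"` patterns. A short standalone illustration of why that matters (plain Python, independent of this package):

    import re

    # "\W" in a normal string literal is an invalid escape sequence. CPython
    # keeps the backslash, so the regex still works, but it emits a
    # SyntaxWarning (a DeprecationWarning on versions before 3.12). The raw
    # string r"\W" is the warning-free spelling of the same pattern.
    assert "\\W" == r"\W"

    # The pattern replaces every non-word character with an underscore:
    print(re.sub(r"\W", "_", "order id (EUR)"))  # -> order_id__EUR_

The action's behavior is unchanged; the RECORD hunk below confirms the file grew by exactly the two added characters (3042 → 3044 bytes).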
cloe_nessy/session/session_manager.py

@@ -5,9 +5,15 @@ from typing import Any
 
 from pyspark.sql import SparkSession
 
+from ..logging import LoggerMixin
 
-class SessionManager:
-    """SessionManager is a singleton class that manages the SparkSession instance."""
+
+class SessionManager(LoggerMixin):
+    """SessionManager is a singleton class that manages the SparkSession instance.
+
+    Logging can be configured via the nessy settings framework. The LoggerMixin provides
+    console logging capabilities with debug-level environment detection information.
+    """
 
     class Environment(Enum):
         """Enumeration of execution environments for Spark utilities.
@@ -60,11 +66,14 @@ class SessionManager:
         nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
         if nessy_spark_config:
             try:
-                config = json.loads(nessy_spark_config)
-                if "remote" in config:
-                    builder = builder.remote(config["remote"])
-                    del config["remote"]
-                # Parse the JSON configuration from the environment variable
+                env_config = json.loads(nessy_spark_config)
+                if "remote" in env_config:
+                    builder = builder.remote(env_config["remote"])
+                    del env_config["remote"]
+                if config is None:
+                    config = env_config
+                else:
+                    config.update(env_config)
             except json.JSONDecodeError as e:
                 raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
 
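Net effect of this hunk: values from `NESSY_SPARK_CONFIG` are now merged into an explicitly passed `config` dict instead of being handled in isolation, with the environment variable winning on key collisions (the enclosing method signature is not part of the hunk; the `config` parameter is inferred from its use here). A self-contained illustration of that merge order:

    import json
    import os

    # Simulate the environment variable and an explicitly passed config.
    os.environ["NESSY_SPARK_CONFIG"] = json.dumps(
        {"spark.sql.shuffle.partitions": "64", "remote": "sc://example-host:15002"}
    )
    config = {"spark.sql.shuffle.partitions": "8", "spark.app.name": "demo"}

    env_config = json.loads(os.environ["NESSY_SPARK_CONFIG"])
    env_config.pop("remote", None)  # "remote" is routed to builder.remote(...) instead
    config.update(env_config)       # environment values override on key collisions

    print(config)
    # {'spark.sql.shuffle.partitions': '64', 'spark.app.name': 'demo'}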
@@ -152,58 +161,79 @@ class SessionManager:
             RuntimeError: If the environment cannot be detected due to
                 import errors or other exceptions.
         """
+        # Create a temporary instance to access LoggerMixin methods
+        temp_instance = cls()
+        logger = temp_instance.get_console_logger()
+
         if cls._env is not None:
+            logger.debug(f"Environment already detected: {cls._env}")
             return cls._env
 
-        try:
-            from databricks.sdk.dbutils import RemoteDbUtils  # type: ignore
+        logger.debug("Starting environment detection...")
 
-            if isinstance(dbutils, RemoteDbUtils):  # type: ignore [name-defined]
-                cls._env = cls.Environment.DATABRICKS_CONNECT
-                return cls._env
-        except (ImportError, NameError):
-            pass
+        # Debug: Print relevant environment variables
+        databricks_host = os.getenv("DATABRICKS_HOST")
+        nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
 
-        try:
-            from notebookutils import mssparkutils  # type: ignore # noqa: F401
+        logger.debug(f"DATABRICKS_HOST = {databricks_host}")
+        logger.debug(f"NESSY_SPARK_CONFIG = {nessy_spark_config}")
 
-            cls._env = cls.Environment.FABRIC_UI
-            return cls._env
-        except ImportError:
-            pass
+        if nessy_spark_config:
+            try:
+                config = json.loads(nessy_spark_config)
+                if "remote" in config:
+                    logger.debug(f"Remote Spark configuration detected: {config['remote']}")
+                    cls._env = cls.Environment.OTHER_REMOTE_SPARK
+                    return cls.Environment.OTHER_REMOTE_SPARK
+                cls._env = cls.Environment.STANDALONE_SPARK
+                return cls.Environment.STANDALONE_SPARK
+            except json.JSONDecodeError as e:
+                logger.error(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}")
+                raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
 
+        logger.debug("Checking for Databricks UI...")
         try:
             from dbruntime.dbutils import DBUtils  # type: ignore [import-not-found] # noqa: F401
 
+            logger.debug("✓ Detected DATABRICKS_UI via dbruntime.dbutils")
             cls._env = cls.Environment.DATABRICKS_UI
             return cls._env
         except ImportError:
-            pass
+            logger.debug("dbruntime.dbutils not available")
 
+        logger.debug("Checking for Databricks Connect...")
         try:
-            from pyspark.sql.connect.session import (
-                SparkSession as RemoteSparkSession,  # type: ignore [import-not-found] # noqa: F401
-            )
+            from databricks.sdk.dbutils import RemoteDbUtils  # type: ignore # noqa: F401
+
+            logger.debug("✓ Detected DATABRICKS_CONNECT via RemoteDbUtils instance")
+            cls._env = cls.Environment.DATABRICKS_CONNECT
+            return cls.Environment.DATABRICKS_CONNECT
 
-            cls._env = cls.Environment.OTHER_REMOTE_SPARK
-            return cls._env
         except ImportError:
-            pass
+            logger.debug("RemoteDbUtils not available")
 
+        logger.debug("Checking for Fabric UI...")
         try:
-            from pyspark.sql import SparkSession  # noqa: F401
+            from notebookutils import mssparkutils  # type: ignore # noqa: F401
 
-            cls._env = cls.Environment.STANDALONE_SPARK
+            logger.debug("✓ Detected FABRIC_UI via notebookutils")
+            cls._env = cls.Environment.FABRIC_UI
             return cls._env
         except ImportError:
-            pass
+            logger.debug("notebookutils not available")
 
-        raise RuntimeError("Cannot detect environment.")
+        logger.error("No environment could be detected")
+        raise RuntimeError(
+            "Cannot detect environment. This usually means you're not in a recognized Spark environment. "
+            "Ensure you're running in a supported environment (Databricks, Fabric, or with proper Spark "
+            "installation configured via NESSY_SPARK_CONFIG)."
+        )
 
     @classmethod
     def get_spark_builder(cls):
         """Get the SparkSession builder based on the current environment."""
-        cls._detect_env()
+        if cls._env is None:
+            cls._detect_env()
         builders = {
             cls.Environment.DATABRICKS_UI: SparkSession.builder,
             cls.Environment.FABRIC_UI: SparkSession.builder,
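The rewritten `_detect_env` establishes a fixed detection order: cached result first, then `NESSY_SPARK_CONFIG` (remote vs. standalone Spark), then import probes for Databricks UI, Databricks Connect, and Fabric UI, each step logged at debug level. A condensed sketch of that probe sequence (illustrative only; it returns plain strings rather than the package's `SessionManager.Environment` enum):

    import json
    import os

    def detect_env() -> str:
        # NESSY_SPARK_CONFIG takes precedence over any import probe.
        raw = os.getenv("NESSY_SPARK_CONFIG")
        if raw:
            config = json.loads(raw)  # the real code wraps this and raises ValueError
            return "OTHER_REMOTE_SPARK" if "remote" in config else "STANDALONE_SPARK"
        # Probe hosted environments by attempting their signature imports.
        probes = [
            ("dbruntime.dbutils", "DATABRICKS_UI"),
            ("databricks.sdk.dbutils", "DATABRICKS_CONNECT"),
            ("notebookutils", "FABRIC_UI"),
        ]
        for module, env in probes:
            try:
                __import__(module)
                return env
            except ImportError:
                continue
        raise RuntimeError("Cannot detect environment.")

Two behavioral consequences of the rewrite are worth noting: standalone Spark is now detected only via `NESSY_SPARK_CONFIG` (the old `from pyspark.sql import SparkSession` fallback is gone), and Databricks Connect is detected by a bare import of `RemoteDbUtils` rather than the previous `isinstance(dbutils, RemoteDbUtils)` check, so it will match whenever the databricks-sdk is importable.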
cloe_nessy-0.3.14.4b0.dist-info/METADATA → cloe_nessy-0.3.14.6b0.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.14.4b0
+Version: 0.3.14.6b0
 Summary: Your friendly datalake monster.
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
 License: MIT
@@ -31,7 +31,6 @@ Requires-Dist: matplotlib<4.0.0,>=3.9.2
 Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
 Requires-Dist: fsspec<2025.7.1,>=2025.7.0
 Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
-Requires-Dist: delta-spark>=3.3.2
 
 # cloe-nessy
 
cloe_nessy-0.3.14.4b0.dist-info/RECORD → cloe_nessy-0.3.14.6b0.dist-info/RECORD

@@ -69,7 +69,7 @@ cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSii
 cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
 cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
-cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=-CEdcXb7Fz5DQNitGlJ8EVBE_LzxfsInyCIO-D7b4iY,3042
+cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
 cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
 cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
 cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=E0ypz9qkHMSatNfnHekP-E6svQVL149M4PV02M03drg,5099
@@ -89,12 +89,12 @@ cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPex
 cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
 cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
-cloe_nessy/session/session_manager.py,sha256=6XGssLpLO-DbWnpvyF_6cPD5Of9VcyB_sBNnV17clW8,7856
+cloe_nessy/session/session_manager.py,sha256=sgsN_U6c_IjYF1wSjd8Opj9FV1gjAvaqVy4ljFjk8AQ,9710
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.14.4b0.dist-info/METADATA,sha256=s1pZpHkrRMTLezVfS3VlUABGi8w4KM1SEEZ2RJpZqMY,3328
-cloe_nessy-0.3.14.4b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cloe_nessy-0.3.14.4b0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
-cloe_nessy-0.3.14.4b0.dist-info/RECORD,,
+cloe_nessy-0.3.14.6b0.dist-info/METADATA,sha256=zz5Bu1hIh0bRbgHua_OOAWCDpnw3R0g_zBkubifTmJo,3294
+cloe_nessy-0.3.14.6b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cloe_nessy-0.3.14.6b0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.14.6b0.dist-info/RECORD,,