cloe-nessy 0.3.14.5b0__py3-none-any.whl → 0.3.14.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,14 +57,14 @@ class TransformCleanColumnNamesAction(PipelineAction):
 
         for c in context.data.schema:
             old_name = c.name
-            new_name = re.sub(single_underscrore_at_beginning, "__", re.sub("\W", "_", old_name))
+            new_name = re.sub(single_underscrore_at_beginning, "__", re.sub(r"\W", "_", old_name))
             with_columns_renamed[old_name] = new_name
 
             if isinstance(c.dataType, (T.StructType | T.ArrayType | T.MapType)):
                 old_column_schema = c.dataType.json()
                 new_column_schema = re.sub(
                     r'(?<="name":")[^"]+',
-                    lambda m: re.sub("\W", "_", str(m.group())),
+                    lambda m: re.sub(r"\W", "_", str(m.group())),
                     old_column_schema,
                 )
                 if isinstance(c.dataType, T.StructType):
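Note on the hunk above: "\W" and r"\W" compile to the same regular expression, because \W is not a recognized Python string escape; the non-raw literal only makes CPython emit an invalid-escape warning (DeprecationWarning since 3.6, SyntaxWarning since 3.12). A minimal standalone sketch of the cleaning rule, run outside the action (the single_underscrore_at_beginning pattern is defined elsewhere in the module and is not shown here):

    import re

    # Every non-word character in a column name becomes an underscore; switching to
    # the raw string only silences the invalid-escape warning, the pattern is unchanged.
    assert re.sub(r"\W", "_", "order id (net)") == "order_id__net_"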
@@ -5,9 +5,15 @@ from typing import Any
 
 from pyspark.sql import SparkSession
 
+from ..logging import LoggerMixin
 
-class SessionManager:
-    """SessionManager is a singleton class that manages the SparkSession instance."""
+
+class SessionManager(LoggerMixin):
+    """SessionManager is a singleton class that manages the SparkSession instance.
+
+    Logging can be configured via the nessy settings framework. The LoggerMixin provides
+    console logging capabilities with debug-level environment detection information.
+    """
 
     class Environment(Enum):
         """Enumeration of execution environments for Spark utilities.
@@ -60,11 +66,14 @@ class SessionManager:
         nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
         if nessy_spark_config:
             try:
-                config = json.loads(nessy_spark_config)
-                if "remote" in config:
-                    builder = builder.remote(config["remote"])
-                    del config["remote"]
-                # Parse the JSON configuration from the environment variable
+                env_config = json.loads(nessy_spark_config)
+                if "remote" in env_config:
+                    builder = builder.remote(env_config["remote"])
+                    del env_config["remote"]
+                if config is None:
+                    config = env_config
+                else:
+                    config.update(env_config)
             except json.JSONDecodeError as e:
                 raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
 
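The rewritten block keeps the environment-supplied options in a separate env_config dict: the "remote" entry is routed to builder.remote(), and everything else is merged into whatever config the caller passed in, with environment values winning on key collisions because config.update(env_config) runs last. A hedged usage sketch of that merge (the Spark Connect endpoint and option values are illustrative, not taken from the package):

    import json
    import os

    # Illustrative only: a caller-supplied config plus NESSY_SPARK_CONFIG from the
    # environment. The endpoint and option values below are made-up examples.
    os.environ["NESSY_SPARK_CONFIG"] = json.dumps(
        {"remote": "sc://spark-connect.example:15002", "spark.sql.shuffle.partitions": "64"}
    )

    config = {"spark.sql.shuffle.partitions": "200", "spark.app.name": "nessy-job"}
    env_config = json.loads(os.environ["NESSY_SPARK_CONFIG"])
    remote = env_config.pop("remote", None)   # would be passed to builder.remote(...)
    config.update(env_config)                 # environment wins on conflicts
    print(remote, config["spark.sql.shuffle.partitions"])  # sc://spark-connect.example:15002 64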
@@ -152,62 +161,68 @@ class SessionManager:
             RuntimeError: If the environment cannot be detected due to
                 import errors or other exceptions.
         """
+        # Create a temporary instance to access LoggerMixin methods
+        temp_instance = cls()
+        logger = temp_instance.get_console_logger()
+
         if cls._env is not None:
-            print(f"DEBUG: Environment already detected: {cls._env}")
+            logger.debug(f"Environment already detected: {cls._env}")
             return cls._env
 
-        print("DEBUG: Starting environment detection...")
+        logger.debug("Starting environment detection...")
 
         # Debug: Print relevant environment variables
         databricks_host = os.getenv("DATABRICKS_HOST")
         nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
 
-        print(f"DEBUG: DATABRICKS_HOST = {databricks_host}")
-        print(f"DEBUG: NESSY_SPARK_CONFIG = {nessy_spark_config}")
+        logger.debug(f"DATABRICKS_HOST = {databricks_host}")
+        logger.debug(f"NESSY_SPARK_CONFIG = {nessy_spark_config}")
 
         if nessy_spark_config:
             try:
                 config = json.loads(nessy_spark_config)
                 if "remote" in config:
-                    print(f"DEBUG: Remote Spark configuration detected: {config['remote']}")
+                    logger.debug(f"Remote Spark configuration detected: {config['remote']}")
+                    cls._env = cls.Environment.OTHER_REMOTE_SPARK
                     return cls.Environment.OTHER_REMOTE_SPARK
+                cls._env = cls.Environment.STANDALONE_SPARK
                 return cls.Environment.STANDALONE_SPARK
             except json.JSONDecodeError as e:
-                print(f"DEBUG: Invalid JSON in NESSY_SPARK_CONFIG: {e}")
+                logger.error(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}")
                 raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
 
-        print("DEBUG: Checking for Databricks Connect...")
+        logger.debug("Checking for Databricks UI...")
         try:
-            from databricks.sdk.dbutils import RemoteDbUtils # type: ignore # noqa: F401
-
-            print("DEBUG: ✓ Detected DATABRICKS_CONNECT via RemoteDbUtils instance")
-            cls._env = cls.Environment.DATABRICKS_CONNECT
-            return cls.Environment.DATABRICKS_CONNECT
+            from dbruntime.dbutils import DBUtils # type: ignore [import-not-found] # noqa: F401
 
+            logger.debug("✓ Detected DATABRICKS_UI via dbruntime.dbutils")
+            cls._env = cls.Environment.DATABRICKS_UI
+            return cls._env
         except ImportError:
-            print("DEBUG: RemoteDbUtils not available")
+            logger.debug("dbruntime.dbutils not available")
 
-        print("DEBUG: Checking for Databricks UI...")
+        logger.debug("Checking for Databricks Connect...")
         try:
-            from dbruntime.dbutils import DBUtils # type: ignore [import-not-found] # noqa: F401
+            from databricks.sdk.dbutils import RemoteDbUtils # type: ignore # noqa: F401
+
+            logger.debug("✓ Detected DATABRICKS_CONNECT via RemoteDbUtils instance")
+            cls._env = cls.Environment.DATABRICKS_CONNECT
+            return cls.Environment.DATABRICKS_CONNECT
 
-            print("DEBUG: ✓ Detected DATABRICKS_UI via dbruntime.dbutils")
-            cls._env = cls.Environment.DATABRICKS_UI
-            return cls._env
         except ImportError:
-            print("DEBUG: dbruntime.dbutils not available")
+            logger.debug("RemoteDbUtils not available")
 
-        print("DEBUG: Checking for Fabric UI...")
+        logger.debug("Checking for Fabric UI...")
         try:
             from notebookutils import mssparkutils # type: ignore # noqa: F401
 
-            print("DEBUG: ✓ Detected FABRIC_UI via notebookutils")
+            logger.debug("✓ Detected FABRIC_UI via notebookutils")
             cls._env = cls.Environment.FABRIC_UI
             return cls._env
         except ImportError:
-            print("DEBUG: notebookutils not available")
+            logger.debug("notebookutils not available")
 
-        print("DEBUG: No environment could be detected")
+        logger.error("No environment could be detected")
         raise RuntimeError(
             "Cannot detect environment. This usually means you're not in a recognized Spark environment. "
             "Ensure you're running in a supported environment (Databricks, Fabric, or with proper Spark "
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.14.5b0
+Version: 0.3.14.6
 Summary: Your friendly datalake monster.
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
 License: MIT
@@ -69,7 +69,7 @@ cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSii
 cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
 cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
-cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=-CEdcXb7Fz5DQNitGlJ8EVBE_LzxfsInyCIO-D7b4iY,3042
+cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
 cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
 cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
 cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=E0ypz9qkHMSatNfnHekP-E6svQVL149M4PV02M03drg,5099
@@ -89,12 +89,12 @@ cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPex
 cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
 cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
-cloe_nessy/session/session_manager.py,sha256=eyiSiQDpLg6GUwLUiZ8bZbrwh1jN_vYeLiuWFEl7RJo,9123
+cloe_nessy/session/session_manager.py,sha256=sgsN_U6c_IjYF1wSjd8Opj9FV1gjAvaqVy4ljFjk8AQ,9710
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.14.5b0.dist-info/METADATA,sha256=nEbjVCZ8v13CJ9gE-KM_U-Uj1hdw5eSfdMPzU9jW3Ls,3294
-cloe_nessy-0.3.14.5b0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cloe_nessy-0.3.14.5b0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
-cloe_nessy-0.3.14.5b0.dist-info/RECORD,,
+cloe_nessy-0.3.14.6.dist-info/METADATA,sha256=7Oe0EDtFCqMLA_y9D0oMqJ-93yy1azqYtKbr-rec9So,3292
+cloe_nessy-0.3.14.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cloe_nessy-0.3.14.6.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.14.6.dist-info/RECORD,,