satisfactoscript 0.5.5__tar.gz → 0.5.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/PKG-INFO +1 -1
  2. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/pyproject.toml +1 -1
  3. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/core/core.py +79 -23
  4. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
  5. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript.egg-info/SOURCES.txt +2 -0
  6. satisfactoscript-0.5.7/tests/test_core_env_detection.py +159 -0
  7. satisfactoscript-0.5.7/tests/test_core_username.py +101 -0
  8. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/README.md +0 -0
  9. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/setup.cfg +0 -0
  10. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/__init__.py +0 -0
  11. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/agentic/__init__.py +0 -0
  12. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/agentic/agent.py +0 -0
  13. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/core/__init__.py +0 -0
  14. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/core/config.py +0 -0
  15. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/core/loaders.py +0 -0
  16. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/core/registry.py +0 -0
  17. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/semantic/__init__.py +0 -0
  18. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript/semantic/semantic.py +0 -0
  19. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
  20. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript.egg-info/requires.txt +0 -0
  21. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/src/satisfactoscript.egg-info/top_level.txt +0 -0
  22. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/tests/test_config.py +0 -0
  23. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/tests/test_core.py +0 -0
  24. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/tests/test_dummy.py +0 -0
  25. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/tests/test_loaders.py +0 -0
  26. {satisfactoscript-0.5.5 → satisfactoscript-0.5.7}/tests/test_registry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.5
3
+ Version: 0.5.7
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "satisfactoscript"
7
- version = "0.5.5"
7
+ version = "0.5.7"
8
8
  description = "An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse."
9
9
  readme = "README.md"
10
10
  authors = [
@@ -188,7 +188,7 @@ class SatisfactoEngine:
188
188
  # Use the modern approach for Databricks Connect v2
189
189
  from databricks.connect import DatabricksSession
190
190
  from databricks.sdk.core import Config
191
-
191
+
192
192
  # We explicitly inject the values from the environment variables
193
193
  # into the Config object so Databricks Connect doesn't rely solely
194
194
  # on the implicit ~\.databrickscfg profile.
@@ -197,7 +197,7 @@ class SatisfactoEngine:
197
197
  token=os.getenv("DATABRICKS_TOKEN"),
198
198
  cluster_id=os.getenv("DATABRICKS_CLUSTER_ID")
199
199
  )
200
-
200
+
201
201
  self.spark = DatabricksSession.builder.sdkConfig(conf).getOrCreate()
202
202
  print(" [Init] 🚀 Remote DatabricksSession (Databricks Connect) initialized.")
203
203
  except Exception as e:
@@ -294,11 +294,43 @@ class SatisfactoEngine:
294
294
  # Auto-detect environment based on priority list
295
295
  self._auto_detect_environment()
296
296
 
297
+ def _check_catalog_access(self, catalog):
298
+ """
299
+ Verifies access to a catalog using two strategies in order:
300
+ 1. Spark SQL SHOW SCHEMAS (native on Databricks platform/cluster).
301
+ 2. Databricks SDK REST API (works locally via Databricks Connect when
302
+ gRPC DDL calls fail with 'Missing UserContext').
303
+
304
+ Returns:
305
+ bool: True if the catalog is accessible, False otherwise.
306
+ """
307
+ # Strategy 1: Spark SQL (works natively on Databricks)
308
+ try:
309
+ self.spark.sql(f"SHOW SCHEMAS IN `{catalog}`").limit(1).collect()
310
+ return True
311
+ except Exception:
312
+ pass
313
+
314
+ # Strategy 2: Databricks SDK REST API (works locally when gRPC DDL fails)
315
+ try:
316
+ from databricks.sdk import WorkspaceClient
317
+ host = os.getenv("DATABRICKS_HOST")
318
+ token = os.getenv("DATABRICKS_TOKEN")
319
+ if host and token:
320
+ w = WorkspaceClient(host=host, token=token)
321
+ w.catalogs.get(catalog)
322
+ return True
323
+ except Exception:
324
+ pass
325
+
326
+ return False
327
+
297
328
  def _auto_detect_environment(self):
298
329
  """
299
330
  Iterates through 'priority_check' list from YAML.
300
- Tries to verify if the catalog exists/is accessible.
331
+ Tries to verify if the catalog exists/is accessible via _check_catalog_access.
301
332
  Sets self.env and self.db to the first matching environment.
333
+ Falls back to 'default_env' from config if all checks fail (useful locally).
302
334
  """
303
335
  print(" [Config] Auto-detecting active environment...")
304
336
 
@@ -318,23 +350,28 @@ class SatisfactoEngine:
318
350
  catalog = env_config.get("catalog")
319
351
  print(f" -> Checking access to catalog: '{catalog}' ({env_name})...")
320
352
 
321
- try:
322
- # Test simple d'accès au catalogue
323
- # Note: spark.sql("USE catalog") change le contexte global,
324
- # on préfère juste vérifier l'existence via une requête légère ou listCatalogs si dispo.
325
- # Ici on tente un simple show databases dans ce catalogue.
326
- self.spark.sql(f"SHOW SCHEMAS IN `{catalog}`").limit(1).collect()
327
-
353
+ if self._check_catalog_access(catalog):
328
354
  print(f" ✅ Success: Connected to '{catalog}'.")
329
355
  detected_env = env_name
330
356
  self.env = env_name.upper()
331
357
  self.db = catalog
332
- break # On s'arrête au premier qui marche
333
- except Exception as e:
334
- print(f" ❌ Failed: Cannot access '{catalog}'. Error: {e}. Moving to next priority.")
358
+ break
359
+ else:
360
+ print(f" ❌ Failed: Cannot access '{catalog}'. Moving to next priority.")
335
361
 
336
362
  if not detected_env:
337
- raise ValueError("CRITICAL Could not connect to ANY defined environment catalog.")
363
+ # Fallback: use 'default_env' from config (avoids hard failure in local dev)
364
+ default_env = self.config.get("default_env")
365
+ if default_env and default_env in environments:
366
+ catalog = environments[default_env].get("catalog")
367
+ print(
368
+ f" ⚠️ [Fallback] All catalog checks failed. "
369
+ f"Using 'default_env': '{default_env}' -> catalog '{catalog}'."
370
+ )
371
+ self.env = default_env.upper()
372
+ self.db = catalog
373
+ else:
374
+ raise ValueError("CRITICAL ❌ Could not connect to ANY defined environment catalog.")
338
375
 
339
376
  def _is_running_as_job(self):
340
377
  """
@@ -372,25 +409,44 @@ class SatisfactoEngine:
372
409
  def _get_clean_username(self):
373
410
  """
374
411
  Récupère le user courant (ex: 'julien_hou').
375
-
412
+ Three strategies in order:
413
+ 1. Spark SQL current_user() — works natively on Databricks.
414
+ 2. Databricks SDK REST API — works locally when gRPC DDL fails.
415
+ 3. DBUtils notebook context tags — fallback for classic clusters.
416
+
376
417
  Returns:
377
418
  str: The cleaned username of the current execution context.
378
419
  """
420
+ def _clean(email):
421
+ return email.split('@')[0].replace('.', '_').replace('-', '_').lower()
422
+
423
+ # Strategy 1: Spark SQL (works natively on Databricks)
379
424
  try:
380
- # Priorité à Spark SQL (compatible VS Code & Notebooks)
381
425
  rows = self.spark.sql("SELECT current_user()").collect()
382
- if rows:
383
- email = rows[0][0]
384
- return email.split('@')[0].replace('.', '_').replace('-', '_').lower()
385
- except:
426
+ if rows and rows[0][0]:
427
+ return _clean(rows[0][0])
428
+ except Exception:
386
429
  pass
387
430
 
388
- # Fallback Tags
431
+ # Strategy 2: Databricks SDK REST API (works locally when gRPC fails)
432
+ try:
433
+ from databricks.sdk import WorkspaceClient
434
+ host = os.getenv("DATABRICKS_HOST")
435
+ token = os.getenv("DATABRICKS_TOKEN")
436
+ if host and token:
437
+ w = WorkspaceClient(host=host, token=token)
438
+ email = w.current_user.me().user_name
439
+ if email:
440
+ return _clean(email)
441
+ except Exception:
442
+ pass
443
+
444
+ # Strategy 3: DBUtils notebook context tags (classic clusters / notebooks)
389
445
  try:
390
446
  context = self.dbutils.notebook.entry_point.getDbutils().notebook().getContext()
391
447
  email = context.tags().apply('user')
392
- return email.split('@')[0].replace('.', '_').replace('-', '_').lower()
393
- except:
448
+ return _clean(email)
449
+ except Exception:
394
450
  return "unknown_user"
395
451
 
396
452
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.5
3
+ Version: 0.5.7
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3
@@ -17,6 +17,8 @@ src/satisfactoscript/semantic/__init__.py
17
17
  src/satisfactoscript/semantic/semantic.py
18
18
  tests/test_config.py
19
19
  tests/test_core.py
20
+ tests/test_core_env_detection.py
21
+ tests/test_core_username.py
20
22
  tests/test_dummy.py
21
23
  tests/test_loaders.py
22
24
  tests/test_registry.py
@@ -0,0 +1,159 @@
1
+ """
2
+ Tests for SatisfactoEngine catalog access and environment auto-detection.
3
+ Covers: _check_catalog_access and _auto_detect_environment.
4
+ """
5
+ import pytest
6
+ from unittest.mock import MagicMock, patch
7
+ from satisfactoscript.core.core import SatisfactoEngine
8
+
9
+
10
+ def _make_engine(config, spark=None):
11
+ """
12
+ Build a SatisfactoEngine instance without calling __init__,
13
+ allowing isolated testing of individual methods.
14
+ """
15
+ engine = object.__new__(SatisfactoEngine)
16
+ engine.config = config
17
+ engine.spark = spark or MagicMock()
18
+ return engine
19
+
20
+
21
+ CONFIG_TWO_ENVS = {
22
+ "priority_check": ["dev", "prod"],
23
+ "environments": {
24
+ "dev": {"catalog": "catalog_dev"},
25
+ "prod": {"catalog": "catalog_prd"},
26
+ },
27
+ }
28
+
29
+ CONFIG_WITH_DEFAULT = {
30
+ "priority_check": ["dev", "prod"],
31
+ "default_env": "dev",
32
+ "environments": {
33
+ "dev": {"catalog": "catalog_dev"},
34
+ "prod": {"catalog": "catalog_prd"},
35
+ },
36
+ }
37
+
38
+ CONFIG_EMPTY = {}
39
+
40
+
41
+ # ──────────────────────────────────────────────────────────────────────────────
42
+ # _check_catalog_access
43
+ # ──────────────────────────────────────────────────────────────────────────────
44
+
45
+ class TestCheckCatalogAccess:
46
+
47
+ def test_returns_true_when_spark_sql_succeeds(self):
48
+ spark = MagicMock()
49
+ spark.sql.return_value.limit.return_value.collect.return_value = [("row",)]
50
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
51
+
52
+ assert engine._check_catalog_access("catalog_dev") is True
53
+ spark.sql.assert_called_once_with("SHOW SCHEMAS IN `catalog_dev`")
54
+
55
+ def test_falls_back_to_sdk_when_spark_sql_fails(self):
56
+ spark = MagicMock()
57
+ spark.sql.side_effect = Exception("gRPC: Missing UserContext")
58
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
59
+
60
+ mock_catalog_info = MagicMock()
61
+ mock_workspace_client = MagicMock()
62
+ mock_workspace_client.catalogs.get.return_value = mock_catalog_info
63
+
64
+ with patch.dict("os.environ", {"DATABRICKS_HOST": "https://adb-xxx.azuredatabricks.net", "DATABRICKS_TOKEN": "dapi123"}):
65
+ with patch("satisfactoscript.core.core.WorkspaceClient", return_value=mock_workspace_client, create=True):
66
+ # The SDK import inside the method needs to be patched at the right place
67
+ with patch.dict("sys.modules", {"databricks.sdk": MagicMock(WorkspaceClient=mock_workspace_client.__class__)}):
68
+ # Patch the import inside the method
69
+ import sys
70
+ fake_sdk = MagicMock()
71
+ fake_sdk.WorkspaceClient.return_value = mock_workspace_client
72
+ sys.modules["databricks.sdk"] = fake_sdk
73
+
74
+ result = engine._check_catalog_access("catalog_dev")
75
+
76
+ mock_workspace_client.catalogs.get.assert_called_once_with("catalog_dev")
77
+ assert result is True
78
+
79
+ def test_returns_false_when_both_strategies_fail(self):
80
+ spark = MagicMock()
81
+ spark.sql.side_effect = Exception("SQL error")
82
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
83
+
84
+ with patch.dict("os.environ", {"DATABRICKS_HOST": "https://adb-xxx.azuredatabricks.net", "DATABRICKS_TOKEN": "dapi123"}):
85
+ import sys
86
+ fake_sdk = MagicMock()
87
+ fake_sdk.WorkspaceClient.return_value.catalogs.get.side_effect = Exception("SDK error")
88
+ sys.modules["databricks.sdk"] = fake_sdk
89
+
90
+ result = engine._check_catalog_access("catalog_dev")
91
+
92
+ assert result is False
93
+
94
+ def test_returns_false_when_no_credentials_for_sdk(self):
95
+ spark = MagicMock()
96
+ spark.sql.side_effect = Exception("SQL error")
97
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
98
+
99
+ with patch.dict("os.environ", {}, clear=True):
100
+ result = engine._check_catalog_access("catalog_dev")
101
+
102
+ assert result is False
103
+
104
+
105
+ # ──────────────────────────────────────────────────────────────────────────────
106
+ # _auto_detect_environment
107
+ # ──────────────────────────────────────────────────────────────────────────────
108
+
109
+ class TestAutoDetectEnvironment:
110
+
111
+ def test_detects_first_accessible_env(self):
112
+ engine = _make_engine(CONFIG_TWO_ENVS)
113
+ engine._check_catalog_access = MagicMock(side_effect=lambda c: c == "catalog_dev")
114
+
115
+ engine._auto_detect_environment()
116
+
117
+ assert engine.env == "DEV"
118
+ assert engine.db == "catalog_dev"
119
+
120
+ def test_skips_inaccessible_env_and_detects_next(self):
121
+ engine = _make_engine(CONFIG_TWO_ENVS)
122
+ engine._check_catalog_access = MagicMock(side_effect=lambda c: c == "catalog_prd")
123
+
124
+ engine._auto_detect_environment()
125
+
126
+ assert engine.env == "PROD"
127
+ assert engine.db == "catalog_prd"
128
+
129
+ def test_uses_default_env_when_all_checks_fail(self):
130
+ engine = _make_engine(CONFIG_WITH_DEFAULT)
131
+ engine._check_catalog_access = MagicMock(return_value=False)
132
+
133
+ engine._auto_detect_environment()
134
+
135
+ assert engine.env == "DEV"
136
+ assert engine.db == "catalog_dev"
137
+
138
+ def test_raises_when_all_fail_and_no_default_env(self):
139
+ engine = _make_engine(CONFIG_TWO_ENVS)
140
+ engine._check_catalog_access = MagicMock(return_value=False)
141
+
142
+ with pytest.raises(ValueError, match="CRITICAL"):
143
+ engine._auto_detect_environment()
144
+
145
+ def test_raises_on_invalid_config(self):
146
+ engine = _make_engine(CONFIG_EMPTY)
147
+
148
+ with pytest.raises(ValueError, match="CRITICAL"):
149
+ engine._auto_detect_environment()
150
+
151
+ def test_default_env_not_used_when_sql_succeeds(self):
152
+ """default_env must not override a successfully detected environment."""
153
+ engine = _make_engine(CONFIG_WITH_DEFAULT)
154
+ engine._check_catalog_access = MagicMock(side_effect=lambda c: c == "catalog_prd")
155
+
156
+ engine._auto_detect_environment()
157
+
158
+ assert engine.env == "PROD"
159
+ assert engine.db == "catalog_prd"
@@ -0,0 +1,101 @@
1
+ """
2
+ Tests for SatisfactoEngine._get_clean_username.
3
+ Covers: Spark SQL, Databricks SDK REST API, DBUtils fallback, unknown_user.
4
+ """
5
+ import sys
6
+ import pytest
7
+ from unittest.mock import MagicMock, patch
8
+ from satisfactoscript.core.core import SatisfactoEngine
9
+
10
+
11
+ def _make_engine(spark=None, dbutils=None):
12
+ engine = object.__new__(SatisfactoEngine)
13
+ engine.spark = spark or MagicMock()
14
+ engine.dbutils = dbutils or MagicMock()
15
+ return engine
16
+
17
+
18
+ class TestGetCleanUsername:
19
+
20
+ def test_returns_username_from_spark_sql(self):
21
+ spark = MagicMock()
22
+ spark.sql.return_value.collect.return_value = [("julien.houba@company.com",)]
23
+ engine = _make_engine(spark=spark)
24
+
25
+ assert engine._get_clean_username() == "julien_houba"
26
+
27
+ def test_cleans_dots_and_dashes(self):
28
+ spark = MagicMock()
29
+ spark.sql.return_value.collect.return_value = [("jean-pierre.dupont@company.com",)]
30
+ engine = _make_engine(spark=spark)
31
+
32
+ assert engine._get_clean_username() == "jean_pierre_dupont"
33
+
34
+ def test_falls_back_to_sdk_when_spark_sql_fails(self):
35
+ spark = MagicMock()
36
+ spark.sql.side_effect = Exception("gRPC: Missing UserContext")
37
+ engine = _make_engine(spark=spark)
38
+
39
+ mock_me = MagicMock()
40
+ mock_me.user_name = "hqhoujul@company.com"
41
+ mock_wc = MagicMock()
42
+ mock_wc.current_user.me.return_value = mock_me
43
+
44
+ fake_sdk = MagicMock()
45
+ fake_sdk.WorkspaceClient.return_value = mock_wc
46
+ sys.modules["databricks.sdk"] = fake_sdk
47
+
48
+ with patch.dict("os.environ", {"DATABRICKS_HOST": "https://adb.net", "DATABRICKS_TOKEN": "dapi123"}):
49
+ result = engine._get_clean_username()
50
+
51
+ assert result == "hqhoujul"
52
+
53
+ def test_falls_back_to_dbutils_when_sql_and_sdk_fail(self):
54
+ spark = MagicMock()
55
+ spark.sql.side_effect = Exception("SQL error")
56
+ engine = _make_engine(spark=spark)
57
+
58
+ fake_sdk = MagicMock()
59
+ fake_sdk.WorkspaceClient.side_effect = Exception("SDK error")
60
+ sys.modules["databricks.sdk"] = fake_sdk
61
+
62
+ context = MagicMock()
63
+ context.tags.return_value.apply.return_value = "fallback.user@company.com"
64
+ engine.dbutils.notebook.entry_point.getDbutils.return_value.notebook.return_value.getContext.return_value = context
65
+
66
+ with patch.dict("os.environ", {"DATABRICKS_HOST": "https://adb.net", "DATABRICKS_TOKEN": "dapi123"}):
67
+ result = engine._get_clean_username()
68
+
69
+ assert result == "fallback_user"
70
+
71
+ def test_returns_unknown_user_when_all_strategies_fail(self):
72
+ spark = MagicMock()
73
+ spark.sql.side_effect = Exception("SQL error")
74
+ engine = _make_engine(spark=spark)
75
+
76
+ fake_sdk = MagicMock()
77
+ fake_sdk.WorkspaceClient.side_effect = Exception("SDK error")
78
+ sys.modules["databricks.sdk"] = fake_sdk
79
+
80
+ engine.dbutils.notebook.entry_point.getDbutils.side_effect = Exception("no context")
81
+
82
+ with patch.dict("os.environ", {}, clear=True):
83
+ result = engine._get_clean_username()
84
+
85
+ assert result == "unknown_user"
86
+
87
+ def test_sdk_not_called_when_no_credentials(self):
88
+ spark = MagicMock()
89
+ spark.sql.side_effect = Exception("SQL error")
90
+ engine = _make_engine(spark=spark)
91
+
92
+ fake_sdk = MagicMock()
93
+ sys.modules["databricks.sdk"] = fake_sdk
94
+
95
+ engine.dbutils.notebook.entry_point.getDbutils.side_effect = Exception("no context")
96
+
97
+ with patch.dict("os.environ", {}, clear=True):
98
+ engine._get_clean_username()
99
+
100
+ # WorkspaceClient must not be called without credentials
101
+ fake_sdk.WorkspaceClient.assert_not_called()