satisfactoscript 0.5.4__tar.gz → 0.5.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/PKG-INFO +1 -1
  2. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/pyproject.toml +1 -1
  3. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/core/core.py +59 -21
  4. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
  5. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript.egg-info/SOURCES.txt +1 -0
  6. satisfactoscript-0.5.6/tests/test_core_env_detection.py +159 -0
  7. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/README.md +0 -0
  8. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/setup.cfg +0 -0
  9. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/__init__.py +0 -0
  10. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/agentic/__init__.py +0 -0
  11. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/agentic/agent.py +0 -0
  12. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/core/__init__.py +0 -0
  13. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/core/config.py +0 -0
  14. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/core/loaders.py +0 -0
  15. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/core/registry.py +0 -0
  16. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/semantic/__init__.py +0 -0
  17. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript/semantic/semantic.py +0 -0
  18. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
  19. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript.egg-info/requires.txt +0 -0
  20. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/src/satisfactoscript.egg-info/top_level.txt +0 -0
  21. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/tests/test_config.py +0 -0
  22. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/tests/test_core.py +0 -0
  23. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/tests/test_dummy.py +0 -0
  24. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/tests/test_loaders.py +0 -0
  25. {satisfactoscript-0.5.4 → satisfactoscript-0.5.6}/tests/test_registry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.4
3
+ Version: 0.5.6
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "satisfactoscript"
7
- version = "0.5.4"
7
+ version = "0.5.6"
8
8
  description = "An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse."
9
9
  readme = "README.md"
10
10
  authors = [
@@ -188,15 +188,16 @@ class SatisfactoEngine:
188
188
  # Use the modern approach for Databricks Connect v2
189
189
  from databricks.connect import DatabricksSession
190
190
  from databricks.sdk.core import Config
191
-
192
- # This relies on the environment having valid Databricks configuration
193
- # e.g., DATABRICKS_HOST, DATABRICKS_TOKEN, DATABRICKS_CLUSTER_ID
194
- # or a valid ~/.databrickscfg file.
195
- # We pass a specific cluster_id if it exists in env, else it relies on profile
196
- conf = Config()
197
- if os.getenv("DATABRICKS_CLUSTER_ID"):
198
- conf.cluster_id = os.getenv("DATABRICKS_CLUSTER_ID")
199
-
191
+
192
+ # We explicitly inject the values from the environment variables
193
+ # into the Config object so Databricks Connect doesn't rely solely
194
+ # on the implicit ~/.databrickscfg profile.
195
+ conf = Config(
196
+ host=os.getenv("DATABRICKS_HOST"),
197
+ token=os.getenv("DATABRICKS_TOKEN"),
198
+ cluster_id=os.getenv("DATABRICKS_CLUSTER_ID")
199
+ )
200
+
200
201
  self.spark = DatabricksSession.builder.sdkConfig(conf).getOrCreate()
201
202
  print(" [Init] 🚀 Remote DatabricksSession (Databricks Connect) initialized.")
202
203
  except Exception as e:
@@ -293,11 +294,43 @@ class SatisfactoEngine:
293
294
  # Auto-detect environment based on priority list
294
295
  self._auto_detect_environment()
295
296
 
297
+ def _check_catalog_access(self, catalog):
298
+ """
299
+ Verifies access to a catalog using two strategies in order:
300
+ 1. Spark SQL SHOW SCHEMAS (native on Databricks platform/cluster).
301
+ 2. Databricks SDK REST API (works locally via Databricks Connect when
302
+ gRPC DDL calls fail with 'Missing UserContext').
303
+
304
+ Returns:
305
+ bool: True if the catalog is accessible, False otherwise.
306
+ """
307
+ # Strategy 1: Spark SQL (works natively on Databricks)
308
+ try:
309
+ self.spark.sql(f"SHOW SCHEMAS IN `{catalog}`").limit(1).collect()
310
+ return True
311
+ except Exception:
312
+ pass
313
+
314
+ # Strategy 2: Databricks SDK REST API (works locally when gRPC DDL fails)
315
+ try:
316
+ from databricks.sdk import WorkspaceClient
317
+ host = os.getenv("DATABRICKS_HOST")
318
+ token = os.getenv("DATABRICKS_TOKEN")
319
+ if host and token:
320
+ w = WorkspaceClient(host=host, token=token)
321
+ w.catalogs.get(catalog)
322
+ return True
323
+ except Exception:
324
+ pass
325
+
326
+ return False
327
+
296
328
  def _auto_detect_environment(self):
297
329
  """
298
330
  Iterates through 'priority_check' list from YAML.
299
- Tries to verify if the catalog exists/is accessible.
331
+ Tries to verify if the catalog exists/is accessible via _check_catalog_access.
300
332
  Sets self.env and self.db to the first matching environment.
333
+ Falls back to 'default_env' from config if all checks fail (useful locally).
301
334
  """
302
335
  print(" [Config] Auto-detecting active environment...")
303
336
 
@@ -317,23 +350,28 @@ class SatisfactoEngine:
317
350
  catalog = env_config.get("catalog")
318
351
  print(f" -> Checking access to catalog: '{catalog}' ({env_name})...")
319
352
 
320
- try:
321
- # Test simple d'accès au catalogue
322
- # Note: spark.sql("USE catalog") change le contexte global,
323
- # on préfère juste vérifier l'existence via une requête légère ou listCatalogs si dispo.
324
- # Ici on tente un simple show databases dans ce catalogue.
325
- self.spark.sql(f"SHOW SCHEMAS IN `{catalog}`").limit(1).collect()
326
-
353
+ if self._check_catalog_access(catalog):
327
354
  print(f" ✅ Success: Connected to '{catalog}'.")
328
355
  detected_env = env_name
329
356
  self.env = env_name.upper()
330
357
  self.db = catalog
331
- break # On s'arrête au premier qui marche
332
- except Exception as e:
333
- print(f" ❌ Failed: Cannot access '{catalog}'. Error: {e}. Moving to next priority.")
358
+ break
359
+ else:
360
+ print(f" ❌ Failed: Cannot access '{catalog}'. Moving to next priority.")
334
361
 
335
362
  if not detected_env:
336
- raise ValueError("CRITICAL Could not connect to ANY defined environment catalog.")
363
+ # Fallback: use 'default_env' from config (avoids hard failure in local dev)
364
+ default_env = self.config.get("default_env")
365
+ if default_env and default_env in environments:
366
+ catalog = environments[default_env].get("catalog")
367
+ print(
368
+ f" ⚠️ [Fallback] All catalog checks failed. "
369
+ f"Using 'default_env': '{default_env}' -> catalog '{catalog}'."
370
+ )
371
+ self.env = default_env.upper()
372
+ self.db = catalog
373
+ else:
374
+ raise ValueError("CRITICAL ❌ Could not connect to ANY defined environment catalog.")
337
375
 
338
376
  def _is_running_as_job(self):
339
377
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: satisfactoscript
3
- Version: 0.5.4
3
+ Version: 0.5.6
4
4
  Summary: An Enterprise-Ready, Declarative Data Engineering Framework for Databricks Lakehouse.
5
5
  Author: julhouba
6
6
  Classifier: Programming Language :: Python :: 3
@@ -17,6 +17,7 @@ src/satisfactoscript/semantic/__init__.py
17
17
  src/satisfactoscript/semantic/semantic.py
18
18
  tests/test_config.py
19
19
  tests/test_core.py
20
+ tests/test_core_env_detection.py
20
21
  tests/test_dummy.py
21
22
  tests/test_loaders.py
22
23
  tests/test_registry.py
@@ -0,0 +1,159 @@
1
+ """
2
+ Tests for SatisfactoEngine catalog access and environment auto-detection.
3
+ Covers: _check_catalog_access and _auto_detect_environment.
4
+ """
5
+ import pytest
6
+ from unittest.mock import MagicMock, patch
7
+ from satisfactoscript.core.core import SatisfactoEngine
8
+
9
+
10
+ def _make_engine(config, spark=None):
11
+ """
12
+ Build a SatisfactoEngine instance without calling __init__,
13
+ allowing isolated testing of individual methods.
14
+ """
15
+ engine = object.__new__(SatisfactoEngine)
16
+ engine.config = config
17
+ engine.spark = spark or MagicMock()
18
+ return engine
19
+
20
+
21
+ CONFIG_TWO_ENVS = {
22
+ "priority_check": ["dev", "prod"],
23
+ "environments": {
24
+ "dev": {"catalog": "catalog_dev"},
25
+ "prod": {"catalog": "catalog_prd"},
26
+ },
27
+ }
28
+
29
+ CONFIG_WITH_DEFAULT = {
30
+ "priority_check": ["dev", "prod"],
31
+ "default_env": "dev",
32
+ "environments": {
33
+ "dev": {"catalog": "catalog_dev"},
34
+ "prod": {"catalog": "catalog_prd"},
35
+ },
36
+ }
37
+
38
+ CONFIG_EMPTY = {}
39
+
40
+
41
+ # ──────────────────────────────────────────────────────────────────────────────
42
+ # _check_catalog_access
43
+ # ──────────────────────────────────────────────────────────────────────────────
44
+
45
+ class TestCheckCatalogAccess:
46
+
47
+ def test_returns_true_when_spark_sql_succeeds(self):
48
+ spark = MagicMock()
49
+ spark.sql.return_value.limit.return_value.collect.return_value = [("row",)]
50
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
51
+
52
+ assert engine._check_catalog_access("catalog_dev") is True
53
+ spark.sql.assert_called_once_with("SHOW SCHEMAS IN `catalog_dev`")
54
+
55
+ def test_falls_back_to_sdk_when_spark_sql_fails(self):
56
+ spark = MagicMock()
57
+ spark.sql.side_effect = Exception("gRPC: Missing UserContext")
58
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
59
+
60
+ mock_catalog_info = MagicMock()
61
+ mock_workspace_client = MagicMock()
62
+ mock_workspace_client.catalogs.get.return_value = mock_catalog_info
63
+
64
+ with patch.dict("os.environ", {"DATABRICKS_HOST": "https://adb-xxx.azuredatabricks.net", "DATABRICKS_TOKEN": "dapi123"}):
65
+ with patch("satisfactoscript.core.core.WorkspaceClient", return_value=mock_workspace_client, create=True):
66
+ # The SDK import inside the method needs to be patched at the right place
67
+ with patch.dict("sys.modules", {"databricks.sdk": MagicMock(WorkspaceClient=mock_workspace_client.__class__)}):
68
+ # Patch the import inside the method
69
+ import sys
70
+ fake_sdk = MagicMock()
71
+ fake_sdk.WorkspaceClient.return_value = mock_workspace_client
72
+ sys.modules["databricks.sdk"] = fake_sdk
73
+
74
+ result = engine._check_catalog_access("catalog_dev")
75
+
76
+ mock_workspace_client.catalogs.get.assert_called_once_with("catalog_dev")
77
+ assert result is True
78
+
79
+ def test_returns_false_when_both_strategies_fail(self):
80
+ spark = MagicMock()
81
+ spark.sql.side_effect = Exception("SQL error")
82
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
83
+
84
+ with patch.dict("os.environ", {"DATABRICKS_HOST": "https://adb-xxx.azuredatabricks.net", "DATABRICKS_TOKEN": "dapi123"}):
85
+ import sys
86
+ fake_sdk = MagicMock()
87
+ fake_sdk.WorkspaceClient.return_value.catalogs.get.side_effect = Exception("SDK error")
88
+ sys.modules["databricks.sdk"] = fake_sdk
89
+
90
+ result = engine._check_catalog_access("catalog_dev")
91
+
92
+ assert result is False
93
+
94
+ def test_returns_false_when_no_credentials_for_sdk(self):
95
+ spark = MagicMock()
96
+ spark.sql.side_effect = Exception("SQL error")
97
+ engine = _make_engine(CONFIG_TWO_ENVS, spark=spark)
98
+
99
+ with patch.dict("os.environ", {}, clear=True):
100
+ result = engine._check_catalog_access("catalog_dev")
101
+
102
+ assert result is False
103
+
104
+
105
+ # ──────────────────────────────────────────────────────────────────────────────
106
+ # _auto_detect_environment
107
+ # ──────────────────────────────────────────────────────────────────────────────
108
+
109
+ class TestAutoDetectEnvironment:
110
+
111
+ def test_detects_first_accessible_env(self):
112
+ engine = _make_engine(CONFIG_TWO_ENVS)
113
+ engine._check_catalog_access = MagicMock(side_effect=lambda c: c == "catalog_dev")
114
+
115
+ engine._auto_detect_environment()
116
+
117
+ assert engine.env == "DEV"
118
+ assert engine.db == "catalog_dev"
119
+
120
+ def test_skips_inaccessible_env_and_detects_next(self):
121
+ engine = _make_engine(CONFIG_TWO_ENVS)
122
+ engine._check_catalog_access = MagicMock(side_effect=lambda c: c == "catalog_prd")
123
+
124
+ engine._auto_detect_environment()
125
+
126
+ assert engine.env == "PROD"
127
+ assert engine.db == "catalog_prd"
128
+
129
+ def test_uses_default_env_when_all_checks_fail(self):
130
+ engine = _make_engine(CONFIG_WITH_DEFAULT)
131
+ engine._check_catalog_access = MagicMock(return_value=False)
132
+
133
+ engine._auto_detect_environment()
134
+
135
+ assert engine.env == "DEV"
136
+ assert engine.db == "catalog_dev"
137
+
138
+ def test_raises_when_all_fail_and_no_default_env(self):
139
+ engine = _make_engine(CONFIG_TWO_ENVS)
140
+ engine._check_catalog_access = MagicMock(return_value=False)
141
+
142
+ with pytest.raises(ValueError, match="CRITICAL"):
143
+ engine._auto_detect_environment()
144
+
145
+ def test_raises_on_invalid_config(self):
146
+ engine = _make_engine(CONFIG_EMPTY)
147
+
148
+ with pytest.raises(ValueError, match="CRITICAL"):
149
+ engine._auto_detect_environment()
150
+
151
+ def test_default_env_not_used_when_sql_succeeds(self):
152
+ """default_env must not override a successfully detected environment."""
153
+ engine = _make_engine(CONFIG_WITH_DEFAULT)
154
+ engine._check_catalog_access = MagicMock(side_effect=lambda c: c == "catalog_prd")
155
+
156
+ engine._auto_detect_environment()
157
+
158
+ assert engine.env == "PROD"
159
+ assert engine.db == "catalog_prd"