satisfactoscript 0.5.8__tar.gz → 0.5.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/PKG-INFO +1 -1
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/pyproject.toml +1 -1
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/__init__.py +2 -1
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/core/core.py +15 -8
- satisfactoscript-0.5.10/src/satisfactoscript/registry.py +7 -0
- satisfactoscript-0.5.10/src/satisfactoscript/utils.py +29 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/SOURCES.txt +6 -1
- satisfactoscript-0.5.10/tests/test_core_join.py +140 -0
- satisfactoscript-0.5.10/tests/test_registry_import_paths.py +26 -0
- satisfactoscript-0.5.10/tests/test_utils_safe_columns.py +46 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/README.md +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/setup.cfg +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/agentic/__init__.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/agentic/agent.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/core/__init__.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/core/config.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/core/loaders.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/core/registry.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/semantic/__init__.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/semantic/semantic.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/requires.txt +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/top_level.txt +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_config.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_core.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_core_connect_patch.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_core_env_detection.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_core_username.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_dummy.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_loaders.py +0 -0
- {satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/tests/test_registry.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from .core.core import SatisfactoEngine
|
|
2
2
|
from .core.registry import RuleRegistry
|
|
3
3
|
from .core.config import ConfigurationManager
|
|
4
|
+
from .utils import safe_columns
|
|
4
5
|
|
|
5
|
-
__all__ = ["SatisfactoEngine", "RuleRegistry", "ConfigurationManager"]
|
|
6
|
+
__all__ = ["SatisfactoEngine", "RuleRegistry", "ConfigurationManager", "safe_columns"]
|
|
@@ -657,17 +657,24 @@ class SatisfactoEngine:
|
|
|
657
657
|
if "join" in schema_dict:
|
|
658
658
|
for j in schema_dict["join"]:
|
|
659
659
|
print(f" -> [Join] {j.get('type', 'left').upper()} JOIN {j['table_from']} -> {j['table_to']}")
|
|
660
|
-
|
|
660
|
+
|
|
661
661
|
on_l = j["on_from"] if isinstance(j["on_from"], list) else [j["on_from"]]
|
|
662
662
|
on_r = j["on_to"] if isinstance(j["on_to"], list) else [j["on_to"]]
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
663
|
+
join_type = j.get("type", "left")
|
|
664
|
+
|
|
666
665
|
df_to = dfs[j["table_to"]]
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
666
|
+
|
|
667
|
+
if on_l == on_r:
|
|
668
|
+
# Same key names: PySpark list-join deduplicates automatically.
|
|
669
|
+
# No schema RPC (AnalyzePlan) needed.
|
|
670
|
+
df_main = df_main.join(df_to, on=on_l, how=join_type)
|
|
671
|
+
else:
|
|
672
|
+
# Different key names: build condition from DataFrame column references
|
|
673
|
+
# and drop right-side join keys post-join using unambiguous df refs.
|
|
674
|
+
# Avoids df_to.columns (AnalyzePlan RPC) that fails with UserContext
|
|
675
|
+
# on Databricks Connect v2 from local environments.
|
|
676
|
+
cond = reduce(lambda x, y: x & y, [df_main[l] == df_to[r] for l, r in zip(on_l, on_r)])
|
|
677
|
+
df_main = df_main.join(df_to, cond, join_type).drop(*[df_to[r] for r in on_r])
|
|
671
678
|
|
|
672
679
|
# 3. BUSINESS RULES
|
|
673
680
|
if "business_rules" in schema_dict:
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility helpers for rule authoring — compatible with Databricks Connect v2.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def safe_columns(df):
    """
    Return the column names of a DataFrame, tolerating a failing AnalyzePlan RPC.

    On native Databricks (cluster / notebook), this delegates to ``df.columns`` as normal.
    On Databricks Connect v2 from local environments (PyCharm / VS Code on Windows), the
    ``AnalyzePlan`` gRPC call that backs ``df.columns`` can fail with
    ``Missing required field 'UserContext'``. In that case this function returns an empty
    list so that optional-column branches in business rules are skipped rather than
    crashing. The swallowed error is logged at WARNING level so that a skipped branch
    can still be diagnosed.

    Args:
        df: A PySpark DataFrame.

    Returns:
        list[str]: Column names, or ``[]`` when schema inspection fails locally.

    Raises:
        Exception: Any error raised by ``df.columns`` whose message contains neither
            ``UserContext`` nor ``INVALID_ARGUMENT`` is re-raised unchanged.
    """
    import logging

    try:
        return df.columns
    except Exception as e:
        # Render the message once; both marker checks and the log line reuse it.
        message = str(e)
        if "UserContext" in message or "INVALID_ARGUMENT" in message:
            logging.getLogger(__name__).warning(
                "safe_columns: schema inspection failed, returning []: %s", message
            )
            return []
        raise
|
{satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/SOURCES.txt
RENAMED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
README.md
|
|
2
2
|
pyproject.toml
|
|
3
3
|
src/satisfactoscript/__init__.py
|
|
4
|
+
src/satisfactoscript/registry.py
|
|
5
|
+
src/satisfactoscript/utils.py
|
|
4
6
|
src/satisfactoscript.egg-info/PKG-INFO
|
|
5
7
|
src/satisfactoscript.egg-info/SOURCES.txt
|
|
6
8
|
src/satisfactoscript.egg-info/dependency_links.txt
|
|
@@ -19,7 +21,10 @@ tests/test_config.py
|
|
|
19
21
|
tests/test_core.py
|
|
20
22
|
tests/test_core_connect_patch.py
|
|
21
23
|
tests/test_core_env_detection.py
|
|
24
|
+
tests/test_core_join.py
|
|
22
25
|
tests/test_core_username.py
|
|
23
26
|
tests/test_dummy.py
|
|
24
27
|
tests/test_loaders.py
|
|
25
|
-
tests/test_registry.py
|
|
28
|
+
tests/test_registry.py
|
|
29
|
+
tests/test_registry_import_paths.py
|
|
30
|
+
tests/test_utils_safe_columns.py
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for process_schema JOIN logic.
|
|
3
|
+
Key invariant: df_to.columns (AnalyzePlan gRPC) must never be called during join planning.
|
|
4
|
+
"""
|
|
5
|
+
import pytest
|
|
6
|
+
from unittest.mock import MagicMock, call, patch
|
|
7
|
+
from satisfactoscript.core.core import SatisfactoEngine
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _make_engine():
    """Build a SatisfactoEngine without running ``__init__``.

    The instance is allocated with ``object.__new__`` and then given a mocked
    ``spark`` attribute and an empty ``config`` dict.
    """
    bare_engine = object.__new__(SatisfactoEngine)
    bare_engine.config = {}
    bare_engine.spark = MagicMock()
    return bare_engine
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _joined_df():
|
|
18
|
+
"""Return a MagicMock DataFrame that also handles chained .drop()."""
|
|
19
|
+
df = MagicMock(name="joined_df")
|
|
20
|
+
df.drop.return_value = df
|
|
21
|
+
return df
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestProcessSchemaJoin:
    """Checks how ``process_schema`` plans joins against mocked DataFrames.

    The right-hand DataFrame is built by ``_right_df`` so that two things a
    plain ``MagicMock`` cannot reveal become observable:

    * reading ``df.columns`` (an attribute access, not a call) is recorded by
      a ``PropertyMock``;
    * ``df[key]`` returns a distinct, stable mock per key, so assertions on
      ``drop(...)`` arguments depend on which keys were referenced.
    """

    @staticmethod
    def _right_df(name="df_b"):
        """Return ``(df, columns_prop)`` for the right-hand side of a join.

        ``columns_prop`` is the ``PropertyMock`` installed as ``df.columns``;
        ``columns_prop.assert_not_called()`` fails if the property was read.
        """
        from unittest.mock import PropertyMock

        df = MagicMock(name=name)

        # Each MagicMock instance has its own class, so patching the type
        # only affects this one mock.
        columns_prop = PropertyMock(return_value=[])
        type(df).columns = columns_prop

        refs = {}

        def _column_ref(key):
            # One mock per key, reused on repeated lookups, so df["id"] in an
            # assertion is the same object the code under test received.
            if key not in refs:
                refs[key] = MagicMock(name=f"{name}[{key!r}]")
            return refs[key]

        df.__getitem__.side_effect = _column_ref
        return df, columns_prop

    def test_same_keys_uses_list_join(self):
        """on_from == on_to → join(df_to, on=['id'], how='inner'). No .columns access."""
        df_a = MagicMock(name="df_a")
        df_b, b_columns = self._right_df()
        joined = _joined_df()
        df_a.join.return_value = joined

        schema = {
            "join": [{
                "table_from": "a",
                "table_to": "b",
                "on_from": "id",
                "on_to": "id",
                "type": "inner",
            }]
        }

        engine = _make_engine()
        result = engine.process_schema(schema, dataframes_in={"a": df_a, "b": df_b})

        df_a.join.assert_called_once_with(df_b, on=["id"], how="inner")
        b_columns.assert_not_called()  # no AnalyzePlan gRPC
        assert result == joined

    def test_same_keys_list_join_default_left(self):
        """Default join type is 'left'."""
        df_a = MagicMock(name="df_a")
        df_b = MagicMock(name="df_b")
        df_a.join.return_value = _joined_df()

        schema = {
            "join": [{"table_from": "a", "table_to": "b", "on_from": "k", "on_to": "k"}]
        }

        engine = _make_engine()
        engine.process_schema(schema, dataframes_in={"a": df_a, "b": df_b})

        df_a.join.assert_called_once_with(df_b, on=["k"], how="left")

    def test_different_keys_uses_expression_join_and_drop(self):
        """on_from != on_to → expression join, then drop right-side keys. No .columns access."""
        df_a = MagicMock(name="df_a")
        df_b, b_columns = self._right_df()
        joined = _joined_df()
        df_a.join.return_value = joined

        schema = {
            "join": [{
                "table_from": "a",
                "table_to": "b",
                "on_from": "order_id",
                "on_to": "id",
                "type": "left",
            }]
        }

        engine = _make_engine()
        result = engine.process_schema(schema, dataframes_in={"a": df_a, "b": df_b})

        # join must be called with an expression condition (not a list of strings)
        join_args, join_kwargs = df_a.join.call_args
        assert join_args[0] is df_b
        assert join_args[2] == "left"
        assert "on" not in join_kwargs  # expression-based, not list-based

        # drop must be called on the joined df with the df_b["id"] reference;
        # any other key would be a different mock and fail this assertion.
        joined.drop.assert_called_once_with(df_b["id"])

        b_columns.assert_not_called()  # no AnalyzePlan gRPC

    def test_multi_key_same_names(self):
        """Multi-column join with same key names uses list-join."""
        df_a = MagicMock(name="df_a")
        df_b, b_columns = self._right_df()
        df_a.join.return_value = _joined_df()

        schema = {
            "join": [{
                "table_from": "a",
                "table_to": "b",
                "on_from": ["org", "date"],
                "on_to": ["org", "date"],
                "type": "left",
            }]
        }

        engine = _make_engine()
        engine.process_schema(schema, dataframes_in={"a": df_a, "b": df_b})

        df_a.join.assert_called_once_with(df_b, on=["org", "date"], how="left")
        b_columns.assert_not_called()

    def test_multi_key_different_names_drops_all_right_keys(self):
        """Multi-column join with different key names drops all right-side keys."""
        df_a = MagicMock(name="df_a")
        df_b, b_columns = self._right_df()
        joined = _joined_df()
        df_a.join.return_value = joined

        schema = {
            "join": [{
                "table_from": "a",
                "table_to": "b",
                "on_from": ["org_l", "date_l"],
                "on_to": ["org_r", "date_r"],
                "type": "inner",
            }]
        }

        engine = _make_engine()
        engine.process_schema(schema, dataframes_in={"a": df_a, "b": df_b})

        # drop must receive both right-side key refs, in order; the refs are
        # distinct per key, so a wrong or repeated key fails here.
        joined.drop.assert_called_once_with(df_b["org_r"], df_b["date_r"])
        b_columns.assert_not_called()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for RuleRegistry import path compatibility.
|
|
3
|
+
All import paths must resolve to the same singleton class.
|
|
4
|
+
"""
|
|
5
|
+
from satisfactoscript.core.registry import RuleRegistry as RR_core
|
|
6
|
+
from satisfactoscript.registry import RuleRegistry as RR_shim
|
|
7
|
+
from satisfactoscript import RuleRegistry as RR_top
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_all_import_paths_resolve_to_same_class():
    """The core, shim and top-level imports must all name one class object."""
    # Chained identity: core is shim, and shim is top, hence core is top.
    assert RR_core is RR_shim is RR_top
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_rule_registered_via_shim_is_visible_everywhere():
    """A rule registered using the shim import must be retrievable from all paths."""
    rule_name = "__test_shim_rule__"
    try:
        @RR_shim.register_rule(name=rule_name)
        def _my_rule(df):
            return df

        assert RR_core.get_rule(rule_name) is _my_rule
        assert RR_top.get_rule(rule_name) is _my_rule
    finally:
        # Always remove the rule, even when an assertion above fails, so it
        # cannot leak into other tests through the shared registry.
        RR_core._rules.pop(rule_name, None)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for satisfactoscript.utils.safe_columns.
|
|
3
|
+
"""
|
|
4
|
+
import pytest
|
|
5
|
+
from unittest.mock import MagicMock, PropertyMock
|
|
6
|
+
from satisfactoscript import safe_columns
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestSafeColumns:
    """Behavioural checks for ``safe_columns`` using mocked DataFrames.

    ``columns`` is installed as a ``PropertyMock`` on the mock's own type so
    that reading the attribute either returns a value or raises.
    """

    def test_returns_df_columns_normally(self):
        """A readable ``columns`` property is passed through unchanged."""
        frame = MagicMock()
        type(frame).columns = PropertyMock(return_value=["id", "name", "value"])

        columns = safe_columns(frame)

        assert columns == ["id", "name", "value"]

    def test_returns_empty_list_on_usercontext_grpc_error(self):
        """An error mentioning ``UserContext`` is converted to ``[]``."""
        failure = Exception("gRPC: Missing required field 'UserContext' in the request.")
        frame = MagicMock()
        type(frame).columns = PropertyMock(side_effect=failure)

        assert safe_columns(frame) == []

    def test_returns_empty_list_on_invalid_argument_grpc_error(self):
        """An error mentioning ``INVALID_ARGUMENT`` is converted to ``[]``."""
        failure = Exception("StatusCode.INVALID_ARGUMENT details: something")
        frame = MagicMock()
        type(frame).columns = PropertyMock(side_effect=failure)

        assert safe_columns(frame) == []

    def test_reraises_unrelated_exceptions(self):
        """Errors without either marker propagate to the caller."""
        frame = MagicMock()
        type(frame).columns = PropertyMock(side_effect=RuntimeError("disk full"))

        with pytest.raises(RuntimeError, match="disk full"):
            safe_columns(frame)

    def test_importable_from_satisfactoscript_top_level(self):
        """``safe_columns`` is exposed by the top-level package."""
        import satisfactoscript

        assert callable(satisfactoscript.safe_columns)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/semantic/__init__.py
RENAMED
|
File without changes
|
{satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript/semantic/semantic.py
RENAMED
|
File without changes
|
|
File without changes
|
{satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/requires.txt
RENAMED
|
File without changes
|
{satisfactoscript-0.5.8 → satisfactoscript-0.5.10}/src/satisfactoscript.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|