satisfactoscript 1.1.0.tar.gz → 1.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/PKG-INFO +1 -1
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/pyproject.toml +1 -1
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/backends/snowpark.py +6 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/backends/sql_base.py +57 -17
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/interpreter.py +4 -1
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/ir.py +9 -1
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/op_catalog.py +13 -2
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/operations.py +108 -45
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/rule_executor.py +22 -5
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript.egg-info/PKG-INFO +1 -1
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_backend_snowpark.py +10 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_backend_sql_base.py +57 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_core.py +60 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_core_join.py +57 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_ir.py +23 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_op_catalog.py +9 -1
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_registry_import_paths.py +8 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_rule_executor.py +31 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/README.md +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/setup.cfg +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/builder_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/dictionary_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/exporter.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/history.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/hub.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/lineage_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/models.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/orchestrator.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/quality_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/resolver.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/agentic/user_profile.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/backends/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/backends/bigquery.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/backends/spark.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/cli.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/backend.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/catalog_inspector.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/config.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/context.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/core.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/environment.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/json_schema.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/loaders.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/patterns.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/registry.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/rule_analyzer.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/rule_planner.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/sandbox.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/schema_loader.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/writer.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/lineage/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/lineage/dictionary.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/lineage/renderer.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/lineage/tracker.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/observability/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/observability/alerts.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/observability/checks.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/observability/contracts.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/observability/history.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/observability/monitor.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/observability/reporter.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/registry.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/semantic/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/semantic/builder.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/semantic/extractor.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/semantic/glossary.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/semantic/llm_provider.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/semantic/semantic.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/semantic/validator.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/serving/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/serving/_response_serializer.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/serving/chat_model.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/sinks/__init__.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/sinks/jdbc.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/spark_factory.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/utils.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript.egg-info/SOURCES.txt +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript.egg-info/dependency_links.txt +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript.egg-info/entry_points.txt +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript.egg-info/requires.txt +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript.egg-info/top_level.txt +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_backend_bigquery.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_backend_protocol.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_backend_spark.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_builder_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_catalog_inspector.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_cli.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_config.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_core_connect_patch.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_core_env_detection.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_core_username.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_dictionary_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_dummy.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_engine_fake_backend.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_engine_with_backend.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_history.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_hub.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_interpreter.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_json_schema.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_lineage_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_lineage_dictionary.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_lineage_renderer.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_lineage_tracker.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_llm_provider.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_loaders.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_observability.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_orchestrator.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_patterns.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_quality_agent.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_registry.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_resolver.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_rule_analyzer.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_rule_planner.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_sandbox.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_schema_loader.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_semantic_builder.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_semantic_engine_catalog.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_serving_chat_model.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_serving_response_serializer.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_sink_jdbc.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_user_profile.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_utils_logging.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_utils_safe_columns.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_validator.py +0 -0
- {satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/tests/test_writer.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "satisfactoscript"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.2.0"
|
|
8
8
|
description = "Declarative data engineering framework — multi-platform (Databricks, Snowflake, BigQuery)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -19,6 +19,9 @@ import logging
|
|
|
19
19
|
from functools import reduce
|
|
20
20
|
from typing import Any
|
|
21
21
|
|
|
22
|
+
from satisfactoscript.backends.sql_base import ANTI_SEMI_JOIN_UNSUPPORTED_MSG
|
|
23
|
+
from satisfactoscript.core.ir import _normalise_join_type
|
|
24
|
+
|
|
22
25
|
logger = logging.getLogger(__name__)
|
|
23
26
|
|
|
24
27
|
|
|
@@ -368,6 +371,9 @@ class SnowparkBackend:
|
|
|
368
371
|
return df.filter(condition)
|
|
369
372
|
|
|
370
373
|
def join(self, left: Any, right: Any, on: Any, how: str = "left") -> Any:
|
|
374
|
+
join_type = _normalise_join_type(how)
|
|
375
|
+
if join_type in {"left_anti", "left_semi"}:
|
|
376
|
+
raise NotImplementedError(ANTI_SEMI_JOIN_UNSUPPORTED_MSG)
|
|
371
377
|
return left.join(right, on=on, how=how)
|
|
372
378
|
|
|
373
379
|
def drop_columns(self, df: Any, columns: list) -> Any:
|
|
@@ -30,6 +30,11 @@ from typing import Any
|
|
|
30
30
|
logger = logging.getLogger(__name__)
|
|
31
31
|
|
|
32
32
|
_alias_counter = itertools.count(1)
|
|
33
|
+
ANTI_SEMI_JOIN_UNSUPPORTED_MSG = (
|
|
34
|
+
"anti/semi join only supported on the Spark DataFrame backend so far "
|
|
35
|
+
"(see Plan 21)"
|
|
36
|
+
)
|
|
37
|
+
_UNSUPPORTED_SQL_JOIN_TYPES = {"LEFT ANTI", "LEFT SEMI"}
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
def _next_alias() -> str:
|
|
@@ -342,6 +347,8 @@ class SQLQuery:
|
|
|
342
347
|
right_inner = right_q._base_to_sql()
|
|
343
348
|
on_sql = _build_join_on(self._alias, right_q._alias, on_cond)
|
|
344
349
|
jt = _normalise_join_type(how)
|
|
350
|
+
if jt in _UNSUPPORTED_SQL_JOIN_TYPES:
|
|
351
|
+
raise NotImplementedError(ANTI_SEMI_JOIN_UNSUPPORTED_MSG)
|
|
345
352
|
parts.append(f"{jt} JOIN ({right_inner}) AS `{right_q._alias}`")
|
|
346
353
|
parts.append(f" ON {on_sql}")
|
|
347
354
|
else:
|
|
@@ -409,6 +416,10 @@ def _normalise_join_type(how: str) -> str:
|
|
|
409
416
|
how = how.upper().replace("_", " ")
|
|
410
417
|
if how in ("LEFT", "LEFT OUTER"):
|
|
411
418
|
return "LEFT"
|
|
419
|
+
if how in ("ANTI", "LEFT ANTI"):
|
|
420
|
+
return "LEFT ANTI"
|
|
421
|
+
if how in ("SEMI", "LEFT SEMI"):
|
|
422
|
+
return "LEFT SEMI"
|
|
412
423
|
if how in ("RIGHT", "RIGHT OUTER"):
|
|
413
424
|
return "RIGHT"
|
|
414
425
|
if how in ("INNER",):
|
|
@@ -639,14 +650,22 @@ class SQLBackend:
|
|
|
639
650
|
inner_op_name = resolve_filter_operator(raw_op_name) or raw_op_name
|
|
640
651
|
inner_val = parts[1] if len(parts) > 1 else None
|
|
641
652
|
# Evaluate condition against c_expr directly (not via build_filter column lookup)
|
|
642
|
-
if inner_op_name == "is_not_null":
|
|
643
|
-
|
|
644
|
-
if inner_op_name == "
|
|
645
|
-
|
|
646
|
-
if inner_op_name == "
|
|
647
|
-
|
|
648
|
-
if inner_op_name == "
|
|
649
|
-
|
|
653
|
+
if inner_op_name == "is_not_null":
|
|
654
|
+
return SQLCondition(f"{c_expr} IS NOT NULL")
|
|
655
|
+
if inner_op_name == "is_null":
|
|
656
|
+
return SQLCondition(f"{c_expr} IS NULL")
|
|
657
|
+
if inner_op_name == "contains":
|
|
658
|
+
return SQLCondition(f"{c_expr} LIKE '%{_sql_escape(str(inner_val))}%'")
|
|
659
|
+
if inner_op_name == "not_contains":
|
|
660
|
+
return SQLCondition(f"{c_expr} NOT LIKE '%{_sql_escape(str(inner_val))}%'")
|
|
661
|
+
if inner_op_name == "starts_with":
|
|
662
|
+
return SQLCondition(f"{c_expr} LIKE '{_sql_escape(str(inner_val))}%'")
|
|
663
|
+
if inner_op_name == "ends_with":
|
|
664
|
+
return SQLCondition(f"{c_expr} LIKE '%{_sql_escape(str(inner_val))}'")
|
|
665
|
+
if inner_op_name == "like":
|
|
666
|
+
return SQLCondition(f"{c_expr} LIKE '{_sql_escape(str(inner_val))}'")
|
|
667
|
+
if inner_op_name == "not_like":
|
|
668
|
+
return SQLCondition(f"{c_expr} NOT LIKE '{_sql_escape(str(inner_val))}'")
|
|
650
669
|
_comp_sym = {"equals": "=", "not_equals": "!=", "greater_than": ">",
|
|
651
670
|
"less_than": "<", "greater_than_equal": ">=", "less_than_equal": "<="}
|
|
652
671
|
if inner_op_name in _comp_sym:
|
|
@@ -675,6 +694,7 @@ class SQLBackend:
|
|
|
675
694
|
"trim": lambda: SQLColumn(f"TRIM({c_expr})"),
|
|
676
695
|
"round": lambda: SQLColumn(f"ROUND({c_expr}, {op.args[0]})"),
|
|
677
696
|
"abs": lambda: SQLColumn(f"ABS({c_expr})"),
|
|
697
|
+
"ceil": lambda: SQLColumn(f"CEIL({c_expr})"),
|
|
678
698
|
"length": lambda: SQLColumn(f"LENGTH({c_expr})"),
|
|
679
699
|
"to_date": lambda: SQLColumn(f"PARSE_DATE('{op.args[0]}', {c_expr})"),
|
|
680
700
|
"split": lambda: SQLColumn(f"SPLIT({c_expr}, '{op.args[0]}')[ORDINAL({int(op.args[1]) + 1})]"),
|
|
@@ -702,22 +722,42 @@ class SQLBackend:
|
|
|
702
722
|
val = f.value
|
|
703
723
|
c_expr = self._q(col_name)
|
|
704
724
|
|
|
705
|
-
if op == "is_not_null":
|
|
706
|
-
|
|
725
|
+
if op == "is_not_null":
|
|
726
|
+
return SQLCondition(f"{c_expr} IS NOT NULL")
|
|
727
|
+
if op == "is_null":
|
|
728
|
+
return SQLCondition(f"{c_expr} IS NULL")
|
|
707
729
|
if op in ("in", "not_in"):
|
|
708
730
|
lst = val if isinstance(val, list) else [v.strip() for v in str(val).replace(";", ",").split(",")]
|
|
709
731
|
vals_str = ", ".join(_smart_val(v) for v in lst)
|
|
710
732
|
return SQLCondition(f"{c_expr} {'NOT ' if op == 'not_in' else ''}IN ({vals_str})")
|
|
711
|
-
if op == "contains":
|
|
712
|
-
|
|
713
|
-
if op == "
|
|
714
|
-
|
|
715
|
-
if op == "
|
|
716
|
-
|
|
733
|
+
if op == "contains":
|
|
734
|
+
return SQLCondition(f"{c_expr} LIKE '%{_sql_escape(str(val))}%'")
|
|
735
|
+
if op == "not_contains":
|
|
736
|
+
return SQLCondition(f"{c_expr} NOT LIKE '%{_sql_escape(str(val))}%'")
|
|
737
|
+
if op == "starts_with":
|
|
738
|
+
return SQLCondition(f"{c_expr} LIKE '{_sql_escape(str(val))}%'")
|
|
739
|
+
if op == "ends_with":
|
|
740
|
+
return SQLCondition(f"{c_expr} LIKE '%{_sql_escape(str(val))}'")
|
|
741
|
+
if op == "like":
|
|
742
|
+
return SQLCondition(f"{c_expr} LIKE '{_sql_escape(str(val))}'")
|
|
743
|
+
if op == "not_like":
|
|
744
|
+
return SQLCondition(f"{c_expr} NOT LIKE '{_sql_escape(str(val))}'")
|
|
717
745
|
if op == "sql":
|
|
718
746
|
if not allow_raw_sql:
|
|
719
747
|
raise ValueError("[Governance] sql: filter disabled (allow_raw_sql: false).")
|
|
720
748
|
return SQLCondition(val)
|
|
749
|
+
if op == "between":
|
|
750
|
+
parts = [v.strip() for v in str(val).split(",")]
|
|
751
|
+
if len(parts) != 2:
|
|
752
|
+
raise ValueError(f"between filter on column '{col_name}' expects exactly 2 values.")
|
|
753
|
+
lo, hi = parts
|
|
754
|
+
return SQLCondition(f"({c_expr} >= {_smart_val(lo)} AND {c_expr} <= {_smart_val(hi)})")
|
|
755
|
+
if op == "not_between":
|
|
756
|
+
parts = [v.strip() for v in str(val).split(",")]
|
|
757
|
+
if len(parts) != 2:
|
|
758
|
+
raise ValueError(f"not_between filter on column '{col_name}' expects exactly 2 values.")
|
|
759
|
+
lo, hi = parts
|
|
760
|
+
return SQLCondition(f"({c_expr} < {_smart_val(lo)} OR {c_expr} > {_smart_val(hi)})")
|
|
721
761
|
_comp = {"equals": "=", "not_equals": "!=", "greater_than": ">",
|
|
722
762
|
"less_than": "<", "greater_than_equal": ">=", "less_than_equal": "<="}
|
|
723
763
|
if op in _comp:
|
|
@@ -893,4 +933,4 @@ class SQLBackend:
|
|
|
893
933
|
|
|
894
934
|
def optimize_table(self, fqn: str, zorder_cols: list[str] | None = None) -> None:
|
|
895
935
|
"""Default no-op. Override in subclasses for platform-specific optimisation."""
|
|
896
|
-
print(f" [SQLBackend] optimize_table: no-op for {fqn}")
|
|
936
|
+
print(f" [SQLBackend] optimize_table: no-op for {fqn}")
|
|
@@ -27,6 +27,8 @@ if TYPE_CHECKING:
|
|
|
27
27
|
|
|
28
28
|
logger = logging.getLogger(__name__)
|
|
29
29
|
|
|
30
|
+
_LEFT_ONLY_JOIN_TYPES = {"left_anti", "left_semi"}
|
|
31
|
+
|
|
30
32
|
|
|
31
33
|
class SchemaInterpreter:
|
|
32
34
|
"""
|
|
@@ -351,7 +353,8 @@ class SchemaInterpreter:
|
|
|
351
353
|
else:
|
|
352
354
|
cond = reduce(lambda x, y: x & y, [df_main[lk] == df_to[rk] for lk, rk in zip(on_l, on_r)])
|
|
353
355
|
df_main = b.join(df_main, df_to, cond, join_type)
|
|
354
|
-
|
|
356
|
+
if join_type not in _LEFT_ONLY_JOIN_TYPES:
|
|
357
|
+
df_main = b.drop_columns(df_main, [df_to[r] for r in on_r])
|
|
355
358
|
|
|
356
359
|
# 3. BUSINESS RULES
|
|
357
360
|
if "business_rules" in schema_dict:
|
|
@@ -11,7 +11,9 @@ from __future__ import annotations
|
|
|
11
11
|
from dataclasses import dataclass, field
|
|
12
12
|
from typing import Any
|
|
13
13
|
|
|
14
|
-
VALID_JOIN_TYPES: frozenset[str] = frozenset(
|
|
14
|
+
VALID_JOIN_TYPES: frozenset[str] = frozenset(
|
|
15
|
+
{"left", "right", "inner", "full", "cross", "left_anti", "left_semi"}
|
|
16
|
+
)
|
|
15
17
|
|
|
16
18
|
_JOIN_TYPE_ALIASES: dict[str, str] = {
|
|
17
19
|
"left outer": "left",
|
|
@@ -19,6 +21,12 @@ _JOIN_TYPE_ALIASES: dict[str, str] = {
|
|
|
19
21
|
"outer": "full",
|
|
20
22
|
"full outer": "full",
|
|
21
23
|
"full_outer": "full",
|
|
24
|
+
"anti": "left_anti",
|
|
25
|
+
"left anti": "left_anti",
|
|
26
|
+
"leftanti": "left_anti",
|
|
27
|
+
"semi": "left_semi",
|
|
28
|
+
"left semi": "left_semi",
|
|
29
|
+
"leftsemi": "left_semi",
|
|
22
30
|
}
|
|
23
31
|
|
|
24
32
|
|
|
@@ -32,7 +32,7 @@ class OperatorSpec:
|
|
|
32
32
|
"""
|
|
33
33
|
``"none"`` — operator takes no value (``is_null``, ``is_not_null``)
|
|
34
34
|
``"single"`` — operator takes one scalar value
|
|
35
|
-
``"list"`` — operator takes a comma-separated / YAML-list value (``in``, ``not_in``)
|
|
35
|
+
``"list"`` — operator takes a comma-separated / YAML-list value (``in``, ``not_in``, ``between``, ``not_between``)
|
|
36
36
|
``"sql"`` — operator takes a raw SQL expression (``sql``)
|
|
37
37
|
"""
|
|
38
38
|
|
|
@@ -49,7 +49,7 @@ class OpSpec:
|
|
|
49
49
|
|
|
50
50
|
arity: str = "none"
|
|
51
51
|
"""
|
|
52
|
-
``"none"`` — no argument (``upper``, ``lower``, ``trim``, ``abs``, ``length``)
|
|
52
|
+
``"none"`` — no argument (``upper``, ``lower``, ``trim``, ``abs``, ``length``, ``ceil``)
|
|
53
53
|
``"single"`` — one argument after ``:`` (``cast:``, ``lit:``, ``round:``, …)
|
|
54
54
|
``"two"`` — two comma-separated arguments (``split:sep,idx``, ``substring:start,len``)
|
|
55
55
|
``"expression"`` — raw SQL expression (``expr:``)
|
|
@@ -125,6 +125,16 @@ FILTER_OPERATORS: dict[str, OperatorSpec] = {
|
|
|
125
125
|
arity="list",
|
|
126
126
|
description="Passes rows where column value is in the provided list.",
|
|
127
127
|
),
|
|
128
|
+
"between": OperatorSpec(
|
|
129
|
+
canonical="between",
|
|
130
|
+
arity="list",
|
|
131
|
+
description="Passes rows where lo <= column <= hi (inclusive). Two comma-separated values.",
|
|
132
|
+
),
|
|
133
|
+
"not_between": OperatorSpec(
|
|
134
|
+
canonical="not_between",
|
|
135
|
+
arity="list",
|
|
136
|
+
description="Passes rows where column is strictly outside the provided [lo, hi] interval.",
|
|
137
|
+
),
|
|
128
138
|
"not_in": OperatorSpec(
|
|
129
139
|
canonical="not_in",
|
|
130
140
|
arity="list",
|
|
@@ -189,6 +199,7 @@ COLUMN_OPS: dict[str, OpSpec] = {
|
|
|
189
199
|
# ---- numeric ----
|
|
190
200
|
"round": OpSpec("round", arity="single", description="Round to N decimal places."),
|
|
191
201
|
"abs": OpSpec("abs", arity="none", description="Absolute value."),
|
|
202
|
+
"ceil": OpSpec("ceil", arity="none", description="Round up to nearest integer."),
|
|
192
203
|
# ---- date ----
|
|
193
204
|
"to_date": OpSpec("to_date", arity="single", description="Parse string to date using the given format."),
|
|
194
205
|
# ---- null handling ----
|
|
@@ -38,9 +38,12 @@ def _apply_operation(c, op_str, allow_raw_sql=True):
|
|
|
38
38
|
# --- 1. CONDITIONS (WHEN) ---
|
|
39
39
|
if op_str.startswith("when:"):
|
|
40
40
|
cond = op_str.split(":", 1)[1]
|
|
41
|
-
if cond == "is_not_null":
|
|
42
|
-
|
|
43
|
-
if cond
|
|
41
|
+
if cond == "is_not_null":
|
|
42
|
+
return c.isNotNull()
|
|
43
|
+
if cond == "is_null":
|
|
44
|
+
return c.isNull()
|
|
45
|
+
if cond.startswith("like:"):
|
|
46
|
+
return c.like(cond.split(":", 1)[1])
|
|
44
47
|
if cond.startswith("not_like:") or cond.startswith("notlike:"):
|
|
45
48
|
val = cond.split(":", 1)[1]
|
|
46
49
|
return ~c.like(val)
|
|
@@ -85,10 +88,13 @@ def _apply_operation(c, op_str, allow_raw_sql=True):
|
|
|
85
88
|
|
|
86
89
|
# --- 2. ACTIONS ---
|
|
87
90
|
clean_op = op_str
|
|
88
|
-
if op_str.startswith("then:"):
|
|
89
|
-
|
|
91
|
+
if op_str.startswith("then:"):
|
|
92
|
+
clean_op = op_str.split(":", 1)[1]
|
|
93
|
+
if op_str.startswith("else:"):
|
|
94
|
+
clean_op = op_str.split(":", 1)[1]
|
|
90
95
|
|
|
91
|
-
if clean_op.startswith("lit:"):
|
|
96
|
+
if clean_op.startswith("lit:"):
|
|
97
|
+
return smart_lit(clean_op.split(":", 1)[1])
|
|
92
98
|
if clean_op.startswith("expr:"):
|
|
93
99
|
if not allow_raw_sql:
|
|
94
100
|
raise ValueError(
|
|
@@ -96,10 +102,14 @@ def _apply_operation(c, op_str, allow_raw_sql=True):
|
|
|
96
102
|
"(allow_raw_sql: false). Remove the expr: operation or enable allow_raw_sql in config.yaml."
|
|
97
103
|
)
|
|
98
104
|
return F.expr(clean_op.split(":", 1)[1])
|
|
99
|
-
if clean_op.startswith("col:"):
|
|
100
|
-
|
|
101
|
-
if clean_op
|
|
102
|
-
|
|
105
|
+
if clean_op.startswith("col:"):
|
|
106
|
+
return F.col(f"`{clean_op.split(':', 1)[1]}`")
|
|
107
|
+
if clean_op == "col":
|
|
108
|
+
return c
|
|
109
|
+
if clean_op.startswith("coalesce:"):
|
|
110
|
+
return F.coalesce(c, smart_lit(clean_op.split(":", 1)[1]))
|
|
111
|
+
if clean_op.startswith("nvl:"):
|
|
112
|
+
return F.coalesce(c, smart_lit(clean_op.split(":", 1)[1]))
|
|
103
113
|
if clean_op.startswith("cast:"):
|
|
104
114
|
t = clean_op.split(":", 1)[1].lower()
|
|
105
115
|
return c.cast("timestamp") if t in ["datetime", "timestamp"] else c.cast(t)
|
|
@@ -115,11 +125,18 @@ def _apply_operation(c, op_str, allow_raw_sql=True):
|
|
|
115
125
|
if clean_op.startswith("to_date:"):
|
|
116
126
|
fmt = clean_op.split(":", 1)[1]
|
|
117
127
|
return F.to_date(c, fmt)
|
|
118
|
-
if clean_op == "trim":
|
|
119
|
-
|
|
120
|
-
if clean_op == "
|
|
121
|
-
|
|
122
|
-
if clean_op == "
|
|
128
|
+
if clean_op == "trim":
|
|
129
|
+
return F.trim(c)
|
|
130
|
+
if clean_op == "upper":
|
|
131
|
+
return F.upper(c)
|
|
132
|
+
if clean_op == "lower":
|
|
133
|
+
return F.lower(c)
|
|
134
|
+
if clean_op == "abs":
|
|
135
|
+
return F.abs(c)
|
|
136
|
+
if clean_op == "ceil":
|
|
137
|
+
return F.ceil(c)
|
|
138
|
+
if clean_op == "length":
|
|
139
|
+
return F.length(c)
|
|
123
140
|
|
|
124
141
|
op_name = clean_op.split(":", 1)[0]
|
|
125
142
|
from satisfactoscript.core.op_catalog import COLUMN_OPS, suggest as _suggest
|
|
@@ -149,37 +166,78 @@ def _build_filter_expression(filter_list, allow_raw_sql=True):
|
|
|
149
166
|
"""
|
|
150
167
|
from pyspark.sql import functions as F
|
|
151
168
|
|
|
169
|
+
def parse_list_values(col_name, val):
|
|
170
|
+
if isinstance(val, str):
|
|
171
|
+
raw_vals = val.replace(";", ",").split(",")
|
|
172
|
+
val_list = []
|
|
173
|
+
for v in raw_vals:
|
|
174
|
+
stripped = v.strip()
|
|
175
|
+
if stripped != v:
|
|
176
|
+
logger.warning(
|
|
177
|
+
"Filter on '%s': value '%s' has leading/trailing space. "
|
|
178
|
+
"If the value contains a comma, use the dict form instead.",
|
|
179
|
+
col_name,
|
|
180
|
+
v,
|
|
181
|
+
)
|
|
182
|
+
val_list.append(stripped)
|
|
183
|
+
return val_list
|
|
184
|
+
if isinstance(val, (list, tuple)):
|
|
185
|
+
return list(val)
|
|
186
|
+
return val
|
|
187
|
+
|
|
152
188
|
def build_condition(rule):
|
|
153
189
|
col_name, op, val = rule["column"], rule["operator"].lower(), rule.get("value")
|
|
154
190
|
c = F.col(f"`{col_name}`")
|
|
155
191
|
|
|
156
|
-
if op == "is_not_null":
|
|
157
|
-
|
|
192
|
+
if op == "is_not_null":
|
|
193
|
+
return c.isNotNull()
|
|
194
|
+
if op == "is_null":
|
|
195
|
+
return c.isNull()
|
|
158
196
|
|
|
159
197
|
if op in ["in", "not_in"]:
|
|
160
|
-
|
|
161
|
-
raw_vals = val.replace(";", ",").split(",")
|
|
162
|
-
val_list = []
|
|
163
|
-
for v in raw_vals:
|
|
164
|
-
stripped = v.strip()
|
|
165
|
-
if stripped != v:
|
|
166
|
-
logger.warning(
|
|
167
|
-
"Filter on '%s': value '%s' has leading/trailing space. "
|
|
168
|
-
"If the value contains a comma, use the dict form instead.",
|
|
169
|
-
col_name,
|
|
170
|
-
v,
|
|
171
|
-
)
|
|
172
|
-
val_list.append(stripped)
|
|
173
|
-
else:
|
|
174
|
-
val_list = val
|
|
198
|
+
val_list = parse_list_values(col_name, val)
|
|
175
199
|
return c.isin(val_list) if op == "in" else ~c.isin(val_list)
|
|
200
|
+
if op == "between":
|
|
201
|
+
val_list = parse_list_values(col_name, val)
|
|
202
|
+
if not isinstance(val_list, list):
|
|
203
|
+
raise ValueError(
|
|
204
|
+
f"Filter on column '{col_name}': between operator expects exactly 2 values "
|
|
205
|
+
"(lo,hi)."
|
|
206
|
+
)
|
|
207
|
+
if len(val_list) != 2:
|
|
208
|
+
raise ValueError(
|
|
209
|
+
f"Filter on column '{col_name}': between operator expects exactly 2 values "
|
|
210
|
+
f"(lo,hi); got {len(val_list)}."
|
|
211
|
+
)
|
|
212
|
+
lo, hi = val_list
|
|
213
|
+
return (c >= lo) & (c <= hi)
|
|
214
|
+
if op == "not_between":
|
|
215
|
+
val_list = parse_list_values(col_name, val)
|
|
216
|
+
if not isinstance(val_list, list):
|
|
217
|
+
raise ValueError(
|
|
218
|
+
f"Filter on column '{col_name}': not_between operator expects exactly 2 values "
|
|
219
|
+
"(lo,hi)."
|
|
220
|
+
)
|
|
221
|
+
if len(val_list) != 2:
|
|
222
|
+
raise ValueError(
|
|
223
|
+
f"Filter on column '{col_name}': not_between operator expects exactly 2 values "
|
|
224
|
+
f"(lo,hi); got {len(val_list)}."
|
|
225
|
+
)
|
|
226
|
+
lo, hi = val_list
|
|
227
|
+
return (c < lo) | (c > hi)
|
|
176
228
|
|
|
177
|
-
if op == "contains":
|
|
178
|
-
|
|
179
|
-
if op
|
|
180
|
-
|
|
181
|
-
if op == "
|
|
182
|
-
|
|
229
|
+
if op == "contains":
|
|
230
|
+
return c.like(f"%{val}%")
|
|
231
|
+
if op in ["not_contains", "notcontains"]:
|
|
232
|
+
return ~c.like(f"%{val}%")
|
|
233
|
+
if op == "starts_with":
|
|
234
|
+
return c.like(f"{val}%")
|
|
235
|
+
if op == "ends_with":
|
|
236
|
+
return c.like(f"%{val}")
|
|
237
|
+
if op == "like":
|
|
238
|
+
return c.like(val)
|
|
239
|
+
if op in ["not_like", "notlike"]:
|
|
240
|
+
return ~c.like(val)
|
|
183
241
|
|
|
184
242
|
if op == "sql":
|
|
185
243
|
if not allow_raw_sql:
|
|
@@ -199,12 +257,18 @@ def _build_filter_expression(filter_list, allow_raw_sql=True):
|
|
|
199
257
|
}
|
|
200
258
|
if op in op_map:
|
|
201
259
|
sql_op = op_map[op]
|
|
202
|
-
if sql_op == "=":
|
|
203
|
-
|
|
204
|
-
if sql_op == "
|
|
205
|
-
|
|
206
|
-
if sql_op == "
|
|
207
|
-
|
|
260
|
+
if sql_op == "=":
|
|
261
|
+
return c == val
|
|
262
|
+
if sql_op == "!=":
|
|
263
|
+
return c != val
|
|
264
|
+
if sql_op == ">":
|
|
265
|
+
return c > val
|
|
266
|
+
if sql_op == "<":
|
|
267
|
+
return c < val
|
|
268
|
+
if sql_op == ">=":
|
|
269
|
+
return c >= val
|
|
270
|
+
if sql_op == "<=":
|
|
271
|
+
return c <= val
|
|
208
272
|
|
|
209
273
|
from satisfactoscript.core.op_catalog import FILTER_OPERATORS, suggest as _suggest
|
|
210
274
|
hints = _suggest(op, FILTER_OPERATORS)
|
|
@@ -219,4 +283,3 @@ def _build_filter_expression(filter_list, allow_raw_sql=True):
|
|
|
219
283
|
return F.lit(True)
|
|
220
284
|
|
|
221
285
|
return reduce(lambda a, b: a & b, [build_condition(r) for r in filter_list])
|
|
222
|
-
|
{satisfactoscript-1.1.0 → satisfactoscript-1.2.0}/src/satisfactoscript/core/rule_executor.py
RENAMED
|
@@ -69,11 +69,18 @@ class RuleExecutor:
|
|
|
69
69
|
"""
|
|
70
70
|
Fuse all projection rules into a single ``select()`` call.
|
|
71
71
|
|
|
72
|
-
Each rule returns ``dict[str, Column]``.
|
|
72
|
+
Each rule returns ``dict[str, Column]``. Brand-new columns are appended
|
|
73
73
|
to the existing schema in topological order (already sorted by the
|
|
74
74
|
planner). Duplicate column names produced by different rules are
|
|
75
75
|
de-duplicated: the last rule's expression wins (consistent with
|
|
76
76
|
sequential semantics).
|
|
77
|
+
|
|
78
|
+
A rule may also produce a column whose name already exists in the input
|
|
79
|
+
DataFrame (e.g. ``{"x": F.upper(F.col("x"))}``). Such columns *replace*
|
|
80
|
+
the input column in place — keeping their original position — exactly
|
|
81
|
+
like a sequential ``withColumn``. This avoids emitting two homonymous
|
|
82
|
+
columns in the fused ``select()`` (which would raise
|
|
83
|
+
``AMBIGUOUS_REFERENCE``).
|
|
77
84
|
"""
|
|
78
85
|
from pyspark.sql import functions as F
|
|
79
86
|
|
|
@@ -95,10 +102,20 @@ class RuleExecutor:
|
|
|
95
102
|
if not new_cols:
|
|
96
103
|
return df
|
|
97
104
|
|
|
98
|
-
# Build the select expression
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
105
|
+
# Build the select expression. Columns rewritten by a rule replace the
|
|
106
|
+
# input column in place (preserving position); brand-new columns are
|
|
107
|
+
# appended. Keeping a rewritten column out of `existing` avoids two
|
|
108
|
+
# homonymous columns in the select (AMBIGUOUS_REFERENCE).
|
|
109
|
+
select_exprs = [
|
|
110
|
+
new_cols[c].alias(c) if c in new_cols else F.col(c)
|
|
111
|
+
for c in df.columns
|
|
112
|
+
]
|
|
113
|
+
select_exprs += [
|
|
114
|
+
col_expr.alias(col_name)
|
|
115
|
+
for col_name, col_expr in new_cols.items()
|
|
116
|
+
if col_name not in df.columns
|
|
117
|
+
]
|
|
118
|
+
return df.select(*select_exprs)
|
|
102
119
|
|
|
103
120
|
def _execute_aggregation_stage(self, df: "DataFrame", stage: "RuleStage") -> "DataFrame":
|
|
104
121
|
"""
|
|
@@ -257,6 +257,16 @@ def test_join(backend):
|
|
|
257
257
|
left.join.assert_called_once_with(right, on=on, how="left")
|
|
258
258
|
|
|
259
259
|
|
|
260
|
+
@pytest.mark.parametrize("join_type", ["left_anti", "anti", "left_semi", "semi", "left semi"])
|
|
261
|
+
def test_join_anti_semi_fail_fast(backend, join_type):
|
|
262
|
+
left = MagicMock()
|
|
263
|
+
right = MagicMock()
|
|
264
|
+
on = MagicMock()
|
|
265
|
+
with pytest.raises(NotImplementedError, match="anti/semi join only supported on the Spark DataFrame backend so far"):
|
|
266
|
+
backend.join(left, right, on=on, how=join_type)
|
|
267
|
+
left.join.assert_not_called()
|
|
268
|
+
|
|
269
|
+
|
|
260
270
|
def test_with_column_uses_snake_case(backend):
|
|
261
271
|
"""Snowpark uses with_column() (snake_case), not withColumn()."""
|
|
262
272
|
df = MagicMock()
|
|
@@ -234,6 +234,14 @@ class TestSQLQuery:
|
|
|
234
234
|
assert "FULL OUTER JOIN" in sql
|
|
235
235
|
assert "customer_id" in sql
|
|
236
236
|
|
|
237
|
+
@pytest.mark.parametrize("join_type", ["left_anti", "anti", "left_semi", "semi"])
|
|
238
|
+
def test_join_anti_semi_raise_not_implemented(self, join_type):
|
|
239
|
+
left = SQLQuery("`orders`")
|
|
240
|
+
right = SQLQuery("`customers`")
|
|
241
|
+
left._joins.append((join_type, right, ["customer_id"]))
|
|
242
|
+
with pytest.raises(NotImplementedError, match="anti/semi join only supported on the Spark DataFrame backend so far"):
|
|
243
|
+
left.to_sql()
|
|
244
|
+
|
|
237
245
|
def test_qualify(self):
|
|
238
246
|
q = SQLQuery("`orders`")
|
|
239
247
|
q._qualify = "ROW_NUMBER() OVER (PARTITION BY `id` ORDER BY 1) = 1"
|
|
@@ -413,6 +421,14 @@ class TestSQLBackend:
|
|
|
413
421
|
assert "cust_id" in sql
|
|
414
422
|
assert "LEFT JOIN" in sql
|
|
415
423
|
|
|
424
|
+
@pytest.mark.parametrize("join_type", ["left_anti", "anti", "left_semi", "semi"])
|
|
425
|
+
def test_join_anti_semi_fail_fast_on_backend(self, backend, join_type):
|
|
426
|
+
left = backend.read_table("`orders`")
|
|
427
|
+
right = backend.read_table("`customers`")
|
|
428
|
+
joined = backend.join(left, right, on=["customer_id"], how=join_type)
|
|
429
|
+
with pytest.raises(NotImplementedError, match="anti/semi join only supported on the Spark DataFrame backend so far"):
|
|
430
|
+
joined.to_sql()
|
|
431
|
+
|
|
416
432
|
def test_select_sets_explicit_columns(self, backend):
|
|
417
433
|
df = backend.read_table("`orders`")
|
|
418
434
|
cols = [backend.col("amount").alias("total")]
|
|
@@ -565,6 +581,11 @@ class TestSQLBackend:
|
|
|
565
581
|
result = backend.apply_operation(c, "abs")
|
|
566
582
|
assert "ABS(`amount`)" == result._expr
|
|
567
583
|
|
|
584
|
+
def test_apply_operation_ceil(self, backend):
|
|
585
|
+
c = SQLColumn("`amount`")
|
|
586
|
+
result = backend.apply_operation(c, "ceil")
|
|
587
|
+
assert "CEIL(`amount`)" == result._expr
|
|
588
|
+
|
|
568
589
|
def test_apply_operation_round(self, backend):
|
|
569
590
|
c = SQLColumn("`amount`")
|
|
570
591
|
result = backend.apply_operation(c, "round:2")
|
|
@@ -667,6 +688,42 @@ class TestSQLBackend:
|
|
|
667
688
|
])
|
|
668
689
|
assert "`status` IN ('ACTIVE', 'PENDING')" in result._sql
|
|
669
690
|
|
|
691
|
+
def test_build_filter_between(self, backend):
|
|
692
|
+
result = backend.build_filter_expression([
|
|
693
|
+
{"column": "amount", "operator": "between", "value": "10, 20"}
|
|
694
|
+
])
|
|
695
|
+
assert "(`amount` >= 10 AND `amount` <= 20)" in result._sql
|
|
696
|
+
|
|
697
|
+
def test_build_filter_between_escapes_strings(self, backend):
|
|
698
|
+
result = backend.build_filter_expression([
|
|
699
|
+
{"column": "name", "operator": "between", "value": "O'Brien, Zed"}
|
|
700
|
+
])
|
|
701
|
+
assert "(`name` >= 'O\\'Brien' AND `name` <= 'Zed')" in result._sql
|
|
702
|
+
|
|
703
|
+
def test_build_filter_between_requires_two_values(self, backend):
|
|
704
|
+
with pytest.raises(ValueError, match="amount"):
|
|
705
|
+
backend.build_filter_expression([
|
|
706
|
+
{"column": "amount", "operator": "between", "value": "10,20,30"}
|
|
707
|
+
])
|
|
708
|
+
|
|
709
|
+
def test_build_filter_not_between(self, backend):
|
|
710
|
+
result = backend.build_filter_expression([
|
|
711
|
+
{"column": "amount", "operator": "not_between", "value": "10, 20"}
|
|
712
|
+
])
|
|
713
|
+
assert "(`amount` < 10 OR `amount` > 20)" in result._sql
|
|
714
|
+
|
|
715
|
+
def test_build_filter_not_between_escapes_strings(self, backend):
|
|
716
|
+
result = backend.build_filter_expression([
|
|
717
|
+
{"column": "name", "operator": "not_between", "value": "O'Brien, Zed"}
|
|
718
|
+
])
|
|
719
|
+
assert "(`name` < 'O\\'Brien' OR `name` > 'Zed')" in result._sql
|
|
720
|
+
|
|
721
|
+
def test_build_filter_not_between_requires_two_values(self, backend):
|
|
722
|
+
with pytest.raises(ValueError, match="amount"):
|
|
723
|
+
backend.build_filter_expression([
|
|
724
|
+
{"column": "amount", "operator": "not_between", "value": "10,20,30"}
|
|
725
|
+
])
|
|
726
|
+
|
|
670
727
|
def test_build_filter_not_in(self, backend):
|
|
671
728
|
result = backend.build_filter_expression([
|
|
672
729
|
{"column": "status", "operator": "not_in", "value": "DELETED,ARCHIVED"}
|