dvt-core 0.52.2 (cp310-cp310-macosx_10_9_x86_64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dvt-core might be problematic.

Files changed (275)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
dbt/compute/__init__.py
@@ -0,0 +1,14 @@
+ """
+ DVT Compute Layer
+
+ This module provides compute engine integration for federated query execution.
+
+ v0.3.0: Spark-unified architecture - arrow_bridge removed.
+ """
+
+ # Note: arrow_bridge, adapter_to_arrow, and arrow_to_adapter removed in v0.3.0
+ # All data loading now uses Spark JDBC
+
+ from typing import List
+
+ __all__: List[str] = []
dbt/compute/engines/__init__.py
@@ -0,0 +1,12 @@
+ """
+ DVT Compute Engines
+
+ This module provides ephemeral compute engines for federated query execution.
+ Compute engines are used ONLY for processing, never for materialization.
+
+ v0.3.0: Spark-unified architecture - DuckDBEngine removed.
+ """
+
+ from dbt.compute.engines.spark_engine import SparkEngine
+
+ __all__ = ["SparkEngine"]
dbt/compute/engines/spark_engine.py
@@ -0,0 +1,624 @@
+ """
+ Spark Compute Engine
+
+ Provides Spark integration for large-scale federated query execution.
+ Supports multiple connection strategies via strategy pattern:
+ - Local: Embedded PySpark (in-process)
+ - Standalone: Remote Spark clusters via spark:// URL
+ - EMR: AWS EMR clusters via YARN
+ - Dataproc: GCP Dataproc clusters
+
+ Key characteristics:
+ - Scalable to large datasets
+ - Distributed processing
+ - Can connect to external Spark clusters
+ - No materialization (ephemeral only)
+
+ v0.51.2: Removed Databricks support (serverless cannot read external JDBC sources).
+ """
+
+ from typing import Any, Dict, List, Optional
+ from dbt_common.exceptions import DbtRuntimeError
+
+ try:
+     from pyspark.sql import SparkSession, DataFrame
+     PYSPARK_AVAILABLE = True
+ except ImportError:
+     PYSPARK_AVAILABLE = False
+     SparkSession = None
+     DataFrame = None
+
+ from dbt.compute.strategies import (
+     BaseConnectionStrategy,
+     LocalStrategy,
+     get_emr_strategy,
+     get_dataproc_strategy,
+     get_standalone_strategy,
+ )
+
+
+ def _clean_spark_error(e: Exception) -> str:
+     """
+     Extract clean error message from Java/Spark exception.
+
+     DVT v0.5.2: Removes verbose Java stack traces and returns readable error message.
+
+     :param e: Exception from Spark/Java
+     :returns: Clean error message string
+     """
+     error_str = str(e)
+
+     # Check for common error patterns and extract meaningful message
+
+     # Pattern 1: ServiceConfigurationError (Scala version mismatch)
+     if "ServiceConfigurationError" in error_str:
+         if "Unable to get public no-arg constructor" in error_str:
+             # Extract the class name that failed
+             if "DataSourceRegister:" in error_str:
+                 class_name = error_str.split("DataSourceRegister:")[-1].split()[0]
+                 return f"Spark connector incompatible with current Scala version: {class_name}. Try using JDBC driver directly instead of Spark connector."
+         return "Spark service configuration error - possible Scala version mismatch"
+
+     # Pattern 2: NoClassDefFoundError
+     if "NoClassDefFoundError:" in error_str:
+         missing_class = error_str.split("NoClassDefFoundError:")[-1].split()[0].strip()
+         return f"Missing Java class: {missing_class}. This usually indicates a Scala version mismatch between Spark and the connector."
+
+     # Pattern 3: ClassNotFoundException
+     if "ClassNotFoundException:" in error_str:
+         missing_class = error_str.split("ClassNotFoundException:")[-1].split()[0].strip()
+         return f"Class not found: {missing_class}"
+
+     # Pattern 4: SQLException
+     if "SQLException:" in error_str:
+         sql_error = error_str.split("SQLException:")[-1].split('\n')[0].strip()
+         return f"SQL Error: {sql_error}"
+
+     # Pattern 5: Snowflake errors
+     if "net.snowflake" in error_str:
+         if "Authentication" in error_str or "auth" in error_str.lower():
+             return "Snowflake authentication failed. Check credentials in profile."
+         if "does not exist" in error_str:
+             return "Snowflake table/schema not found. Check the object path."
+
+     # Pattern 6: PostgreSQL errors
+     if "PSQLException:" in error_str:
+         lines = error_str.split('\n')
+         for line in lines:
+             if "PSQLException:" in line:
+                 return line.split("PSQLException:")[-1].strip()
+
+     # Default: Return first line only (remove stack trace)
+     first_line = error_str.split('\n')[0]
+     if len(first_line) > 200:
+         first_line = first_line[:200] + "..."
+     return first_line
+
+
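A quick sketch of how _clean_spark_error behaves on a typical JDBC failure (the exception text below is invented for illustration; only the function above is from the package):

    from dbt.compute.engines.spark_engine import _clean_spark_error

    # A driver-not-found error with a Java stack trace attached (hypothetical message).
    err = Exception(
        "java.lang.ClassNotFoundException: org.postgresql.Driver\n"
        "\tat java.net.URLClassLoader.findClass(URLClassLoader.java:387)"
    )
    # Pattern 3 (ClassNotFoundException) matches and the stack trace is dropped.
    print(_clean_spark_error(err))  # -> Class not found: org.postgresql.Driver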
+ class SparkEngine:
+     """
+     Ephemeral Spark compute engine for federated query execution.
+
+     Uses strategy pattern for flexible connection management:
+     1. Local: Embedded PySpark session (in-process)
+     2. Standalone: Remote Spark clusters via spark:// URL
+     3. EMR: AWS EMR clusters via YARN
+     4. Dataproc: GCP Dataproc clusters
+     """
+
+     def __init__(
+         self,
+         mode: str = "embedded",
+         spark_config: Optional[Dict[str, str]] = None,
+         app_name: str = "DVT-Compute",
+     ):
+         """
+         Initialize Spark engine.
+
+         :param mode: 'embedded'/'local' for in-process PySpark, 'standalone' or 'external'
+             for a remote Spark cluster, 'emr' for AWS EMR (YARN), 'dataproc' for GCP Dataproc
+         :param spark_config: Spark configuration dict (platform-specific)
+         :param app_name: Spark application name
+         :raises DbtRuntimeError: If PySpark not available or invalid config
+         """
+         if not PYSPARK_AVAILABLE:
+             raise DbtRuntimeError("PySpark is not available. Install it with: pip install pyspark")
+
+         self.mode = mode
+         self.spark_config = spark_config or {}
+         self.app_name = app_name
+         self.spark: Optional[SparkSession] = None
+         self.registered_tables: Dict[str, str] = {}
+
+         # Create connection strategy based on mode or config
+         self._connection_strategy = self._create_strategy()
+
+     def _create_strategy(self) -> BaseConnectionStrategy:
+         """
+         Create connection strategy based on mode or config.
+
+         v0.51.2: Removed Databricks (serverless cannot read external JDBC sources).
+         Platform detection order:
+         1. Dataproc: project + region + cluster
+         2. EMR: master=yarn (without Dataproc keys)
+         3. Standalone: master=spark://
+         4. Local: default (local[*] or no master)
+
+         :returns: Connection strategy instance
+         :raises DbtRuntimeError: If platform detection fails
+         """
+         config_keys = set(self.spark_config.keys())
+
+         # 1. Dataproc: has project, region, and cluster
+         if all(k in config_keys for k in ("project", "region", "cluster")):
+             DataprocStrategy = get_dataproc_strategy()
+             strategy = DataprocStrategy(config=self.spark_config, app_name=self.app_name)
+             strategy.validate_config()
+             return strategy
+
+         # Check master config for EMR, Standalone, or Local
+         master = self.spark_config.get("master", "")
+
+         # 2. EMR: master=yarn (YARN resource manager)
+         if master.lower() == "yarn":
+             EMRStrategy = get_emr_strategy()
+             strategy = EMRStrategy(config=self.spark_config, app_name=self.app_name)
+             strategy.validate_config()
+             return strategy
+
+         # 3. Standalone: master=spark://
+         if master.startswith("spark://"):
+             StandaloneStrategy = get_standalone_strategy()
+             strategy = StandaloneStrategy(config=self.spark_config, app_name=self.app_name)
+             strategy.validate_config()
+             return strategy
+
+         # 4. Local: local[*], local[N], or no master (default)
+         if master.startswith("local") or not master or self.mode in ("embedded", "local"):
+             strategy = LocalStrategy(config=self.spark_config, app_name=self.app_name)
+             strategy.validate_config()
+             return strategy
+
+         # Explicit mode overrides
+         if self.mode == "emr":
+             EMRStrategy = get_emr_strategy()
+             strategy = EMRStrategy(config=self.spark_config, app_name=self.app_name)
+             strategy.validate_config()
+             return strategy
+
+         if self.mode == "dataproc":
+             DataprocStrategy = get_dataproc_strategy()
+             strategy = DataprocStrategy(config=self.spark_config, app_name=self.app_name)
+             strategy.validate_config()
+             return strategy
+
+         if self.mode in ("standalone", "external"):
+             StandaloneStrategy = get_standalone_strategy()
+             strategy = StandaloneStrategy(config=self.spark_config, app_name=self.app_name)
+             strategy.validate_config()
+             return strategy
+
+         # Fallback to local
+         strategy = LocalStrategy(config=self.spark_config, app_name=self.app_name)
+         strategy.validate_config()
+         return strategy
+
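Following the detection order documented in _create_strategy above, a minimal sketch of spark_config shapes and the strategy each resolves to (all values are placeholders, not part of the package):

    dataproc_cfg = {"project": "my-gcp-project", "region": "us-central1", "cluster": "dvt-cluster"}  # -> Dataproc
    emr_cfg = {"master": "yarn"}                               # -> EMR (YARN)
    standalone_cfg = {"master": "spark://spark-master:7077"}   # -> Standalone
    local_cfg = {"master": "local[*]"}                         # -> Local (also the default for an empty config)

    # For example, SparkEngine(mode="embedded", spark_config=local_cfg) selects LocalStrategy.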
+     def __enter__(self):
+         """Context manager entry - initialize Spark session."""
+         self.connect()
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Context manager exit - stop Spark session."""
+         self.close()
+
+     def connect(self, adapter_types: Optional[set] = None) -> None:
+         """
+         Create Spark session using the configured strategy.
+
+         v0.5.99: Now accepts adapter_types for JDBC driver provisioning.
+
+         :param adapter_types: Set of source adapter types that need JDBC drivers
+         """
+         try:
+             self.spark = self._connection_strategy.get_spark_session(adapter_types=adapter_types)
+         except Exception as e:
+             raise DbtRuntimeError(f"Failed to initialize Spark engine: {str(e)}") from e
+
+     def close(self) -> None:
+         """Stop Spark session and release resources."""
+         if self.spark:
+             try:
+                 self._connection_strategy.close(self.spark)
+             except Exception:
+                 pass  # Best effort cleanup
+             finally:
+                 self.spark = None
+                 self.registered_tables.clear()
+
+     def get_table_info(self, table_name: str) -> Dict[str, Any]:
+         """
+         Get metadata about a registered table.
+
+         :param table_name: Name of the table
+         :returns: Dictionary with table metadata (columns, row_count, etc.)
+         :raises DbtRuntimeError: If table not found
+         """
+         if not self.spark:
+             raise DbtRuntimeError("Spark engine not connected")
+
+         if table_name not in self.registered_tables:
+             raise DbtRuntimeError(f"Table '{table_name}' not registered")
+
+         try:
+             # Get DataFrame for the table
+             df = self.spark.table(table_name)
+
+             # Get schema
+             columns = []
+             for field in df.schema.fields:
+                 columns.append(
+                     {"name": field.name, "type": str(field.dataType), "nullable": field.nullable}
+                 )
+
+             # Get row count
+             row_count = df.count()
+
+             return {"table_name": table_name, "columns": columns, "row_count": row_count}
+
+         except Exception as e:
+             raise DbtRuntimeError(f"Failed to get info for table '{table_name}': {str(e)}") from e
+
+     def list_tables(self) -> List[str]:
+         """
+         List all registered tables.
+
+         :returns: List of table names
+         """
+         return list(self.registered_tables.keys())
+
+     def explain_query(self, sql: str) -> str:
+         """
+         Get query execution plan.
+
+         Useful for debugging and optimization.
+
+         :param sql: SQL query to explain
+         :returns: Query execution plan as string
+         """
+         if not self.spark:
+             raise DbtRuntimeError("Spark engine not connected")
+
+         try:
+             df = self.spark.sql(sql)
+             # Get extended explain with cost model and optimizations
+             return df._jdf.queryExecution().toString()
+
+         except Exception as e:
+             raise DbtRuntimeError(f"Failed to explain query: {str(e)}\nSQL: {sql}") from e
+
+     def cache_table(self, table_name: str) -> None:
+         """
+         Cache a table in Spark memory for faster subsequent queries.
+
+         Useful for tables that are accessed multiple times.
+
+         :param table_name: Name of the table to cache
+         :raises DbtRuntimeError: If table not found or caching fails
+         """
+         if not self.spark:
+             raise DbtRuntimeError("Spark engine not connected")
+
+         if table_name not in self.registered_tables:
+             raise DbtRuntimeError(f"Table '{table_name}' not registered")
+
+         try:
+             self.spark.catalog.cacheTable(table_name)
+         except Exception as e:
+             raise DbtRuntimeError(f"Failed to cache table '{table_name}': {str(e)}") from e
+
+     def uncache_table(self, table_name: str) -> None:
+         """
+         Remove a table from Spark memory cache.
+
+         :param table_name: Name of the table to uncache
+         """
+         if self.spark and table_name in self.registered_tables:
+             try:
+                 self.spark.catalog.uncacheTable(table_name)
+             except Exception:
+                 pass  # Best effort
+
+     def get_platform_info(self) -> Dict[str, Any]:
+         """
+         Get information about the Spark platform and connection.
+
+         :returns: Dictionary with platform metadata
+         """
+         info = {
+             "platform": self._connection_strategy.get_platform_name(),
+             "mode": self.mode,
+         }
+
+         # Add strategy-specific info if available
+         if hasattr(self._connection_strategy, "get_cluster_info"):
+             info.update(self._connection_strategy.get_cluster_info())
+
+         return info
+
+     def estimate_cost(self, duration_minutes: float) -> float:
+         """
+         Estimate execution cost for the configured platform.
+
+         :param duration_minutes: Estimated query duration in minutes
+         :returns: Estimated cost in USD
+         """
+         return self._connection_strategy.estimate_cost(duration_minutes)
+
+     # JDBC Methods (Phase 1: v0.2.0)
+
+     def supports_jdbc(self, adapter_type: str) -> bool:
+         """
+         Check if the given adapter type is supported for JDBC connectivity.
+
+         :param adapter_type: Adapter type (e.g., 'postgres', 'mysql', 'snowflake')
+         :returns: True if JDBC is supported for this adapter type
+
+         Example:
+             >>> engine = SparkEngine()
+             >>> engine.supports_jdbc('postgres')
+             True
+             >>> engine.supports_jdbc('duckdb')
+             False
+         """
+         # Import here to avoid circular dependency
+         from dbt.compute.jdbc_utils import JDBC_DRIVER_MAPPING
+
+         return adapter_type.lower() in JDBC_DRIVER_MAPPING
+
+     def read_jdbc(
+         self,
+         url: str,
+         table: str,
+         properties: Dict[str, str],
+         numPartitions: int = 16,
+         partitionColumn: Optional[str] = None,
+         lowerBound: Optional[int] = None,
+         upperBound: Optional[int] = None,
+         predicates: Optional[List[str]] = None,
+     ) -> DataFrame:
+         """
+         Read data from a JDBC source into Spark DataFrame with parallel reads.
+
+         This method bypasses the DVT node's memory by reading data directly
+         from the source database into Spark workers (distributed memory).
+
+         :param url: JDBC connection URL (e.g., 'jdbc:postgresql://host:port/db')
+         :param table: Table name or SQL query (wrapped in parentheses)
+         :param properties: JDBC connection properties (user, password, driver)
+         :param numPartitions: Number of partitions for parallel reads (default: 16)
+         :param partitionColumn: Column to use for partitioning (must be numeric)
+         :param lowerBound: Lower bound for partition column
+         :param upperBound: Upper bound for partition column
+         :param predicates: List of WHERE clause predicates for filtering partitions
+         :returns: Spark DataFrame with loaded data
+         :raises DbtRuntimeError: If JDBC read fails
+
+         Example:
+             >>> url = "jdbc:postgresql://localhost:5432/warehouse"
+             >>> properties = {
+             ...     "user": "analytics",
+             ...     "password": "secret",
+             ...     "driver": "org.postgresql.Driver"
+             ... }
+             >>> df = engine.read_jdbc(
+             ...     url=url,
+             ...     table="public.orders",
+             ...     properties=properties,
+             ...     numPartitions=16,
+             ...     partitionColumn="order_id",
+             ...     lowerBound=1,
+             ...     upperBound=1000000
+             ... )
+             >>> print(f"Loaded {df.count()} rows")
+
+         Notes:
+             - For partitioned reads, all of (partitionColumn, lowerBound, upperBound)
+               must be provided
+             - Partitioning enables parallel reads across Spark workers
+             - Without partitioning, data is read in a single thread
+         """
+         if not self.spark:
+             raise DbtRuntimeError("Spark engine not connected")
+
+         try:
+             # Build JDBC read options
+             read_options = {
+                 "url": url,
+                 "dbtable": table,
+                 **properties,  # Merge user, password, driver
+             }
+
+             # Add partitioning options if provided
+             if partitionColumn and lowerBound is not None and upperBound is not None:
+                 read_options.update(
+                     {
+                         "partitionColumn": partitionColumn,
+                         "lowerBound": str(lowerBound),
+                         "upperBound": str(upperBound),
+                         "numPartitions": str(numPartitions),
+                     }
+                 )
+
+             # Add predicates if provided
+             if predicates:
+                 # Predicates are used for push-down filtering
+                 read_options["predicates"] = predicates
+
+             # Read via JDBC
+             df = self.spark.read.format("jdbc").options(**read_options).load()
+
+             return df
+
+         except Exception as e:
+             # DVT v0.5.2: Clean error message (no Java stack trace)
+             clean_error = _clean_spark_error(e)
+             raise DbtRuntimeError(f"Failed to read from JDBC source '{table}': {clean_error}")
+
+     def write_jdbc(
+         self,
+         df: DataFrame,
+         url: str,
+         table: str,
+         properties: Dict[str, str],
+         mode: str = "overwrite",
+         batchsize: int = 10000,
+         numPartitions: Optional[int] = None,
+     ) -> None:
+         """
+         Write Spark DataFrame to JDBC target with batch writes.
+
+         This method writes data directly from Spark workers to the target database,
+         bypassing the DVT node's memory.
+
+         :param df: Spark DataFrame to write
+         :param url: JDBC connection URL
+         :param table: Target table name (qualified: schema.table)
+         :param properties: JDBC connection properties (user, password, driver)
+         :param mode: Write mode - 'overwrite', 'append', 'error', 'ignore' (default: 'overwrite')
+         :param batchsize: Number of rows to insert per batch (default: 10000)
+         :param numPartitions: Repartition DataFrame before write for parallelism
+         :raises DbtRuntimeError: If JDBC write fails
+
+         Example:
+             >>> url = "jdbc:postgresql://localhost:5432/warehouse"
+             >>> properties = {
+             ...     "user": "analytics",
+             ...     "password": "secret",
+             ...     "driver": "org.postgresql.Driver"
+             ... }
+             >>> engine.write_jdbc(
+             ...     df=result_df,
+             ...     url=url,
+             ...     table="analytics.aggregated_metrics",
+             ...     properties=properties,
+             ...     mode="overwrite",
+             ...     batchsize=10000
+             ... )
+
+         Notes:
+             - 'overwrite' mode drops and recreates the table
+             - 'append' mode adds data to existing table
+             - Batch size affects memory usage and write performance
+             - Larger batch sizes are faster but use more memory
+         """
+         if not self.spark:
+             raise DbtRuntimeError("Spark engine not connected")
+
+         try:
+             # Repartition if requested for better write parallelism
+             write_df = df
+             if numPartitions:
+                 write_df = df.repartition(numPartitions)
+
+             # DVT v0.5.0: Handle DROP CASCADE for table materialization
+             if mode == "overwrite":
+                 # Drop existing table with CASCADE before writing
+                 # This is essential for declarative workflows (handles dependent views)
+                 try:
+                     import jaydebeapi
+                     conn = jaydebeapi.connect(
+                         properties.get("driver"),
+                         url,
+                         [properties.get("user"), properties.get("password")]
+                     )
+                     cursor = conn.cursor()
+                     cursor.execute(f"DROP TABLE IF EXISTS {table} CASCADE")
+                     conn.commit()
+                     cursor.close()
+                     conn.close()
+                 except Exception:
+                     # If DROP fails (table doesn't exist), continue
+                     pass
+
+             # Build JDBC write options
+             write_options = {
+                 "url": url,
+                 "dbtable": table,
+                 "batchsize": str(batchsize),
+                 **properties,  # Merge user, password, driver
+             }
+
+             # Write via JDBC (now with CASCADE handling)
+             write_df.write.format("jdbc").options(**write_options).mode("append" if mode == "overwrite" else mode).save()
+
+         except Exception as e:
+             # DVT v0.5.0: Extract only the actual error message (remove Java stack trace)
+             error_msg = str(e).split('\n')[0] if '\n' in str(e) else str(e)
+             # Look for PostgreSQL error detail
+             if "PSQLException:" in str(e):
+                 lines = str(e).split('\n')
+                 for i, line in enumerate(lines):
+                     if "PSQLException:" in line:
+                         error_msg = line.split("PSQLException:")[-1].strip()
+                         # Include Detail and Hint if present
+                         if i+1 < len(lines) and "Detail:" in lines[i+1]:
+                             error_msg += "\n " + lines[i+1].strip()
+                         if i+2 < len(lines) and "Hint:" in lines[i+2]:
+                             error_msg += "\n " + lines[i+2].strip()
+                         break
+             raise DbtRuntimeError(f"Failed to write to JDBC target '{table}': {error_msg}")
+
+     def register_jdbc_table(
+         self,
+         url: str,
+         table: str,
+         properties: Dict[str, str],
+         table_alias: str,
+         numPartitions: int = 16,
+         partitionColumn: Optional[str] = None,
+         lowerBound: Optional[int] = None,
+         upperBound: Optional[int] = None,
+     ) -> None:
+         """
+         Read from JDBC and register as a temporary view in Spark.
+
+         Convenience method that combines read_jdbc() and temp view registration.
+
+         :param url: JDBC connection URL
+         :param table: Source table name
+         :param properties: JDBC connection properties
+         :param table_alias: Name to register the table as in Spark
+         :param numPartitions: Number of partitions for parallel reads
+         :param partitionColumn: Column to use for partitioning
+         :param lowerBound: Lower bound for partition column
+         :param upperBound: Upper bound for partition column
+         :raises DbtRuntimeError: If read or registration fails
+
+         Example:
+             >>> engine.register_jdbc_table(
+             ...     url="jdbc:postgresql://localhost:5432/warehouse",
+             ...     table="public.customers",
+             ...     properties={"user": "...", "password": "...", "driver": "..."},
+             ...     table_alias="customers",
+             ...     numPartitions=8,
+             ...     partitionColumn="customer_id",
+             ...     lowerBound=1,
+             ...     upperBound=500000
+             ... )
+             >>> # Now can query with: engine.execute_query("SELECT * FROM customers")
+         """
+         # Read from JDBC
+         df = self.read_jdbc(
+             url=url,
+             table=table,
+             properties=properties,
+             numPartitions=numPartitions,
+             partitionColumn=partitionColumn,
+             lowerBound=lowerBound,
+             upperBound=upperBound,
+         )
+
+         # Register as temp view
+         df.createOrReplaceTempView(table_alias)
+
+         # Track registration
+         self.registered_tables[table_alias] = table_alias
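Taken together, a minimal end-to-end sketch of the SparkEngine API shown in this file (connection details, credentials, and table names are placeholders):

    from dbt.compute.engines import SparkEngine

    properties = {
        "user": "analytics",
        "password": "secret",
        "driver": "org.postgresql.Driver",
    }

    # __enter__/__exit__ wrap connect() and close(); "embedded" runs PySpark in-process.
    with SparkEngine(mode="embedded") as engine:
        # Load a source table into Spark workers and expose it as a temp view.
        engine.register_jdbc_table(
            url="jdbc:postgresql://localhost:5432/warehouse",
            table="public.orders",
            properties=properties,
            table_alias="orders",
        )
        print(engine.list_tables())             # ['orders']
        print(engine.get_table_info("orders"))  # columns, row_count
        print(engine.get_platform_info())       # {'platform': ..., 'mode': 'embedded'}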