dvt-core 0.58.6 (cp311-cp311-macosx_10_9_x86_64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2403 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +50 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
- dbt/compute/jar_provisioning.py +255 -0
- dbt/compute/java_compat.cpython-311-darwin.so +0 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
- dbt/compute/jdbc_utils.py +678 -0
- dbt/compute/metadata/__init__.py +40 -0
- dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/store.py +1499 -0
- dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/local.py +443 -0
- dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.cpython-311-darwin.so +0 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
- dbt/config/dvt_profile.py +342 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +873 -0
- dbt/config/project_utils.py +28 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +553 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +417 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/starter_project/.gitignore +4 -0
- dbt/include/starter_project/README.md +15 -0
- dbt/include/starter_project/__init__.py +3 -0
- dbt/include/starter_project/analyses/.gitkeep +0 -0
- dbt/include/starter_project/dbt_project.yml +36 -0
- dbt/include/starter_project/macros/.gitkeep +0 -0
- dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/starter_project/models/example/schema.yml +21 -0
- dbt/include/starter_project/seeds/.gitkeep +0 -0
- dbt/include/starter_project/snapshots/.gitkeep +0 -0
- dbt/include/starter_project/tests/.gitkeep +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +118 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2204 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.cpython-311-darwin.so +0 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +503 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.cpython-311-darwin.so +0 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +505 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +947 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.cpython-311-darwin.so +0 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +362 -0
- dbt/task/dvt_run.py +204 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.cpython-311-darwin.so +0 -0
- dbt/task/init.py +604 -0
- dbt/task/java.cpython-311-darwin.so +0 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.cpython-311-darwin.so +0 -0
- dbt/task/metadata.py +804 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.cpython-311-darwin.so +0 -0
- dbt/task/profile.py +1307 -0
- dbt/task/profile_serve.py +615 -0
- dbt/task/retract.py +438 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1387 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.cpython-311-darwin.so +0 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.cpython-311-darwin.so +0 -0
- dbt/task/target_sync.py +766 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +270 -0
- dvt_cli/__init__.py +72 -0
- dvt_core-0.58.6.dist-info/METADATA +288 -0
- dvt_core-0.58.6.dist-info/RECORD +324 -0
- dvt_core-0.58.6.dist-info/WHEEL +5 -0
- dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
- dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dbt/task/spark.py
ADDED
@@ -0,0 +1,414 @@
+"""
+Spark Task Module
+
+Handles DVT spark management commands:
+- check: Show PySpark version, Java compatibility, and cluster info
+- set-version: Interactive selection to install PySpark version
+- match-cluster: Detect cluster version and suggest compatible PySpark
+
+v0.51.3: New module for comprehensive Spark management.
+"""
+
+import os
+import subprocess
+import sys
+from typing import Optional, Tuple
+
+import click
+
+from dbt.compute.java_compat import (
+    PYSPARK_JAVA_COMPATIBILITY,
+    PYSPARK_VERSIONS,
+    check_java_pyspark_compatibility,
+    detect_spark_cluster_version,
+    get_current_java,
+    get_pyspark_info,
+)
+
+
+class SparkTask:
+    """Task for managing Spark/PySpark installations."""
+
+    def check(self) -> bool:
+        """
+        Check PySpark installation, Java compatibility, and show status.
+
+        Returns:
+            bool: True if PySpark is installed and Java is compatible
+        """
+        click.echo()
+        click.echo(click.style("PySpark Status", fg="cyan", bold=True))
+        click.echo("-" * 40)
+
+        # Get PySpark info
+        pyspark = get_pyspark_info()
+        if pyspark:
+            click.echo(f" Version: {pyspark.version}")
+            click.echo(f" Major.Minor: {pyspark.major_minor}")
+            click.echo(f" Required Java: {', '.join(str(v) for v in pyspark.java_supported)}")
+            click.echo(f" Recommended: Java {pyspark.java_recommended}")
+        else:
+            click.echo(click.style(" ✗ PySpark not installed!", fg="red"))
+            click.echo()
+            click.echo(" Install with: pip install pyspark")
+            click.echo(" Or run 'dvt spark set-version' to select a version")
+            click.echo()
+            return False
+
+        click.echo()
+        click.echo(click.style("Java Compatibility", fg="cyan", bold=True))
+        click.echo("-" * 40)
+
+        # Get current Java
+        java = get_current_java()
+        if java:
+            click.echo(f" JAVA_HOME: {java.path}")
+            click.echo(f" Version: Java {java.version}")
+            click.echo(f" Vendor: {java.vendor}")
+
+            # Check compatibility
+            is_compat, msg = check_java_pyspark_compatibility(java.version, pyspark.major_minor)
+            click.echo()
+            if is_compat:
+                click.echo(click.style(f" ✓ {msg}", fg="green"))
+            else:
+                click.echo(click.style(f" ✗ {msg}", fg="red"))
+                click.echo()
+                click.echo(" Run 'dvt java set' to select a compatible Java version")
+        else:
+            click.echo(click.style(" ✗ Java not found!", fg="red"))
+            click.echo()
+            supported = ", ".join(str(v) for v in pyspark.java_supported)
+            click.echo(f" PySpark {pyspark.version} requires Java {supported}")
+            click.echo()
+            click.echo(" Run 'dvt java search' to find Java installations")
+            click.echo(" Run 'dvt java install' for installation guide")
+            click.echo()
+            return False
+
+        click.echo()
+        return is_compat if java else False
+
+    def set_version(self) -> bool:
+        """
+        Interactive selection to install a specific PySpark version.
+
+        Presents available PySpark versions with Java requirements,
+        then installs the selected version via pip.
+
+        Returns:
+            bool: True if installation successful
+        """
+        click.echo()
+        click.echo(click.style("Select PySpark Version to Install", fg="cyan", bold=True))
+        click.echo("=" * 50)
+        click.echo()
+
+        # Get current Java for compatibility display
+        java = get_current_java()
+        current_java_version = java.version if java else None
+
+        # Display available versions
+        for i, (version, major_minor, tag) in enumerate(PYSPARK_VERSIONS, 1):
+            compat = PYSPARK_JAVA_COMPATIBILITY.get(major_minor, {})
+            supported = compat.get("supported", [])
+            supported_str = ", ".join(str(v) for v in supported)
+
+            # Tag display
+            if tag == "latest":
+                tag_display = click.style(" (latest)", fg="green")
+            elif tag == "stable":
+                tag_display = click.style(" (stable)", fg="blue")
+            else:
+                tag_display = ""
+
+            # Compatibility indicator
+            if current_java_version and supported:
+                if current_java_version in supported:
+                    compat_marker = click.style(" ✓", fg="green")
+                else:
+                    compat_marker = click.style(" ✗", fg="red")
+            else:
+                compat_marker = ""
+
+            click.echo(f" [{i}] PySpark {version}{tag_display}{compat_marker}")
+            click.echo(f" Requires Java: {supported_str}")
+            click.echo()
+
+        click.echo(f" [{len(PYSPARK_VERSIONS) + 1}] Custom version")
+        click.echo()
+
+        # Show current Java info
+        if java:
+            click.echo(f" Current Java: {java.version} ({java.vendor})")
+            click.echo(f" ✓ = compatible with your Java, ✗ = incompatible")
+            click.echo()
+
+        # Get user choice
+        while True:
+            try:
+                choice = click.prompt("Your choice", type=int)
+                if 1 <= choice <= len(PYSPARK_VERSIONS) + 1:
+                    break
+                click.echo(click.style(f"Please enter a number between 1 and {len(PYSPARK_VERSIONS) + 1}", fg="yellow"))
+            except click.Abort:
+                click.echo("\nAborted.")
+                return False
+
+        # Determine version to install
+        if choice <= len(PYSPARK_VERSIONS):
+            version_to_install, major_minor, _ = PYSPARK_VERSIONS[choice - 1]
+        else:
+            # Custom version
+            version_to_install = click.prompt("Enter PySpark version (e.g., 3.4.1)")
+            parts = version_to_install.split(".")
+            major_minor = f"{parts[0]}.{parts[1]}" if len(parts) >= 2 else parts[0]
+
+        # Check Java compatibility before installing
+        compat = PYSPARK_JAVA_COMPATIBILITY.get(major_minor, {})
+        supported = compat.get("supported", [])
+
+        if java and supported and java.version not in supported:
+            click.echo()
+            click.echo(click.style(f"⚠️ Warning: PySpark {major_minor} requires Java {', '.join(str(v) for v in supported)}", fg="yellow"))
+            click.echo(f" Your current Java: {java.version}")
+            click.echo()
+            if not click.confirm("Install anyway? (You'll need to switch Java versions)"):
+                return False
+
+        # Install PySpark
+        click.echo()
+        click.echo(f"Installing PySpark {version_to_install}...")
+        click.echo()
+
+        try:
+            # Use pip to install
+            cmd = [sys.executable, "-m", "pip", "install", f"pyspark=={version_to_install}"]
+            result = subprocess.run(cmd, capture_output=False, text=True)
+
+            if result.returncode == 0:
+                click.echo()
+                click.echo(click.style(f"✓ PySpark {version_to_install} installed successfully", fg="green"))
+
+                # Post-install compatibility check
+                if java and supported and java.version not in supported:
+                    click.echo()
+                    click.echo(click.style("⚠️ Java compatibility note:", fg="yellow"))
+                    click.echo(f" PySpark {major_minor} requires Java {', '.join(str(v) for v in supported)}")
+                    click.echo(f" Run 'dvt java set' to select a compatible Java version")
+
+                click.echo()
+                return True
+            else:
+                click.echo()
+                click.echo(click.style(f"✗ Failed to install PySpark {version_to_install}", fg="red"))
+                click.echo()
+                return False
+
+        except Exception as e:
+            click.echo()
+            click.echo(click.style(f"✗ Installation error: {str(e)}", fg="red"))
+            click.echo()
+            return False
+
+    def match_cluster(self, compute_name: str) -> bool:
+        """
+        Detect Spark version from a cluster and suggest compatible PySpark.
+
+        Reads the compute configuration from computes.yml, connects to the
+        cluster, and compares versions with locally installed PySpark.
+
+        Args:
+            compute_name: Name of compute engine in computes.yml
+
+        Returns:
+            bool: True if versions match, False if mismatch or error
+        """
+        from dbt.config.compute import ComputeRegistry
+
+        click.echo()
+        click.echo(click.style(f"Checking cluster compatibility: {compute_name}", fg="cyan", bold=True))
+        click.echo("=" * 50)
+        click.echo()
+
+        # Load compute configuration
+        try:
+            registry = ComputeRegistry()
+            compute_cluster = registry.get(compute_name)
+            if not compute_cluster:
+                click.echo(click.style(f"✗ Compute '{compute_name}' not found in computes.yml", fg="red"))
+                click.echo()
+                click.echo(" Run 'dvt compute list' to see available compute engines")
+                click.echo()
+                return False
+        except Exception as e:
+            click.echo(click.style(f"✗ Error loading compute config: {str(e)}", fg="red"))
+            click.echo()
+            return False
+
+        # Get cluster info (ComputeCluster has config attribute)
+        config = compute_cluster.config if hasattr(compute_cluster, 'config') else {}
+        master_url = config.get("master")
+        host = config.get("host")
+        compute_type = compute_cluster.type if hasattr(compute_cluster, 'type') else 'spark'
+
+        click.echo(f" Compute: {compute_name}")
+        click.echo(f" Type: {compute_type}")
+        if master_url:
+            click.echo(f" Master URL: {master_url}")
+        if host:
+            click.echo(f" Host: {host}")
+
+        # Detect cluster version
+        click.echo()
+        click.echo("Connecting to cluster...")
+
+        cluster_version = None
+        if master_url and master_url.startswith("spark://"):
+            # Standalone cluster
+            cluster_version = detect_spark_cluster_version(master_url)
+        elif master_url == "local[*]" or (master_url and master_url.startswith("local")):
+            # Local mode - just use PySpark version
+            pyspark = get_pyspark_info()
+            if pyspark:
+                cluster_version = pyspark.version
+                click.echo(click.style(" (Local mode - using PySpark version)", fg="blue"))
+        elif host and "databricks" in host.lower():
+            # Databricks - requires databricks-connect
+            click.echo(click.style(" Databricks cluster detected", fg="blue"))
+            click.echo(" Note: Version detection requires active connection")
+            # Try to get version via Databricks Connect if installed
+            try:
+                from databricks.connect import DatabricksSession
+                # We can't actually connect without full config, so just note it
+                click.echo(" Run 'dvt compute test {compute_name}' to verify connectivity")
+            except ImportError:
+                click.echo(" Install databricks-connect for full support")
+
+        if cluster_version:
+            click.echo()
+            click.echo(click.style("Cluster Information", fg="cyan", bold=True))
+            click.echo("-" * 40)
+            click.echo(f" Spark Version: {cluster_version}")
+
+            # Extract major.minor
+            parts = cluster_version.split(".")
+            cluster_major_minor = f"{parts[0]}.{parts[1]}" if len(parts) >= 2 else parts[0]
+
+            # Compare with local PySpark
+            pyspark = get_pyspark_info()
+            click.echo()
+            click.echo(click.style("Version Comparison", fg="cyan", bold=True))
+            click.echo("-" * 40)
+
+            if pyspark:
+                click.echo(f" Driver (local): PySpark {pyspark.version}")
+                click.echo(f" Cluster: Spark {cluster_version}")
+
+                if pyspark.major_minor == cluster_major_minor:
+                    click.echo()
+                    click.echo(click.style(" ✓ Versions match!", fg="green"))
+                    click.echo()
+                    return True
+                else:
+                    click.echo()
+                    click.echo(click.style(" ✗ VERSION MISMATCH!", fg="red", bold=True))
+                    click.echo()
+                    click.echo(f" Driver (local): PySpark {pyspark.major_minor}")
+                    click.echo(f" Cluster: Spark {cluster_major_minor}")
+                    click.echo()
+                    click.echo(click.style("Recommendation:", fg="yellow"))
+                    click.echo(f" Run 'dvt spark set-version' and select PySpark {cluster_major_minor}.x")
+                    click.echo()
+
+                    # Check Java requirements for target version
+                    target_compat = PYSPARK_JAVA_COMPATIBILITY.get(cluster_major_minor)
+                    if target_compat:
+                        java = get_current_java()
+                        supported = target_compat["supported"]
+                        click.echo(click.style("Java Note:", fg="yellow"))
+                        click.echo(f" PySpark {cluster_major_minor} requires Java {', '.join(str(v) for v in supported)}")
+                        if java:
+                            if java.version in supported:
+                                click.echo(f" Current Java {java.version} is compatible ✓")
+                            else:
+                                click.echo(f" Current Java {java.version} is NOT compatible")
+                                click.echo(f" Run 'dvt java set' to select a compatible version")
+                        click.echo()
+
+                    return False
+            else:
+                click.echo(click.style(" ✗ PySpark not installed locally", fg="red"))
+                click.echo()
+                click.echo(f" Run 'dvt spark set-version' and select PySpark {cluster_major_minor}.x")
+                click.echo()
+                return False
+        else:
+            click.echo()
+            click.echo(click.style(" ⚠️ Could not detect cluster version", fg="yellow"))
+            click.echo()
+            click.echo(" Possible reasons:")
+            click.echo(" - Cluster is not running")
+            click.echo(" - Network connectivity issues")
+            click.echo(" - Firewall blocking connection")
+            click.echo()
+            click.echo(" Try:")
+            click.echo(f" - Start the cluster")
+            click.echo(f" - Run 'dvt compute test {compute_name}' to verify connectivity")
+            click.echo()
+            return False
+
+    def show_versions(self) -> None:
+        """
+        Display PySpark/Java compatibility matrix.
+
+        Shows all available PySpark versions and their Java requirements.
+        """
+        click.echo()
+        click.echo(click.style("PySpark/Java Compatibility Matrix", fg="cyan", bold=True))
+        click.echo("=" * 60)
+        click.echo()
+
+        # Get current versions
+        pyspark = get_pyspark_info()
+        java = get_current_java()
+
+        click.echo("Available PySpark Versions:")
+        click.echo()
+
+        for version, major_minor, tag in PYSPARK_VERSIONS:
+            compat = PYSPARK_JAVA_COMPATIBILITY.get(major_minor, {})
+            supported = compat.get("supported", [])
+            recommended = compat.get("recommended", supported[0] if supported else "?")
+
+            # Current marker
+            current_marker = ""
+            if pyspark and pyspark.version == version:
+                current_marker = click.style(" * INSTALLED", fg="green")
+
+            # Tag
+            if tag == "latest":
+                tag_display = click.style(" (latest)", fg="green")
+            elif tag == "stable":
+                tag_display = click.style(" (stable)", fg="blue")
+            else:
+                tag_display = ""
+
+            click.echo(f" PySpark {version}{tag_display}{current_marker}")
+            click.echo(f" Java Required: {', '.join(str(v) for v in supported)}")
+            click.echo(f" Java Recommended: {recommended}")
+            click.echo()
+
+        # Show current status
+        click.echo("-" * 60)
+        click.echo()
+        click.echo("Current Environment:")
+        if pyspark:
+            click.echo(f" PySpark: {pyspark.version}")
+        else:
+            click.echo(" PySpark: not installed")
+        if java:
+            click.echo(f" Java: {java.version} ({java.vendor})")
+        else:
+            click.echo(" Java: not found")
+        click.echo()
dbt/task/sql.py
ADDED
@@ -0,0 +1,110 @@
+import traceback
+from abc import abstractmethod
+from datetime import datetime, timezone
+from typing import Generic, TypeVar
+
+import dbt.exceptions
+import dbt_common.exceptions.base
+from dbt.contracts.graph.manifest import Manifest
+from dbt.contracts.sql import (
+    RemoteCompileResult,
+    RemoteCompileResultMixin,
+    RemoteRunResult,
+    ResultTable,
+)
+from dbt.events.types import SQLRunnerException
+from dbt.task.compile import CompileRunner
+from dbt_common.events.functions import fire_event
+
+SQLResult = TypeVar("SQLResult", bound=RemoteCompileResultMixin)
+
+
+class GenericSqlRunner(CompileRunner, Generic[SQLResult]):
+    def __init__(self, config, adapter, node, node_index, num_nodes) -> None:
+        CompileRunner.__init__(self, config, adapter, node, node_index, num_nodes)
+
+    def handle_exception(self, e, ctx):
+        fire_event(
+            SQLRunnerException(
+                exc=str(e), exc_info=traceback.format_exc(), node_info=self.node.node_info
+            )
+        )
+        # REVIEW: This code is invalid and will always throw.
+        if isinstance(e, dbt.exceptions.Exception):
+            if isinstance(e, dbt_common.exceptions.DbtRuntimeError):
+                e.add_node(ctx.node)
+            return e
+
+    def before_execute(self) -> None:
+        pass
+
+    def after_execute(self, result) -> None:
+        pass
+
+    def compile(self, manifest: Manifest):
+        return self.compiler.compile_node(self.node, manifest, {}, write=False)
+
+    @abstractmethod
+    def execute(self, compiled_node, manifest) -> SQLResult:
+        pass
+
+    @abstractmethod
+    def from_run_result(self, result, start_time, timing_info) -> SQLResult:
+        pass
+
+    def error_result(self, node, error, start_time, timing_info):
+        raise error
+
+    def ephemeral_result(self, node, start_time, timing_info):
+        raise dbt_common.exceptions.base.NotImplementedError(
+            "cannot execute ephemeral nodes remotely!"
+        )
+
+
+class SqlCompileRunner(GenericSqlRunner[RemoteCompileResult]):
+    def execute(self, compiled_node, manifest) -> RemoteCompileResult:
+        return RemoteCompileResult(
+            raw_code=compiled_node.raw_code,
+            compiled_code=compiled_node.compiled_code,
+            node=compiled_node,
+            timing=[],  # this will get added later
+            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+        )
+
+    def from_run_result(self, result, start_time, timing_info) -> RemoteCompileResult:
+        return RemoteCompileResult(
+            raw_code=result.raw_code,
+            compiled_code=result.compiled_code,
+            node=result.node,
+            timing=timing_info,
+            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+        )
+
+
+class SqlExecuteRunner(GenericSqlRunner[RemoteRunResult]):
+    def execute(self, compiled_node, manifest) -> RemoteRunResult:
+        _, execute_result = self.adapter.execute(compiled_node.compiled_code, fetch=True)
+
+        table = ResultTable(
+            column_names=list(execute_result.column_names),
+            rows=[list(row) for row in execute_result],
+        )
+
+        return RemoteRunResult(
+            raw_code=compiled_node.raw_code,
+            compiled_code=compiled_node.compiled_code,
+            node=compiled_node,
+            table=table,
+            timing=[],
+            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+        )
+
+    def from_run_result(self, result, start_time, timing_info) -> RemoteRunResult:
+        return RemoteRunResult(
+            raw_code=result.raw_code,
+            compiled_code=result.compiled_code,
+            node=result.node,
+            table=result.table,
+            timing=timing_info,
+            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+        )
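Note: the runners above follow a compile-then-execute protocol inherited from CompileRunner. A rough sketch of how a subclass is driven, not package code: the config, adapter, node, and manifest arguments stand in for objects dbt's runtime constructs (the real call path goes through dbt/task/runnable.py and dbt/task/sql.py's callers):

    # Sketch under stated assumptions; SqlExecuteRunner itself is real per the diff.
    from dbt.task.sql import SqlExecuteRunner

    def run_remote_sql(config, adapter, node, manifest):
        # node_index/num_nodes are per-run bookkeeping args from CompileRunner
        runner = SqlExecuteRunner(config, adapter, node, node_index=1, num_nodes=1)
        compiled = runner.compile(manifest)          # Jinja render, write=False
        result = runner.execute(compiled, manifest)  # adapter.execute(..., fetch=True)
        # RemoteRunResult carries the fetched rows as a ResultTable
        return result.table.column_names, result.table.rows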