dagster-databricks 0.28.7__tar.gz → 0.28.9__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- {dagster_databricks-0.28.7/dagster_databricks.egg-info → dagster_databricks-0.28.9}/PKG-INFO +5 -4
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/README.md +1 -1
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/__init__.py +3 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/component.py +4 -15
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/configs.py +19 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/resource.py +64 -1
- dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/__init__.py +0 -0
- dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/component.py +207 -0
- dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/schema.py +49 -0
- dagster_databricks-0.28.9/dagster_databricks/utils.py +8 -0
- dagster_databricks-0.28.9/dagster_databricks/version.py +1 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9/dagster_databricks.egg-info}/PKG-INFO +5 -4
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/SOURCES.txt +5 -1
- dagster_databricks-0.28.9/dagster_databricks.egg-info/requires.txt +5 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/setup.py +4 -3
- dagster_databricks-0.28.7/dagster_databricks/version.py +0 -1
- dagster_databricks-0.28.7/dagster_databricks.egg-info/requires.txt +0 -4
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/LICENSE +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/MANIFEST.in +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/_test_utils.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/__init__.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/__init__.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/scaffolder.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/configs.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks_pyspark_step_launcher.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks_step_main.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/ops.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/pipes.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/py.typed +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/resources.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/types.py +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/dependency_links.txt +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/not-zip-safe +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/top_level.txt +0 -0
- {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/setup.cfg +0 -0
{dagster_databricks-0.28.7/dagster_databricks.egg-info → dagster_databricks-0.28.9}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dagster-databricks
-Version: 0.28.7
+Version: 0.28.9
 Summary: Package for Databricks-specific Dagster framework op and resource components.
 Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-databricks
 Author: Dagster Labs
@@ -11,9 +11,10 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10,<3.15
 License-File: LICENSE
-Requires-Dist: dagster==1.12.7
-Requires-Dist: dagster-pipes==1.12.7
-Requires-Dist: dagster-pyspark==0.28.7
+Requires-Dist: dagster==1.12.9
+Requires-Dist: dagster-pipes==1.12.9
+Requires-Dist: dagster-pyspark==0.28.9
+Requires-Dist: aiohttp
 Requires-Dist: databricks-sdk<0.61.0,>=0.41
 Dynamic: author
 Dynamic: author-email
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/README.md
RENAMED

@@ -1,7 +1,7 @@
 # dagster-databricks
 
 The docs for `dagster-databricks` can be found
-[here](https://docs.dagster.io/
+[here](https://docs.dagster.io/integrations/libraries/databricks/dagster-databricks).
 
 A guide for integrating Databricks using Dagster Pipes can be found
 [here](https://docs.dagster.io/guides/dagster-pipes/databricks).
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/__init__.py
RENAMED

@@ -13,6 +13,9 @@ from dagster_shared.libraries import DagsterLibraryRegistry
 from dagster_databricks.components.databricks_asset_bundle.component import (
     DatabricksAssetBundleComponent as DatabricksAssetBundleComponent,
 )
+from dagster_databricks.components.databricks_workspace.component import (
+    DatabricksWorkspaceComponent as DatabricksWorkspaceComponent,
+)
 from dagster_databricks.databricks import (
     DatabricksClient as DatabricksClient,
     DatabricksError as DatabricksError,
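With this re-export in place, the new component is importable from the package root. A minimal sketch, assuming an environment with dagster-databricks 0.28.9 installed:

```python
# Assumes dagster-databricks 0.28.9 is installed.
from dagster_databricks import DatabricksAssetBundleComponent, DatabricksWorkspaceComponent

print(DatabricksWorkspaceComponent.__module__)
# dagster_databricks.components.databricks_workspace.component
```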
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/component.py
RENAMED

@@ -1,5 +1,4 @@
 import os
-import re
 from collections import defaultdict
 from dataclasses import dataclass, field
 from functools import cached_property
@@ -35,17 +34,7 @@ from dagster_databricks.components.databricks_asset_bundle.resource import Datab
 from dagster_databricks.components.databricks_asset_bundle.scaffolder import (
     DatabricksAssetBundleScaffolder,
 )
-
-
-def snake_case(name: str) -> str:
-    """Convert a string to snake_case."""
-    # Remove file extension if present
-    name = Path(name).stem
-    # Replace special characters and spaces with underscores
-    name = re.sub(r"[^a-zA-Z0-9]+", "_", name)
-    # Convert CamelCase to snake_case
-    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name)
-    return name.lower().strip("_")
+from dagster_databricks.utils import snake_case
 
 
 @dataclass
@@ -251,9 +240,9 @@ class DatabricksAssetBundleComponent(Component, Resolvable):
         )
 
     def build_defs(self, context: ComponentLoadContext) -> Definitions:
-        component_defs_path_as_python_str = 
-            os.path.relpath(context.component_path.file_path, start=context.project_root)
-        )
+        component_defs_path_as_python_str = snake_case(
+            str(os.path.relpath(context.component_path.file_path, start=context.project_root))
+        )
 
         databricks_assets = []
         for task_key, asset_specs in self.asset_specs_by_task_key.items():
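The `snake_case` helper is removed here and imported from the new `dagster_databricks/utils.py` module instead. The diff for `utils.py` itself is not reproduced above, but the deleted lines show what was relocated; a sketch reconstructed from those removed lines (the exact contents of the shipped `utils.py` may differ):

```python
import re
from pathlib import Path


def snake_case(name: str) -> str:
    """Convert a string to snake_case (reconstructed from the lines removed above)."""
    # Remove file extension if present
    name = Path(name).stem
    # Replace special characters and spaces with underscores
    name = re.sub(r"[^a-zA-Z0-9]+", "_", name)
    # Convert CamelCase to snake_case
    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name)
    return name.lower().strip("_")


print(snake_case("My Notebook.ipynb"))  # -> my_notebook
```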
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/configs.py
RENAMED

@@ -12,6 +12,7 @@ from dagster import (
     get_dagster_logger,
 )
 from dagster._annotations import preview
+from dagster._serdes import whitelist_for_serdes
 from dagster_shared.record import IHaveNew, record, record_custom
 from databricks.sdk.service import jobs
 from typing_extensions import Self, TypeVar
@@ -110,6 +111,7 @@ class DatabricksTaskDependsOnConfig:
     outcome: Optional[str]
 
 
+@whitelist_for_serdes
 @record
 class DatabricksBaseTask(ABC, Generic[T_DatabricksSdkTask]):
     task_key: str
@@ -143,6 +145,7 @@ class DatabricksBaseTask(ABC, Generic[T_DatabricksSdkTask]):
     def to_databricks_sdk_task(self) -> T_DatabricksSdkTask: ...
 
 
+@whitelist_for_serdes
 @record
 class DatabricksNotebookTask(DatabricksBaseTask[jobs.NotebookTask]):
     @property
@@ -186,6 +189,7 @@ class DatabricksNotebookTask(DatabricksBaseTask[jobs.NotebookTask]):
         )
 
 
+@whitelist_for_serdes
 @record
 class DatabricksConditionTask(DatabricksBaseTask[jobs.ConditionTask]):
     @property
@@ -236,6 +240,7 @@ class DatabricksConditionTask(DatabricksBaseTask[jobs.ConditionTask]):
         )
 
 
+@whitelist_for_serdes
 @record
 class DatabricksSparkPythonTask(DatabricksBaseTask[jobs.SparkPythonTask]):
     @property
@@ -282,6 +287,7 @@ class DatabricksSparkPythonTask(DatabricksBaseTask[jobs.SparkPythonTask]):
         )
 
 
+@whitelist_for_serdes
 @record
 class DatabricksPythonWheelTask(DatabricksBaseTask[jobs.PythonWheelTask]):
     @property
@@ -331,6 +337,7 @@ class DatabricksPythonWheelTask(DatabricksBaseTask[jobs.PythonWheelTask]):
         )
 
 
+@whitelist_for_serdes
 @record
 class DatabricksSparkJarTask(DatabricksBaseTask[jobs.SparkJarTask]):
     @property
@@ -376,6 +383,7 @@ class DatabricksSparkJarTask(DatabricksBaseTask[jobs.SparkJarTask]):
         )
 
 
+@whitelist_for_serdes
 @record
 class DatabricksJobTask(DatabricksBaseTask[jobs.RunJobTask]):
     @property
@@ -420,6 +428,7 @@ class DatabricksJobTask(DatabricksBaseTask[jobs.RunJobTask]):
         )
 
 
+@whitelist_for_serdes
 @record
 class DatabricksUnknownTask(DatabricksBaseTask):
     @property
@@ -609,3 +618,13 @@ class ResolvedDatabricksExistingClusterConfig(Resolvable, Model):
 @preview
 class ResolvedDatabricksServerlessConfig(Resolvable, Model):
     is_serverless: bool = True
+
+
+@whitelist_for_serdes
+@record
+class DatabricksJob:
+    """Represents a Databricks Job structure for serialization."""
+
+    job_id: int
+    name: str
+    tasks: Optional[list[DatabricksBaseTask]] = None
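The `@whitelist_for_serdes` decorators make these records serializable with Dagster's serdes machinery, which is what lets the new workspace component (further down) cache fetched job metadata as state. A minimal round-trip sketch, using the `serialize_value`/`deserialize_value` helpers that this release imports elsewhere; the field values are illustrative only:

```python
from dagster_shared.serdes.serdes import deserialize_value, serialize_value

from dagster_databricks.components.databricks_asset_bundle.configs import DatabricksJob

# Hypothetical job metadata; values are placeholders.
job = DatabricksJob(job_id=123, name="nightly_etl", tasks=None)

payload = serialize_value(job)                       # serdes-encoded string
restored = deserialize_value(payload, DatabricksJob)  # back to a DatabricksJob record

assert restored.job_id == job.job_id and restored.name == job.name
```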
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/resource.py
RENAMED

@@ -1,5 +1,12 @@
+import asyncio
 from collections.abc import Iterator, Mapping
-from typing import TYPE_CHECKING, Any, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import aiohttp
+
+DATABRICKS_JOBS_API_PATH = "/api/2.1/jobs"
+MAX_CONCURRENT_REQUESTS = 10
+RATE_LIMIT_STATUS_CODE = 429
 
 from dagster import (
     AssetExecutionContext,
@@ -16,6 +23,7 @@ from databricks.sdk.service import compute, jobs
 from pydantic import Field
 
 from dagster_databricks.components.databricks_asset_bundle.configs import (
+    DatabricksJob,
     ResolvedDatabricksExistingClusterConfig,
     ResolvedDatabricksNewClusterConfig,
     ResolvedDatabricksServerlessConfig,
@@ -44,6 +52,61 @@ class DatabricksWorkspace(ConfigurableResource):
             token=self.token,
         )
 
+    async def fetch_jobs(self, databricks_filter: Any) -> list[DatabricksJob]:
+        """Fetches jobs efficiently using async I/O directly from the resource."""
+        headers = {"Authorization": f"Bearer {self.token}"}
+        base_url = self.host.rstrip("/")
+
+        async with aiohttp.ClientSession(headers=headers) as session:
+            list_url = f"{base_url}{DATABRICKS_JOBS_API_PATH}list"
+            async with session.get(list_url) as resp:
+                resp.raise_for_status()
+                data = await resp.json()
+        all_jobs_lite = data.get("jobs", [])
+
+        job_ids_to_fetch = []
+        for j in all_jobs_lite:
+            if databricks_filter and not databricks_filter.include_job(j):
+                continue
+            job_ids_to_fetch.append(j["job_id"])
+
+        if not job_ids_to_fetch:
+            return []
+
+        semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
+
+        async def _fetch_single_job(job_id: int) -> Optional[dict]:
+            async with semaphore:
+                async with aiohttp.ClientSession(headers=headers) as session:
+                    url = f"{base_url}{DATABRICKS_JOBS_API_PATH}/get?job_id={job_id}"
+                    async with session.get(url) as resp:
+                        if resp.status == MAX_CONCURRENT_REQUESTS:
+                            await asyncio.sleep(1)
+                            return await _fetch_single_job(job_id)
+
+                        if resp.status != 200:
+                            resp.raise_for_status()
+
+                        return await resp.json()
+
+        tasks_coroutines = [_fetch_single_job(jid) for jid in job_ids_to_fetch]
+        raw_jobs = await asyncio.gather(*tasks_coroutines)
+
+        final_jobs = []
+        for rj in raw_jobs:
+            if not rj:
+                continue
+
+            settings = rj.get("settings", {})
+            job = DatabricksJob(
+                job_id=rj["job_id"],
+                name=settings.get("name", "Unnamed Job"),
+                tasks=settings.get("tasks", []),
+            )
+            final_jobs.append(job)
+
+        return final_jobs
+
     def submit_and_poll(
         self, component: "DatabricksAssetBundleComponent", context: AssetExecutionContext
     ) -> Iterator[Union[AssetMaterialization, MaterializeResult]]:
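`fetch_jobs` is a coroutine, so callers outside an event loop need `asyncio.run`. A usage sketch, assuming a `DatabricksWorkspace` configured with `host` and `token`; the workspace URL and token below are placeholders:

```python
import asyncio

from dagster_databricks.components.databricks_asset_bundle.resource import DatabricksWorkspace

# Placeholder credentials; substitute a real workspace URL and personal access token.
workspace = DatabricksWorkspace(
    host="https://example-workspace.cloud.databricks.com",
    token="dapiXXXXXXXXXXXXXXXX",
)

# Passing None includes every job; a DatabricksFilter (see schema.py below) narrows the set.
jobs = asyncio.run(workspace.fetch_jobs(None))
for job in jobs:
    print(job.job_id, job.name)
```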
dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/__init__.py
ADDED
File without changes (new empty file)
dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/component.py
ADDED

@@ -0,0 +1,207 @@
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Annotated, Any, Optional
+
+from dagster import (
+    AssetExecutionContext,
+    AssetKey,
+    AssetsDefinition,
+    AssetSpec,
+    Definitions,
+    MaterializeResult,
+    MetadataValue,
+    Resolvable,
+    ResolvedAssetSpec,
+    multi_asset,
+)
+from dagster._serdes import whitelist_for_serdes
+from dagster._symbol_annotations.lifecycle import preview
+from dagster.components import Resolver
+from dagster.components.component.state_backed_component import StateBackedComponent
+from dagster.components.utils.defs_state import (
+    DefsStateConfig,
+    DefsStateConfigArgs,
+    ResolvedDefsStateConfig,
+)
+from dagster_shared.record import record
+from dagster_shared.serdes.serdes import deserialize_value, serialize_value
+from databricks.sdk.service.jobs import RunResultState
+
+from dagster_databricks.components.databricks_asset_bundle.component import (
+    DatabricksWorkspaceArgs,
+    resolve_databricks_workspace,
+)
+from dagster_databricks.components.databricks_asset_bundle.configs import (
+    DatabricksBaseTask,
+    DatabricksJob,
+)
+from dagster_databricks.components.databricks_asset_bundle.resource import DatabricksWorkspace
+from dagster_databricks.components.databricks_workspace.schema import ResolvedDatabricksFilter
+from dagster_databricks.utils import snake_case
+
+
+@whitelist_for_serdes
+@record
+class DatabricksWorkspaceData:
+    """Container for serialized Databricks workspace state."""
+
+    jobs: list[DatabricksJob]
+
+
+@preview
+@dataclass
+class DatabricksWorkspaceComponent(StateBackedComponent, Resolvable):
+    """Component that fetches Databricks workspace jobs and exposes them as assets."""
+
+    workspace: Annotated[
+        DatabricksWorkspace,
+        Resolver(
+            resolve_databricks_workspace,
+            model_field_type=DatabricksWorkspaceArgs.model(),
+            description="The mapping defining a DatabricksWorkspace.",
+        ),
+    ]
+
+    databricks_filter: Annotated[
+        Optional[ResolvedDatabricksFilter],
+        Resolver.default(description="Filter which Databricks jobs to include"),
+    ] = None
+
+    assets_by_task_key: Annotated[
+        Optional[dict[str, list[ResolvedAssetSpec]]],
+        Resolver.default(
+            description="Optional mapping of Databricks task keys to lists of Dagster AssetSpecs.",
+        ),
+    ] = None
+
+    defs_state: ResolvedDefsStateConfig = field(
+        default_factory=DefsStateConfigArgs.legacy_code_server_snapshots
+    )
+
+    @property
+    def defs_state_config(self) -> DefsStateConfig:
+        default_key = f"{self.__class__.__name__}[{self.workspace.host}]"
+        return DefsStateConfig.from_args(self.defs_state, default_key=default_key)
+
+    async def write_state_to_path(self, state_path: Path) -> None:
+        """Async implementation of state fetching."""
+        jobs = await self.workspace.fetch_jobs(self.databricks_filter)
+
+        data = DatabricksWorkspaceData(jobs=jobs)
+        state_path.write_text(serialize_value(data))
+
+    def build_defs_from_state(self, context: Any, state_path: Optional[Path]) -> Definitions:
+        """Build Dagster Definitions from the cached state."""
+        if not state_path or not state_path.exists():
+            return Definitions()
+
+        workspace_data = deserialize_value(state_path.read_text(), DatabricksWorkspaceData)
+        jobs_state = workspace_data.jobs
+
+        databricks_assets = []
+
+        for job in jobs_state:
+            job_specs = []
+            task_key_map = {}
+            tasks = job.tasks or []
+
+            for task in tasks:
+                specs = self.get_asset_specs(task=task, job_name=job.name)
+
+                for spec in specs:
+                    job_specs.append(spec)
+                    task_key_map[spec.key] = task.task_key
+
+            if job_specs:
+                asset_def = self._create_job_asset_def(job, job_specs, task_key_map)
+                databricks_assets.append(asset_def)
+
+        return Definitions(assets=databricks_assets)
+
+    def _create_job_asset_def(
+        self, job: DatabricksJob, specs: list[Any], task_key_map: dict
+    ) -> AssetsDefinition:
+        asset_name = f"databricks_job_{job.job_id}"
+
+        @multi_asset(name=asset_name, specs=specs, can_subset=True)
+        def _execution_fn(context: AssetExecutionContext):
+            client = self.workspace.get_client()
+            selected_keys = context.selected_asset_keys
+
+            tasks_to_run = [
+                task_key
+                for task_key, specs in (self.assets_by_task_key or {}).items()
+                if any(spec.key in selected_keys for spec in specs)
+            ]
+            context.log.info(f"Triggering Databricks job {job.job_id} for tasks: {tasks_to_run}")
+
+            run = client.jobs.run_now(
+                job_id=job.job_id, only=tasks_to_run if tasks_to_run else None
+            )
+            if run.run_page_url:
+                context.log.info(f"Run URL: {run.run_page_url}")
+
+            client.jobs.wait_get_run_job_terminated_or_skipped(run.run_id)
+
+            final_run = client.jobs.get_run(run.run_id)
+            state_obj = final_run.state
+            result_state = state_obj.result_state if state_obj else None
+
+            if result_state != RunResultState.SUCCESS:
+                status_str = result_state.value if result_state else "UNKNOWN"
+                error_msg = f"Job {job.job_id} failed: {status_str}. URL: {run.run_page_url}"
+                context.log.error(error_msg)
+                raise Exception(error_msg)
+
+            for spec in specs:
+                if spec.key in selected_keys:
+                    current_task_key = next(
+                        (
+                            t_key
+                            for t_key, t_specs in (self.assets_by_task_key or {}).items()
+                            if any(s.key == spec.key for s in t_specs)
+                        ),
+                        "unknown",
+                    )
+                    yield MaterializeResult(
+                        asset_key=spec.key,
+                        metadata={
+                            "dagster-databricks/job_id": MetadataValue.int(job.job_id),
+                            "dagster-databricks/run_id": MetadataValue.int(run.run_id),
+                            "dagster-databricks/run_url": MetadataValue.url(run.run_page_url or ""),
+                            "dagster-databricks/task_key": current_task_key,
+                        },
+                    )
+
+        return _execution_fn
+
+    def get_asset_specs(self, task: DatabricksBaseTask, job_name: str) -> list[AssetSpec]:
+        """Return a list of AssetSpec objects for the given task."""
+        task_key = task.task_key
+
+        if self.assets_by_task_key and task_key in self.assets_by_task_key:
+            return [
+                spec.merge_attributes(
+                    kinds={"databricks"},
+                    metadata={
+                        "dagster-databricks/task_key": task_key,
+                        "dagster-databricks/job_name": job_name,
+                    },
+                )
+                for spec in self.assets_by_task_key[task_key]
+            ]
+
+        clean_job = snake_case(job_name)
+        clean_task = snake_case(task_key)
+
+        return [
+            AssetSpec(
+                key=AssetKey([clean_job, clean_task]),
+                description=f"Databricks task {task_key} in job {job_name}",
+                kinds={"databricks"},
+                metadata={
+                    "dagster-databricks/task_key": task_key,
+                    "dagster-databricks/job_name": job_name,
+                },
+            )
+        ]
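In a Dagster project this component would normally be declared in component YAML, but since it is a dataclass it can also be constructed directly. A Python sketch under the assumption that only the `workspace` field is required (the other fields default to `None` or a default `defs_state` config), with placeholder credentials and hypothetical job IDs:

```python
from dagster_databricks import DatabricksWorkspaceComponent
from dagster_databricks.components.databricks_asset_bundle.resource import DatabricksWorkspace
from dagster_databricks.components.databricks_workspace.schema import DatabricksFilter

# Placeholder workspace credentials.
workspace = DatabricksWorkspace(
    host="https://example-workspace.cloud.databricks.com",
    token="dapiXXXXXXXXXXXXXXXX",
)

component = DatabricksWorkspaceComponent(
    workspace=workspace,
    # Keep only two hypothetical job IDs; omit the filter to include every job.
    databricks_filter=DatabricksFilter(include_job=lambda job: job["job_id"] in {101, 102}),
)
```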
dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/schema.py
ADDED

@@ -0,0 +1,49 @@
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Annotated, Any, Optional, Union
+
+from dagster.components import Resolver
+from pydantic import BaseModel
+
+from dagster_databricks.components.databricks_asset_bundle.configs import DatabricksJob
+
+DatabricksJobInfo = Union[dict[str, Any], DatabricksJob]
+
+
+@dataclass
+class DatabricksFilter:
+    include_job: Callable[[DatabricksJobInfo], bool]
+
+
+class IncludeJobsConfig(BaseModel):
+    job_ids: list[int]
+
+
+class DatabricksFilterConfig(BaseModel):
+    include_jobs: Optional[IncludeJobsConfig] = None
+
+
+def resolve_databricks_filter(context, config: DatabricksFilterConfig) -> DatabricksFilter:
+    """Convert a DatabricksFilterConfig into a DatabricksFilter."""
+    if config and config.include_jobs and getattr(config.include_jobs, "job_ids", None):
+        allowed_ids = set(config.include_jobs.job_ids)
+
+        def include_job(job: DatabricksJobInfo) -> bool:
+            job_id = job.get("job_id") if isinstance(job, dict) else job.job_id
+            return job_id in allowed_ids
+    else:
+
+        def include_job(job: DatabricksJobInfo) -> bool:
+            return True
+
+    return DatabricksFilter(include_job=include_job)
+
+
+ResolvedDatabricksFilter = Annotated[
+    DatabricksFilter,
+    Resolver(
+        resolve_databricks_filter,
+        model_field_type=DatabricksFilterConfig,
+        description="Filter which Databricks jobs to include",
+    ),
+]
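The resolver turns a declarative `include_jobs` config into a callable predicate. A quick sketch of the behavior shown above; the `context` argument is unused by the resolver body, so `None` is passed purely for illustration and the job IDs are hypothetical:

```python
from dagster_databricks.components.databricks_workspace.schema import (
    DatabricksFilterConfig,
    IncludeJobsConfig,
    resolve_databricks_filter,
)

config = DatabricksFilterConfig(include_jobs=IncludeJobsConfig(job_ids=[101, 102]))
job_filter = resolve_databricks_filter(None, config)

# The predicate accepts the raw dicts returned by the Jobs API (or DatabricksJob records).
assert job_filter.include_job({"job_id": 101}) is True
assert job_filter.include_job({"job_id": 999}) is False

# With no include_jobs configured, every job passes.
allow_all = resolve_databricks_filter(None, DatabricksFilterConfig())
assert allow_all.include_job({"job_id": 999}) is True
```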
dagster_databricks-0.28.9/dagster_databricks/version.py
ADDED

@@ -0,0 +1 @@
+__version__ = "0.28.9"
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9/dagster_databricks.egg-info}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dagster-databricks
-Version: 0.28.7
+Version: 0.28.9
 Summary: Package for Databricks-specific Dagster framework op and resource components.
 Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-databricks
 Author: Dagster Labs
@@ -11,9 +11,10 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10,<3.15
 License-File: LICENSE
-Requires-Dist: dagster==1.12.7
-Requires-Dist: dagster-pipes==1.12.7
-Requires-Dist: dagster-pyspark==0.28.7
+Requires-Dist: dagster==1.12.9
+Requires-Dist: dagster-pipes==1.12.9
+Requires-Dist: dagster-pyspark==0.28.9
+Requires-Dist: aiohttp
 Requires-Dist: databricks-sdk<0.61.0,>=0.41
 Dynamic: author
 Dynamic: author-email
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/SOURCES.txt
RENAMED

@@ -14,6 +14,7 @@ dagster_databricks/pipes.py
 dagster_databricks/py.typed
 dagster_databricks/resources.py
 dagster_databricks/types.py
+dagster_databricks/utils.py
 dagster_databricks/version.py
 dagster_databricks.egg-info/PKG-INFO
 dagster_databricks.egg-info/SOURCES.txt
@@ -26,4 +27,7 @@ dagster_databricks/components/databricks_asset_bundle/__init__.py
 dagster_databricks/components/databricks_asset_bundle/component.py
 dagster_databricks/components/databricks_asset_bundle/configs.py
 dagster_databricks/components/databricks_asset_bundle/resource.py
-dagster_databricks/components/databricks_asset_bundle/scaffolder.py
+dagster_databricks/components/databricks_asset_bundle/scaffolder.py
+dagster_databricks/components/databricks_workspace/__init__.py
+dagster_databricks/components/databricks_workspace/component.py
+dagster_databricks/components/databricks_workspace/schema.py
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/setup.py
RENAMED

@@ -31,9 +31,10 @@ setup(
     include_package_data=True,
     python_requires=">=3.10,<3.15",
     install_requires=[
-        "dagster==1.12.7",
-        "dagster-pipes==1.12.7",
-        "dagster-pyspark==0.28.7",
+        "dagster==1.12.9",
+        "dagster-pipes==1.12.9",
+        "dagster-pyspark==0.28.9",
+        "aiohttp",
         "databricks-sdk>=0.41,<0.61.0",  # dbt-databricks is pinned to this version
     ],
     zip_safe=False,
dagster_databricks-0.28.7/dagster_databricks/version.py
DELETED

@@ -1 +0,0 @@
-__version__ = "0.28.7"
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/LICENSE
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/MANIFEST.in
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/_test_utils.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/__init__.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/__init__.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/scaffolder.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/configs.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks_pyspark_step_launcher.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks_step_main.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/ops.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/pipes.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/py.typed
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/resources.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/types.py
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/dependency_links.txt
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/not-zip-safe
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/top_level.txt
RENAMED
File without changes
{dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/setup.cfg
RENAMED
File without changes