dagster-databricks 0.28.7__tar.gz → 0.28.9__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (36)
  1. {dagster_databricks-0.28.7/dagster_databricks.egg-info → dagster_databricks-0.28.9}/PKG-INFO +5 -4
  2. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/README.md +1 -1
  3. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/__init__.py +3 -0
  4. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/component.py +4 -15
  5. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/configs.py +19 -0
  6. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/resource.py +64 -1
  7. dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/__init__.py +0 -0
  8. dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/component.py +207 -0
  9. dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/schema.py +49 -0
  10. dagster_databricks-0.28.9/dagster_databricks/utils.py +8 -0
  11. dagster_databricks-0.28.9/dagster_databricks/version.py +1 -0
  12. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9/dagster_databricks.egg-info}/PKG-INFO +5 -4
  13. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/SOURCES.txt +5 -1
  14. dagster_databricks-0.28.9/dagster_databricks.egg-info/requires.txt +5 -0
  15. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/setup.py +4 -3
  16. dagster_databricks-0.28.7/dagster_databricks/version.py +0 -1
  17. dagster_databricks-0.28.7/dagster_databricks.egg-info/requires.txt +0 -4
  18. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/LICENSE +0 -0
  19. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/MANIFEST.in +0 -0
  20. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/_test_utils.py +0 -0
  21. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/__init__.py +0 -0
  22. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/__init__.py +0 -0
  23. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/components/databricks_asset_bundle/scaffolder.py +0 -0
  24. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/configs.py +0 -0
  25. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks.py +0 -0
  26. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks_pyspark_step_launcher.py +0 -0
  27. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/databricks_step_main.py +0 -0
  28. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/ops.py +0 -0
  29. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/pipes.py +0 -0
  30. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/py.typed +0 -0
  31. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/resources.py +0 -0
  32. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks/types.py +0 -0
  33. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/dependency_links.txt +0 -0
  34. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/not-zip-safe +0 -0
  35. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/dagster_databricks.egg-info/top_level.txt +0 -0
  36. {dagster_databricks-0.28.7 → dagster_databricks-0.28.9}/setup.cfg +0 -0
--- dagster_databricks-0.28.7/dagster_databricks.egg-info/PKG-INFO
+++ dagster_databricks-0.28.9/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dagster-databricks
-Version: 0.28.7
+Version: 0.28.9
 Summary: Package for Databricks-specific Dagster framework op and resource components.
 Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-databricks
 Author: Dagster Labs
@@ -11,9 +11,10 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10,<3.15
 License-File: LICENSE
-Requires-Dist: dagster==1.12.7
-Requires-Dist: dagster-pipes==1.12.7
-Requires-Dist: dagster-pyspark==0.28.7
+Requires-Dist: dagster==1.12.9
+Requires-Dist: dagster-pipes==1.12.9
+Requires-Dist: dagster-pyspark==0.28.9
+Requires-Dist: aiohttp
 Requires-Dist: databricks-sdk<0.61.0,>=0.41
 Dynamic: author
 Dynamic: author-email
--- dagster_databricks-0.28.7/README.md
+++ dagster_databricks-0.28.9/README.md
@@ -1,7 +1,7 @@
 # dagster-databricks
 
 The docs for `dagster-databricks` can be found
-[here](https://docs.dagster.io/api/python-api/libraries/dagster-databricks).
+[here](https://docs.dagster.io/integrations/libraries/databricks/dagster-databricks).
 
 A guide for integrating Databricks using Dagster Pipes can be found
 [here](https://docs.dagster.io/guides/dagster-pipes/databricks).
--- dagster_databricks-0.28.7/dagster_databricks/__init__.py
+++ dagster_databricks-0.28.9/dagster_databricks/__init__.py
@@ -13,6 +13,9 @@ from dagster_shared.libraries import DagsterLibraryRegistry
 from dagster_databricks.components.databricks_asset_bundle.component import (
     DatabricksAssetBundleComponent as DatabricksAssetBundleComponent,
 )
+from dagster_databricks.components.databricks_workspace.component import (
+    DatabricksWorkspaceComponent as DatabricksWorkspaceComponent,
+)
 from dagster_databricks.databricks import (
     DatabricksClient as DatabricksClient,
     DatabricksError as DatabricksError,
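The re-export above means the new component is available from the package root. A minimal sketch of the import, assuming 0.28.9 is installed:

```python
# The new component is importable directly from the top-level package:
from dagster_databricks import DatabricksWorkspaceComponent
```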
--- dagster_databricks-0.28.7/dagster_databricks/components/databricks_asset_bundle/component.py
+++ dagster_databricks-0.28.9/dagster_databricks/components/databricks_asset_bundle/component.py
@@ -1,5 +1,4 @@
 import os
-import re
 from collections import defaultdict
 from dataclasses import dataclass, field
 from functools import cached_property
@@ -35,17 +34,7 @@ from dagster_databricks.components.databricks_asset_bundle.resource import Datab
 from dagster_databricks.components.databricks_asset_bundle.scaffolder import (
     DatabricksAssetBundleScaffolder,
 )
-
-
-def snake_case(name: str) -> str:
-    """Convert a string to snake_case."""
-    # Remove file extension if present
-    name = Path(name).stem
-    # Replace special characters and spaces with underscores
-    name = re.sub(r"[^a-zA-Z0-9]+", "_", name)
-    # Convert CamelCase to snake_case
-    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name)
-    return name.lower().strip("_")
+from dagster_databricks.utils import snake_case
@@ -251,9 +240,9 @@ class DatabricksAssetBundleComponent(Component, Resolvable):
        )

    def build_defs(self, context: ComponentLoadContext) -> Definitions:
-        component_defs_path_as_python_str = str(
-            os.path.relpath(context.component_path.file_path, start=context.project_root)
-        ).replace("/", "_")
+        component_defs_path_as_python_str = snake_case(
+            str(os.path.relpath(context.component_path.file_path, start=context.project_root))
+        )

        databricks_assets = []
        for task_key, asset_specs in self.asset_specs_by_task_key.items():
--- dagster_databricks-0.28.7/dagster_databricks/components/databricks_asset_bundle/configs.py
+++ dagster_databricks-0.28.9/dagster_databricks/components/databricks_asset_bundle/configs.py
@@ -12,6 +12,7 @@ from dagster import (
     get_dagster_logger,
 )
 from dagster._annotations import preview
+from dagster._serdes import whitelist_for_serdes
 from dagster_shared.record import IHaveNew, record, record_custom
 from databricks.sdk.service import jobs
 from typing_extensions import Self, TypeVar
@@ -110,6 +111,7 @@ class DatabricksTaskDependsOnConfig:
     outcome: Optional[str]


+@whitelist_for_serdes
 @record
 class DatabricksBaseTask(ABC, Generic[T_DatabricksSdkTask]):
     task_key: str
@@ -143,6 +145,7 @@ class DatabricksBaseTask(ABC, Generic[T_DatabricksSdkTask]):
     def to_databricks_sdk_task(self) -> T_DatabricksSdkTask: ...


+@whitelist_for_serdes
 @record
 class DatabricksNotebookTask(DatabricksBaseTask[jobs.NotebookTask]):
     @property
@@ -186,6 +189,7 @@ class DatabricksNotebookTask(DatabricksBaseTask[jobs.NotebookTask]):
     )


+@whitelist_for_serdes
 @record
 class DatabricksConditionTask(DatabricksBaseTask[jobs.ConditionTask]):
     @property
@@ -236,6 +240,7 @@ class DatabricksConditionTask(DatabricksBaseTask[jobs.ConditionTask]):
     )


+@whitelist_for_serdes
 @record
 class DatabricksSparkPythonTask(DatabricksBaseTask[jobs.SparkPythonTask]):
     @property
@@ -282,6 +287,7 @@ class DatabricksSparkPythonTask(DatabricksBaseTask[jobs.SparkPythonTask]):
     )


+@whitelist_for_serdes
 @record
 class DatabricksPythonWheelTask(DatabricksBaseTask[jobs.PythonWheelTask]):
     @property
@@ -331,6 +337,7 @@ class DatabricksPythonWheelTask(DatabricksBaseTask[jobs.PythonWheelTask]):
     )


+@whitelist_for_serdes
 @record
 class DatabricksSparkJarTask(DatabricksBaseTask[jobs.SparkJarTask]):
     @property
@@ -376,6 +383,7 @@ class DatabricksSparkJarTask(DatabricksBaseTask[jobs.SparkJarTask]):
     )


+@whitelist_for_serdes
 @record
 class DatabricksJobTask(DatabricksBaseTask[jobs.RunJobTask]):
     @property
@@ -420,6 +428,7 @@ class DatabricksJobTask(DatabricksBaseTask[jobs.RunJobTask]):
     )


+@whitelist_for_serdes
 @record
 class DatabricksUnknownTask(DatabricksBaseTask):
     @property
@@ -609,3 +618,13 @@ class ResolvedDatabricksExistingClusterConfig(Resolvable, Model):
 @preview
 class ResolvedDatabricksServerlessConfig(Resolvable, Model):
     is_serverless: bool = True
+
+
+@whitelist_for_serdes
+@record
+class DatabricksJob:
+    """Represents a Databricks Job structure for serialization."""
+
+    job_id: int
+    name: str
+    tasks: Optional[list[DatabricksBaseTask]] = None
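Decorating these records with `@whitelist_for_serdes` registers them with Dagster's serdes machinery, which is what lets the new workspace component persist jobs as state. A minimal round-trip sketch, using the same `dagster_shared.serdes.serdes` helpers imported elsewhere in this release (the `job_id`/`name` values are made up):

```python
from dagster_shared.serdes.serdes import deserialize_value, serialize_value

from dagster_databricks.components.databricks_asset_bundle.configs import DatabricksJob

# Serialize a whitelisted record to a JSON string and rebuild it.
job = DatabricksJob(job_id=123, name="nightly_etl", tasks=None)
payload = serialize_value(job)
restored = deserialize_value(payload, DatabricksJob)
assert restored.job_id == 123 and restored.name == "nightly_etl"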
--- dagster_databricks-0.28.7/dagster_databricks/components/databricks_asset_bundle/resource.py
+++ dagster_databricks-0.28.9/dagster_databricks/components/databricks_asset_bundle/resource.py
@@ -1,5 +1,12 @@
+import asyncio
 from collections.abc import Iterator, Mapping
-from typing import TYPE_CHECKING, Any, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import aiohttp
+
+DATABRICKS_JOBS_API_PATH = "/api/2.1/jobs"
+MAX_CONCURRENT_REQUESTS = 10
+RATE_LIMIT_STATUS_CODE = 429

 from dagster import (
     AssetExecutionContext,
@@ -16,6 +23,7 @@ from databricks.sdk.service import compute, jobs
 from pydantic import Field

 from dagster_databricks.components.databricks_asset_bundle.configs import (
+    DatabricksJob,
     ResolvedDatabricksExistingClusterConfig,
     ResolvedDatabricksNewClusterConfig,
     ResolvedDatabricksServerlessConfig,
@@ -44,6 +52,61 @@ class DatabricksWorkspace(ConfigurableResource):
            token=self.token,
        )

+    async def fetch_jobs(self, databricks_filter: Any) -> list[DatabricksJob]:
+        """Fetches jobs efficiently using async I/O directly from the resource."""
+        headers = {"Authorization": f"Bearer {self.token}"}
+        base_url = self.host.rstrip("/")
+
+        async with aiohttp.ClientSession(headers=headers) as session:
+            list_url = f"{base_url}{DATABRICKS_JOBS_API_PATH}/list"
+            async with session.get(list_url) as resp:
+                resp.raise_for_status()
+                data = await resp.json()
+                all_jobs_lite = data.get("jobs", [])
+
+        job_ids_to_fetch = []
+        for j in all_jobs_lite:
+            if databricks_filter and not databricks_filter.include_job(j):
+                continue
+            job_ids_to_fetch.append(j["job_id"])
+
+        if not job_ids_to_fetch:
+            return []
+
+        semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
+
+        async def _fetch_single_job(job_id: int) -> Optional[dict]:
+            async with semaphore:
+                async with aiohttp.ClientSession(headers=headers) as session:
+                    url = f"{base_url}{DATABRICKS_JOBS_API_PATH}/get?job_id={job_id}"
+                    async with session.get(url) as resp:
+                        if resp.status == RATE_LIMIT_STATUS_CODE:
+                            await asyncio.sleep(1)
+                            return await _fetch_single_job(job_id)
+
+                        if resp.status != 200:
+                            resp.raise_for_status()
+
+                        return await resp.json()
+
+        tasks_coroutines = [_fetch_single_job(jid) for jid in job_ids_to_fetch]
+        raw_jobs = await asyncio.gather(*tasks_coroutines)
+
+        final_jobs = []
+        for rj in raw_jobs:
+            if not rj:
+                continue
+
+            settings = rj.get("settings", {})
+            job = DatabricksJob(
+                job_id=rj["job_id"],
+                name=settings.get("name", "Unnamed Job"),
+                tasks=settings.get("tasks", []),
+            )
+            final_jobs.append(job)
+
+        return final_jobs
+
    def submit_and_poll(
        self, component: "DatabricksAssetBundleComponent", context: AssetExecutionContext
    ) -> Iterator[Union[AssetMaterialization, MaterializeResult]]:
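`fetch_jobs` is a coroutine, so outside the component's async state-refresh path it has to be driven by an event loop. A minimal sketch, assuming direct construction of the resource; the host and token values are placeholders:

```python
import asyncio

from dagster_databricks.components.databricks_asset_bundle.resource import DatabricksWorkspace

workspace = DatabricksWorkspace(
    host="https://dbc-example.cloud.databricks.com",  # placeholder workspace URL
    token="dapi-...",  # placeholder personal access token
)

# A falsy filter short-circuits the include_job check, so every job is kept.
jobs = asyncio.run(workspace.fetch_jobs(databricks_filter=None))
for job in jobs:
    print(job.job_id, job.name)
```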
--- /dev/null
+++ dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/component.py
@@ -0,0 +1,207 @@
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Annotated, Any, Optional
+
+from dagster import (
+    AssetExecutionContext,
+    AssetKey,
+    AssetsDefinition,
+    AssetSpec,
+    Definitions,
+    MaterializeResult,
+    MetadataValue,
+    Resolvable,
+    ResolvedAssetSpec,
+    multi_asset,
+)
+from dagster._serdes import whitelist_for_serdes
+from dagster._symbol_annotations.lifecycle import preview
+from dagster.components import Resolver
+from dagster.components.component.state_backed_component import StateBackedComponent
+from dagster.components.utils.defs_state import (
+    DefsStateConfig,
+    DefsStateConfigArgs,
+    ResolvedDefsStateConfig,
+)
+from dagster_shared.record import record
+from dagster_shared.serdes.serdes import deserialize_value, serialize_value
+from databricks.sdk.service.jobs import RunResultState
+
+from dagster_databricks.components.databricks_asset_bundle.component import (
+    DatabricksWorkspaceArgs,
+    resolve_databricks_workspace,
+)
+from dagster_databricks.components.databricks_asset_bundle.configs import (
+    DatabricksBaseTask,
+    DatabricksJob,
+)
+from dagster_databricks.components.databricks_asset_bundle.resource import DatabricksWorkspace
+from dagster_databricks.components.databricks_workspace.schema import ResolvedDatabricksFilter
+from dagster_databricks.utils import snake_case
+
+
+@whitelist_for_serdes
+@record
+class DatabricksWorkspaceData:
+    """Container for serialized Databricks workspace state."""
+
+    jobs: list[DatabricksJob]
+
+
+@preview
+@dataclass
+class DatabricksWorkspaceComponent(StateBackedComponent, Resolvable):
+    """Component that fetches Databricks workspace jobs and exposes them as assets."""
+
+    workspace: Annotated[
+        DatabricksWorkspace,
+        Resolver(
+            resolve_databricks_workspace,
+            model_field_type=DatabricksWorkspaceArgs.model(),
+            description="The mapping defining a DatabricksWorkspace.",
+        ),
+    ]
+
+    databricks_filter: Annotated[
+        Optional[ResolvedDatabricksFilter],
+        Resolver.default(description="Filter which Databricks jobs to include"),
+    ] = None
+
+    assets_by_task_key: Annotated[
+        Optional[dict[str, list[ResolvedAssetSpec]]],
+        Resolver.default(
+            description="Optional mapping of Databricks task keys to lists of Dagster AssetSpecs.",
+        ),
+    ] = None
+
+    defs_state: ResolvedDefsStateConfig = field(
+        default_factory=DefsStateConfigArgs.legacy_code_server_snapshots
+    )
+
+    @property
+    def defs_state_config(self) -> DefsStateConfig:
+        default_key = f"{self.__class__.__name__}[{self.workspace.host}]"
+        return DefsStateConfig.from_args(self.defs_state, default_key=default_key)
+
+    async def write_state_to_path(self, state_path: Path) -> None:
+        """Async implementation of state fetching."""
+        jobs = await self.workspace.fetch_jobs(self.databricks_filter)
+
+        data = DatabricksWorkspaceData(jobs=jobs)
+        state_path.write_text(serialize_value(data))
+
+    def build_defs_from_state(self, context: Any, state_path: Optional[Path]) -> Definitions:
+        """Build Dagster Definitions from the cached state."""
+        if not state_path or not state_path.exists():
+            return Definitions()
+
+        workspace_data = deserialize_value(state_path.read_text(), DatabricksWorkspaceData)
+        jobs_state = workspace_data.jobs
+
+        databricks_assets = []
+
+        for job in jobs_state:
+            job_specs = []
+            task_key_map = {}
+            tasks = job.tasks or []
+
+            for task in tasks:
+                specs = self.get_asset_specs(task=task, job_name=job.name)
+
+                for spec in specs:
+                    job_specs.append(spec)
+                    task_key_map[spec.key] = task.task_key
+
+            if job_specs:
+                asset_def = self._create_job_asset_def(job, job_specs, task_key_map)
+                databricks_assets.append(asset_def)
+
+        return Definitions(assets=databricks_assets)
+
+    def _create_job_asset_def(
+        self, job: DatabricksJob, specs: list[Any], task_key_map: dict
+    ) -> AssetsDefinition:
+        asset_name = f"databricks_job_{job.job_id}"
+
+        @multi_asset(name=asset_name, specs=specs, can_subset=True)
+        def _execution_fn(context: AssetExecutionContext):
+            client = self.workspace.get_client()
+            selected_keys = context.selected_asset_keys
+
+            tasks_to_run = [
+                task_key
+                for task_key, specs in (self.assets_by_task_key or {}).items()
+                if any(spec.key in selected_keys for spec in specs)
+            ]
+            context.log.info(f"Triggering Databricks job {job.job_id} for tasks: {tasks_to_run}")
+
+            run = client.jobs.run_now(
+                job_id=job.job_id, only=tasks_to_run if tasks_to_run else None
+            )
+            if run.run_page_url:
+                context.log.info(f"Run URL: {run.run_page_url}")
+
+            client.jobs.wait_get_run_job_terminated_or_skipped(run.run_id)
+
+            final_run = client.jobs.get_run(run.run_id)
+            state_obj = final_run.state
+            result_state = state_obj.result_state if state_obj else None
+
+            if result_state != RunResultState.SUCCESS:
+                status_str = result_state.value if result_state else "UNKNOWN"
+                error_msg = f"Job {job.job_id} failed: {status_str}. URL: {run.run_page_url}"
+                context.log.error(error_msg)
+                raise Exception(error_msg)
+
+            for spec in specs:
+                if spec.key in selected_keys:
+                    current_task_key = next(
+                        (
+                            t_key
+                            for t_key, t_specs in (self.assets_by_task_key or {}).items()
+                            if any(s.key == spec.key for s in t_specs)
+                        ),
+                        "unknown",
+                    )
+                    yield MaterializeResult(
+                        asset_key=spec.key,
+                        metadata={
+                            "dagster-databricks/job_id": MetadataValue.int(job.job_id),
+                            "dagster-databricks/run_id": MetadataValue.int(run.run_id),
+                            "dagster-databricks/run_url": MetadataValue.url(run.run_page_url or ""),
+                            "dagster-databricks/task_key": current_task_key,
+                        },
+                    )
+
+        return _execution_fn
+
+    def get_asset_specs(self, task: DatabricksBaseTask, job_name: str) -> list[AssetSpec]:
+        """Return a list of AssetSpec objects for the given task."""
+        task_key = task.task_key
+
+        if self.assets_by_task_key and task_key in self.assets_by_task_key:
+            return [
+                spec.merge_attributes(
+                    kinds={"databricks"},
+                    metadata={
+                        "dagster-databricks/task_key": task_key,
+                        "dagster-databricks/job_name": job_name,
+                    },
+                )
+                for spec in self.assets_by_task_key[task_key]
+            ]
+
+        clean_job = snake_case(job_name)
+        clean_task = snake_case(task_key)
+
+        return [
+            AssetSpec(
+                key=AssetKey([clean_job, clean_task]),
+                description=f"Databricks task {task_key} in job {job_name}",
+                kinds={"databricks"},
+                metadata={
+                    "dagster-databricks/task_key": task_key,
+                    "dagster-databricks/job_name": job_name,
+                },
+            )
+        ]
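A minimal sketch of wiring the component up directly in Python (components like this are more typically declared in a project's defs YAML; direct dataclass construction bypasses the Resolver machinery, and the host/token values are placeholders):

```python
from dagster_databricks import DatabricksWorkspaceComponent
from dagster_databricks.components.databricks_asset_bundle.resource import DatabricksWorkspace

component = DatabricksWorkspaceComponent(
    workspace=DatabricksWorkspace(
        host="https://dbc-example.cloud.databricks.com",  # placeholder
        token="dapi-...",  # placeholder
    ),
)
# With no assets_by_task_key mapping, get_asset_specs falls back to
# snake_case-derived keys, e.g. AssetKey(["nightly_etl", "ingest_task"]).
```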
--- /dev/null
+++ dagster_databricks-0.28.9/dagster_databricks/components/databricks_workspace/schema.py
@@ -0,0 +1,49 @@
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Annotated, Any, Optional, Union
+
+from dagster.components import Resolver
+from pydantic import BaseModel
+
+from dagster_databricks.components.databricks_asset_bundle.configs import DatabricksJob
+
+DatabricksJobInfo = Union[dict[str, Any], DatabricksJob]
+
+
+@dataclass
+class DatabricksFilter:
+    include_job: Callable[[DatabricksJobInfo], bool]
+
+
+class IncludeJobsConfig(BaseModel):
+    job_ids: list[int]
+
+
+class DatabricksFilterConfig(BaseModel):
+    include_jobs: Optional[IncludeJobsConfig] = None
+
+
+def resolve_databricks_filter(context, config: DatabricksFilterConfig) -> DatabricksFilter:
+    """Convert a DatabricksFilterConfig into a DatabricksFilter."""
+    if config and config.include_jobs and getattr(config.include_jobs, "job_ids", None):
+        allowed_ids = set(config.include_jobs.job_ids)
+
+        def include_job(job: DatabricksJobInfo) -> bool:
+            job_id = job.get("job_id") if isinstance(job, dict) else job.job_id
+            return job_id in allowed_ids
+    else:
+
+        def include_job(job: DatabricksJobInfo) -> bool:
+            return True
+
+    return DatabricksFilter(include_job=include_job)
+
+
+ResolvedDatabricksFilter = Annotated[
+    DatabricksFilter,
+    Resolver(
+        resolve_databricks_filter,
+        model_field_type=DatabricksFilterConfig,
+        description="Filter which Databricks jobs to include",
+    ),
+]
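A minimal sketch of how the filter resolution behaves, calling `resolve_databricks_filter` directly (the resolver ignores its `context` argument, so `None` suffices here; the job IDs are made up):

```python
from dagster_databricks.components.databricks_workspace.schema import (
    DatabricksFilterConfig,
    IncludeJobsConfig,
    resolve_databricks_filter,
)

config = DatabricksFilterConfig(include_jobs=IncludeJobsConfig(job_ids=[101, 102]))
databricks_filter = resolve_databricks_filter(None, config)

# include_job accepts either raw job dicts (from the REST API) or DatabricksJob records.
jobs = [{"job_id": 101}, {"job_id": 999}]
kept = [j for j in jobs if databricks_filter.include_job(j)]
assert [j["job_id"] for j in kept] == [101]
```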
--- /dev/null
+++ dagster_databricks-0.28.9/dagster_databricks/utils.py
@@ -0,0 +1,8 @@
+import re
+
+
+def snake_case(name: str) -> str:
+    """Standard snake_case utility for Databricks components."""
+    name = re.sub(r"[^a-zA-Z0-9]+", "_", str(name))
+    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name)
+    return name.lower().strip("_")
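Note that the shared helper drops the `Path(name).stem` step the old in-component version had, so file extensions are no longer stripped. A few illustrative inputs:

```python
from dagster_databricks.utils import snake_case

assert snake_case("My Nightly ETL") == "my_nightly_etl"
assert snake_case("CamelCaseTask") == "camel_case_task"
# Extension is kept, unlike the previous Path(name).stem behavior:
assert snake_case("notebook.py") == "notebook_py"
```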
--- /dev/null
+++ dagster_databricks-0.28.9/dagster_databricks/version.py
@@ -0,0 +1 @@
+__version__ = "0.28.9"
--- dagster_databricks-0.28.7/PKG-INFO
+++ dagster_databricks-0.28.9/dagster_databricks.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dagster-databricks
-Version: 0.28.7
+Version: 0.28.9
 Summary: Package for Databricks-specific Dagster framework op and resource components.
 Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-databricks
 Author: Dagster Labs
@@ -11,9 +11,10 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10,<3.15
 License-File: LICENSE
-Requires-Dist: dagster==1.12.7
-Requires-Dist: dagster-pipes==1.12.7
-Requires-Dist: dagster-pyspark==0.28.7
+Requires-Dist: dagster==1.12.9
+Requires-Dist: dagster-pipes==1.12.9
+Requires-Dist: dagster-pyspark==0.28.9
+Requires-Dist: aiohttp
 Requires-Dist: databricks-sdk<0.61.0,>=0.41
 Dynamic: author
 Dynamic: author-email
--- dagster_databricks-0.28.7/dagster_databricks.egg-info/SOURCES.txt
+++ dagster_databricks-0.28.9/dagster_databricks.egg-info/SOURCES.txt
@@ -14,6 +14,7 @@ dagster_databricks/pipes.py
 dagster_databricks/py.typed
 dagster_databricks/resources.py
 dagster_databricks/types.py
+dagster_databricks/utils.py
 dagster_databricks/version.py
 dagster_databricks.egg-info/PKG-INFO
 dagster_databricks.egg-info/SOURCES.txt
@@ -26,4 +27,7 @@ dagster_databricks/components/databricks_asset_bundle/__init__.py
 dagster_databricks/components/databricks_asset_bundle/component.py
 dagster_databricks/components/databricks_asset_bundle/configs.py
 dagster_databricks/components/databricks_asset_bundle/resource.py
-dagster_databricks/components/databricks_asset_bundle/scaffolder.py
+dagster_databricks/components/databricks_asset_bundle/scaffolder.py
+dagster_databricks/components/databricks_workspace/__init__.py
+dagster_databricks/components/databricks_workspace/component.py
+dagster_databricks/components/databricks_workspace/schema.py
--- /dev/null
+++ dagster_databricks-0.28.9/dagster_databricks.egg-info/requires.txt
@@ -0,0 +1,5 @@
+dagster==1.12.9
+dagster-pipes==1.12.9
+dagster-pyspark==0.28.9
+aiohttp
+databricks-sdk<0.61.0,>=0.41
--- dagster_databricks-0.28.7/setup.py
+++ dagster_databricks-0.28.9/setup.py
@@ -31,9 +31,10 @@ setup(
     include_package_data=True,
     python_requires=">=3.10,<3.15",
     install_requires=[
-        "dagster==1.12.7",
-        "dagster-pipes==1.12.7",
-        "dagster-pyspark==0.28.7",
+        "dagster==1.12.9",
+        "dagster-pipes==1.12.9",
+        "dagster-pyspark==0.28.9",
+        "aiohttp",
         "databricks-sdk>=0.41,<0.61.0",  # dbt-databricks is pinned to this version
     ],
     zip_safe=False,
--- dagster_databricks-0.28.7/dagster_databricks/version.py
+++ /dev/null
@@ -1 +0,0 @@
-__version__ = "0.28.7"
--- dagster_databricks-0.28.7/dagster_databricks.egg-info/requires.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-dagster==1.12.7
-dagster-pipes==1.12.7
-dagster-pyspark==0.28.7
-databricks-sdk<0.61.0,>=0.41