dagster-sling 0.28.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ from dagster_shared.libraries import DagsterLibraryRegistry
2
+
3
+ from dagster_sling.asset_decorator import sling_assets
4
+ from dagster_sling.components.sling_replication_collection.component import (
5
+ SlingReplicationCollectionComponent,
6
+ SlingReplicationSpecModel,
7
+ )
8
+ from dagster_sling.dagster_sling_translator import DagsterSlingTranslator
9
+ from dagster_sling.resources import SlingConnectionResource, SlingMode, SlingResource
10
+ from dagster_sling.sling_replication import SlingReplicationParam
11
+ from dagster_sling.version import __version__
12
+
13
# Names re-exported as the public API of the dagster_sling package.
__all__ = [
    "DagsterSlingTranslator",
    "SlingConnectionResource",
    "SlingMode",
    "SlingReplicationCollectionComponent",
    "SlingReplicationParam",
    "SlingReplicationSpecModel",
    "SlingResource",
    "sling_assets",
]

# Runs at import time: records this library's name and version with DagsterLibraryRegistry.
DagsterLibraryRegistry.register("dagster-sling", __version__)
@@ -0,0 +1,159 @@
1
+ from collections.abc import Callable, Iterable, Mapping
2
+ from copy import deepcopy
3
+ from typing import Any, Optional
4
+
5
+ from dagster import (
6
+ AssetsDefinition,
7
+ BackfillPolicy,
8
+ PartitionsDefinition,
9
+ _check as check,
10
+ multi_asset,
11
+ )
12
+ from dagster._core.definitions.assets.definition.asset_spec import AssetSpec
13
+ from dagster._utils.merger import deep_merge_dicts
14
+ from dagster._utils.security import non_secure_md5_hash_str
15
+
16
+ from dagster_sling.dagster_sling_translator import DagsterSlingTranslator
17
+ from dagster_sling.sling_replication import SlingReplicationParam, validate_replication
18
+
19
# Asset-spec metadata key under which get_sling_asset_specs stores the translator instance.
METADATA_KEY_TRANSLATOR = "dagster_sling/dagster_sling_translator"
# Asset-spec metadata key under which get_sling_asset_specs stores the validated replication config.
METADATA_KEY_REPLICATION_CONFIG = "dagster_sling/sling_replication_config"
21
+
22
+
23
def get_streams_from_replication(
    replication_config: Mapping[str, Any],
) -> Iterable[Mapping[str, Any]]:
    """Yield every enabled stream of a Sling replication config along with its config.

    Args:
        replication_config: A Sling replication config. Its ``streams`` entry, when
            present, maps stream names to per-stream configs (which may be ``None``).

    Yields:
        One ``{"name": <stream name>, "config": <stream config>}`` mapping per stream,
        in the order the streams appear. Streams whose config sets a truthy
        ``disabled`` value are skipped. A missing or empty ``streams`` entry yields
        nothing.
    """
    # A bare ``streams:`` key parses to None rather than to an empty mapping, so
    # `.get("streams", {})` alone would hand None to `.items()`.
    streams = replication_config.get("streams") or {}
    for stream, config in streams.items():
        if config and config.get("disabled", False):
            continue
        yield {"name": stream, "config": config}
31
+
32
+
33
def streams_with_default_dagster_meta(
    streams: Iterable[Mapping[str, Any]], replication_config: Mapping[str, Any]
) -> Iterable[Mapping[str, Any]]:
    """Apply the replication-wide ``defaults.meta.dagster`` block to each stream.

    When the replication config declares no default dagster meta, the incoming
    streams are passed through untouched. Otherwise each stream is re-emitted with
    a copy of its config whose ``meta`` has the defaults merged underneath the
    stream's own ``meta``.
    """
    dagster_defaults = replication_config.get("defaults", {}).get("meta", {}).get("dagster", {})

    if dagster_defaults:
        for entry in streams:
            stream_name = entry["name"]
            # Work on a copy so the caller's stream config is never mutated.
            stream_config = deepcopy(entry["config"])
            if stream_config:
                stream_config["meta"] = deep_merge_dicts(
                    {"dagster": dagster_defaults}, stream_config.get("meta", {})
                )
            else:
                # Stream has no config of its own: it gets only the defaults.
                stream_config = {"meta": {"dagster": dagster_defaults}}
            yield {"name": stream_name, "config": stream_config}
        return

    yield from streams
53
+
54
+
55
def sling_assets(
    *,
    replication_config: SlingReplicationParam,
    dagster_sling_translator: Optional[DagsterSlingTranslator] = None,
    name: Optional[str] = None,
    partitions_def: Optional[PartitionsDefinition] = None,
    backfill_policy: Optional[BackfillPolicy] = None,
    op_tags: Optional[Mapping[str, Any]] = None,
    pool: Optional[str] = None,
) -> Callable[[Callable[..., Any]], AssetsDefinition]:
    """Define how the streams of a Sling replication config are materialized as Dagster assets.

    One asset is created for every Sling target stream described by the replication config.

    A Sling replication config is a configuration that maps sources to destinations. For the full
    spec and descriptions, see `Sling's Documentation <https://docs.slingdata.io/sling-cli/run/configuration>`_.

    Args:
        replication_config (Union[Mapping[str, Any], str, Path]): A path to a Sling replication config,
            or a dictionary of a replication config.
        dagster_sling_translator (Optional[DagsterSlingTranslator]): Allows customization of how a
            Sling stream is mapped to a Dagster AssetKey.
        name (Optional[str]): The name of the op.
        partitions_def (Optional[PartitionsDefinition]): The partitions definition for this asset.
        backfill_policy (Optional[BackfillPolicy]): The backfill policy for this asset.
        op_tags (Optional[Mapping[str, Any]]): The tags for the underlying op.
        pool (Optional[str]): A string that identifies the concurrency pool that governs the sling
            assets' execution.

    Examples:
        Running a sync by providing a path to a Sling replication config:

        .. code-block:: python

            from dagster import EnvVar
            from dagster_sling import sling_assets, SlingResource, SlingConnectionResource

            sling_resource = SlingResource(
                connections=[
                    SlingConnectionResource(
                        name="MY_POSTGRES", type="postgres", connection_string=EnvVar("POSTGRES_URL")
                    ),
                    SlingConnectionResource(
                        name="MY_DUCKDB",
                        type="duckdb",
                        connection_string="duckdb:///var/tmp/duckdb.db",
                    ),
                ]
            )

            config_path = "/path/to/replication.yaml"
            @sling_assets(replication_config=config_path)
            def my_assets(context, sling: SlingResource):
                yield from sling.replicate(context=context)
    """
    # Build the per-stream specs up front; everything else is forwarded verbatim.
    stream_specs = get_sling_asset_specs(
        replication_config, dagster_sling_translator, partitions_def
    )
    return multi_asset(
        name=name,
        partitions_def=partitions_def,
        can_subset=True,
        op_tags=op_tags,
        backfill_policy=backfill_policy,
        specs=stream_specs,
        pool=pool,
    )
116
+
117
+
118
def get_sling_asset_specs(
    replication_config: SlingReplicationParam,
    dagster_sling_translator: Optional[DagsterSlingTranslator] = None,
    partitions_def: Optional[PartitionsDefinition] = None,
) -> list[AssetSpec]:
    """Build one AssetSpec per enabled stream of a Sling replication config.

    The config is validated, its streams are collected (with any default dagster
    meta applied), and each stream is handed to the translator. Every resulting
    spec carries the translator and the validated config in its metadata, is
    marked skippable, and falls back to a code version derived from the config
    when the translator did not set one.

    Args:
        replication_config: The replication config, in any form accepted by
            ``validate_replication``.
        dagster_sling_translator: Translator used to turn streams into specs; a
            default ``DagsterSlingTranslator`` is used when omitted.
        partitions_def: When given, replaces the partitions definition of every spec.

    Returns:
        The list of asset specs, in stream order.
    """
    replication_config = validate_replication(replication_config)

    streams = streams_with_default_dagster_meta(
        get_streams_from_replication(replication_config), replication_config
    )

    # Fallback code version: a hash of the validated config's string form.
    config_code_version = non_secure_md5_hash_str(str(replication_config).encode())

    translator = check.opt_inst_param(
        dagster_sling_translator, "dagster_sling_translator", DagsterSlingTranslator
    )
    if not translator:
        translator = DagsterSlingTranslator()

    base_specs = []
    for stream in streams:
        spec = translator.get_asset_spec(stream).merge_attributes(
            metadata={
                METADATA_KEY_TRANSLATOR: translator,
                METADATA_KEY_REPLICATION_CONFIG: replication_config,
            }
        )
        # Only fill in the code version when the translator left it unset.
        if spec.code_version is None:
            spec = spec.replace_attributes(code_version=config_code_version)
        base_specs.append(spec)

    finalized = []
    for spec in base_specs:
        finalized.append(
            spec.replace_attributes(
                partitions_def=partitions_def or spec.partitions_def, skippable=True
            )
        )
    return finalized
File without changes
File without changes
@@ -0,0 +1,274 @@
1
+ from collections.abc import Iterator, Mapping, Sequence
2
+ from dataclasses import dataclass, field
3
+ from functools import cached_property
4
+ from pathlib import Path
5
+ from typing import Annotated, Any, Literal, Optional, TypeAlias, Union
6
+
7
+ from dagster import Resolvable, Resolver
8
+ from dagster._annotations import public
9
+ from dagster._core.definitions.assets.definition.asset_spec import AssetSpec
10
+ from dagster._core.definitions.assets.definition.assets_definition import AssetsDefinition
11
+ from dagster._core.definitions.definitions_class import Definitions
12
+ from dagster._core.definitions.events import AssetMaterialization
13
+ from dagster._core.definitions.metadata.source_code import (
14
+ LocalFileCodeReference,
15
+ merge_code_references,
16
+ )
17
+ from dagster._core.definitions.result import MaterializeResult
18
+ from dagster.components.component.component import Component
19
+ from dagster.components.core.context import ComponentLoadContext
20
+ from dagster.components.resolved.context import ResolutionContext
21
+ from dagster.components.resolved.core_models import OpSpec
22
+ from dagster.components.scaffold.scaffold import scaffold_with
23
+ from dagster.components.utils.translation import (
24
+ ComponentTranslator,
25
+ TranslationFn,
26
+ TranslationFnResolver,
27
+ create_component_translator_cls,
28
+ )
29
+ from dagster_shared.utils.warnings import deprecation_warning
30
+ from pydantic import BaseModel, ConfigDict, Field
31
+
32
+ from dagster_sling.asset_decorator import sling_assets
33
+ from dagster_sling.components.sling_replication_collection.scaffolder import (
34
+ SlingReplicationComponentScaffolder,
35
+ )
36
+ from dagster_sling.dagster_sling_translator import DagsterSlingTranslator
37
+ from dagster_sling.resources import AssetExecutionContext, SlingConnectionResource, SlingResource
38
+
39
# Names of the optional metadata extras a replication may request through `include_metadata`.
SlingMetadataAddons: TypeAlias = Literal["column_metadata", "row_count"]
40
+
41
+
42
# Describes a single Sling replication handled by SlingReplicationCollectionComponent.
@dataclass
class SlingReplicationSpecModel(Resolvable):
    # Location of the replication file; build_asset joins it onto the component's
    # load path (`context.path / path`).
    path: str
    # Optional op settings; build_asset reads `name`, `tags` and `backfill_policy` from it.
    op: Optional[OpSpec] = None
    # Optional translation function. When set, SlingComponentTranslator calls it with
    # the AssetSpec and the stream definition; the stream definition is exposed to the
    # resolver as the `stream_definition` template variable.
    translation: Optional[
        Annotated[
            TranslationFn[Mapping[str, Any]],
            TranslationFnResolver(
                template_vars_for_translation_fn=lambda data: {"stream_definition": data}
            ),
        ]
    ] = None
    # Which metadata extras `execute` should fetch ("column_metadata" and/or "row_count");
    # defaults to none.
    include_metadata: Annotated[
        list[SlingMetadataAddons],
        Resolver.default(
            description="Optionally include additional metadata in materializations generated while executing your Sling models",
            examples=[
                ["row_count"],
                ["row_count", "column_metadata"],
            ],
        ),
    ] = field(default_factory=list)
64
+
65
+
66
class SlingConnectionResourcePropertiesModel(Resolvable, BaseModel):
    """Properties of a Sling connection resource."""

    # each connection type supports a variety of different properties
    # (extra="allow" keeps any additional keys on the model; resolve_connections
    # dumps the model and forwards the result as SlingConnectionResource kwargs)
    model_config = ConfigDict(extra="allow")

    # Required: the Sling connection type.
    type: str = Field(
        description="Type of the source connection, must match the Sling connection types. Use 'file' for local storage."
    )
    # Optional: defaults to None when the connection is described by keyword properties.
    connection_string: Optional[str] = Field(
        description="The optional connection string for the source database, if not using keyword arguments.",
        default=None,
    )
79
+
80
+
81
def resolve_connections(
    context: ResolutionContext,
    connections: Mapping[str, SlingConnectionResourcePropertiesModel],
) -> list[SlingConnectionResource]:
    """Turn a mapping of connection name to properties into SlingConnectionResource objects.

    Each entry's properties (a plain dict, or a model that is dumped to one) are
    resolved through ``context`` and passed as keyword arguments alongside the
    connection name. The result preserves the mapping's iteration order.
    """
    resolved = []
    for connection_name, properties in connections.items():
        if isinstance(properties, dict):
            raw_properties = properties
        else:
            raw_properties = properties.model_dump()
        resolved.append(
            SlingConnectionResource(
                name=connection_name,
                **context.resolve_value(raw_properties),
            )
        )
    return resolved
94
+
95
+
96
# Field type for a list of SlingConnectionResource that is authored as a mapping of
# connection name to connection properties and converted by resolve_connections.
ResolvedSlingConnections: TypeAlias = Annotated[
    list[SlingConnectionResource],
    Resolver(
        resolve_connections, model_field_type=Mapping[str, SlingConnectionResourcePropertiesModel]
    ),
]
102
+
103
+
104
def resolve_resource(
    context: ResolutionContext,
    sling,
) -> Optional[SlingResource]:
    """Resolve the deprecated ``sling`` field into a SlingResource.

    Returns ``None`` when the field is unset or falsy. Otherwise a deprecation
    warning is emitted and a SlingResource is built from the resolved model dump.
    """
    if not sling:
        return None

    deprecation_warning(
        "The `sling` field is deprecated, use `connections` instead. This field will be removed in a future release.",
        "1.11.1",
    )
    resolved_fields = context.resolve_value(sling.model_dump())
    return SlingResource(**resolved_fields)
114
+
115
+
116
@public
@scaffold_with(SlingReplicationComponentScaffolder)
@dataclass
class SlingReplicationCollectionComponent(Component, Resolvable):
    """Expose one or more Sling replications to Dagster as assets.

    To get started, run:

    ``dg scaffold defs dagster_sling.SlingReplicationCollectionComponent {defs_path}``

    This creates a defs.yaml together with a ``replication.yaml``, the Sling-specific
    configuration file. See Sling's `documentation <https://docs.slingdata.io/concepts/replication#overview>`_
    on ``replication.yaml``.
    """

    connections: ResolvedSlingConnections = field(default_factory=list)
    replications: Sequence[SlingReplicationSpecModel] = field(default_factory=list)
    resource: Annotated[
        Optional[SlingResource],
        Resolver(resolve_resource, model_field_name="sling"),
    ] = None

    @cached_property
    def sling_resource(self) -> SlingResource:
        # Prefer the explicitly configured (deprecated) resource; otherwise build one
        # from the configured connections.
        if self.resource:
            return self.resource
        return SlingResource(connections=self.connections)

    @cached_property
    def _base_translator(self) -> DagsterSlingTranslator:
        return DagsterSlingTranslator()

    @public
    def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetSpec:
        """Produce the AssetSpec for one Sling stream definition.

        Subclasses may override this method to customize how Sling stream definitions
        become Dagster asset specs. The default implementation delegates to a plain
        DagsterSlingTranslator.

        Args:
            stream_definition: A dictionary representing a single stream from the Sling
                replication config, containing source and target information

        Returns:
            An AssetSpec that represents the Sling stream as a Dagster asset

        Example:
            Override this method to add custom metadata based on stream properties:

            .. code-block:: python

                from dagster_sling import SlingReplicationCollectionComponent
                from dagster import AssetSpec

                class CustomSlingComponent(SlingReplicationCollectionComponent):
                    def get_asset_spec(self, stream_definition):
                        base_spec = super().get_asset_spec(stream_definition)
                        return base_spec.replace_attributes(
                            metadata={
                                **base_spec.metadata,
                                "source": stream_definition.get("source"),
                                "target": stream_definition.get("target")
                            }
                        )
        """
        translator = self._base_translator
        return translator.get_asset_spec(stream_definition)

    def build_asset(
        self, context: ComponentLoadContext, replication_spec_model: SlingReplicationSpecModel
    ) -> AssetsDefinition:
        """Build the AssetsDefinition that runs one replication of this component."""
        op_spec = replication_spec_model.op
        if not op_spec:
            op_spec = OpSpec()
        translator = SlingComponentTranslator(self, replication_spec_model, context.path)
        relative_path = replication_spec_model.path

        @sling_assets(
            # Fall back to the replication file's stem when the op is unnamed.
            name=op_spec.name or Path(relative_path).stem,
            op_tags=op_spec.tags,
            replication_config=context.path / relative_path,
            dagster_sling_translator=translator,
            backfill_policy=op_spec.backfill_policy,
        )
        def _asset(context: AssetExecutionContext):
            yield from self.execute(
                context=context,
                sling=self.sling_resource,
                replication_spec_model=replication_spec_model,
            )

        return _asset

    @public
    def execute(
        self,
        context: AssetExecutionContext,
        sling: SlingResource,
        replication_spec_model: SlingReplicationSpecModel,
    ) -> Iterator[Union[AssetMaterialization, MaterializeResult]]:
        """Run a Sling replication for the selected streams.

        Subclasses may override this method to customize execution, for example to
        add logging, change which metadata is collected, or post-process results.

        Args:
            context: The asset execution context provided by Dagster
            sling: The SlingResource used to execute the replication
            replication_spec_model: The model containing replication configuration and metadata options

        Yields:
            AssetMaterialization or MaterializeResult events from the Sling replication

        Example:
            Override this method to add custom logging during replication:

            .. code-block:: python

                from dagster_sling import SlingReplicationCollectionComponent
                from dagster import AssetExecutionContext

                class CustomSlingComponent(SlingReplicationCollectionComponent):
                    def execute(self, context, sling, replication_spec_model):
                        context.log.info("Starting Sling replication")
                        yield from super().execute(context, sling, replication_spec_model)
                        context.log.info("Sling replication completed")
        """
        events = sling.replicate(context=context)
        requested_addons = replication_spec_model.include_metadata
        # Column metadata is attached before row counts, when both are requested.
        if "column_metadata" in requested_addons:
            events = events.fetch_column_metadata()
        if "row_count" in requested_addons:
            events = events.fetch_row_count()
        yield from events

    def build_defs(self, context: ComponentLoadContext) -> Definitions:
        """Return Definitions holding one assets definition per configured replication."""
        assets = []
        for replication in self.replications:
            assets.append(self.build_asset(context, replication))
        return Definitions(
            assets=assets,
        )
249
+
250
+
251
class SlingComponentTranslator(
    create_component_translator_cls(SlingReplicationCollectionComponent, DagsterSlingTranslator),
    ComponentTranslator[SlingReplicationCollectionComponent],
):
    """Translator bound to one replication spec of a SlingReplicationCollectionComponent."""

    def __init__(
        self,
        component: SlingReplicationCollectionComponent,
        replication_spec: SlingReplicationSpecModel,
        base_path: Path,
    ):
        self._base_path = base_path
        self._replication_spec = replication_spec
        self._component = component

    def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetSpec:
        """Return the spec for a stream, applying the replication's translation if set.

        A code reference pointing at the replication file is always merged into the
        resulting spec.
        """
        spec = super().get_asset_spec(stream_definition)

        translation_fn = self._replication_spec.translation
        if translation_fn is not None:
            spec = translation_fn(spec, stream_definition)

        # always add code references to the replication spec
        replication_file = self._base_path / self._replication_spec.path
        return merge_code_references(
            spec, [LocalFileCodeReference(file_path=str(replication_file))]
        )
@@ -0,0 +1,12 @@
1
+ import yaml
2
+ from dagster.components.component.component_scaffolder import Scaffolder
3
+ from dagster.components.component_scaffolding import scaffold_component
4
+ from dagster.components.scaffold.scaffold import ScaffoldRequest
5
+
6
+
7
class SlingReplicationComponentScaffolder(Scaffolder):
    """Scaffolds a component definition plus an empty ``replication.yaml`` next to it."""

    def scaffold(self, request: ScaffoldRequest) -> None:
        # Component definition pointing at the replication file written below.
        component_attributes = {"replications": [{"path": "replication.yaml"}]}
        scaffold_component(request, component_attributes)

        # Skeleton replication config with empty source, target and streams sections.
        replication_template = {"source": {}, "target": {}, "streams": {}}
        with open(request.target_path / "replication.yaml", "w") as replication_file:
            yaml.dump(replication_template, replication_file)