dagster-sling 0.28.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_sling/__init__.py +24 -0
- dagster_sling/asset_decorator.py +159 -0
- dagster_sling/asset_defs.py +0 -0
- dagster_sling/components/__init__.py +0 -0
- dagster_sling/components/sling_replication_collection/__init__.py +0 -0
- dagster_sling/components/sling_replication_collection/component.py +274 -0
- dagster_sling/components/sling_replication_collection/scaffolder.py +12 -0
- dagster_sling/dagster_sling_translator.py +509 -0
- dagster_sling/py.typed +1 -0
- dagster_sling/resources.py +646 -0
- dagster_sling/sling_event_iterator.py +236 -0
- dagster_sling/sling_replication.py +33 -0
- dagster_sling/version.py +1 -0
- dagster_sling-0.28.8.dist-info/METADATA +33 -0
- dagster_sling-0.28.8.dist-info/RECORD +19 -0
- dagster_sling-0.28.8.dist-info/WHEEL +5 -0
- dagster_sling-0.28.8.dist-info/entry_points.txt +2 -0
- dagster_sling-0.28.8.dist-info/licenses/LICENSE +201 -0
- dagster_sling-0.28.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from dagster_shared.libraries import DagsterLibraryRegistry
|
|
2
|
+
|
|
3
|
+
from dagster_sling.asset_decorator import sling_assets
|
|
4
|
+
from dagster_sling.components.sling_replication_collection.component import (
|
|
5
|
+
SlingReplicationCollectionComponent,
|
|
6
|
+
SlingReplicationSpecModel,
|
|
7
|
+
)
|
|
8
|
+
from dagster_sling.dagster_sling_translator import DagsterSlingTranslator
|
|
9
|
+
from dagster_sling.resources import SlingConnectionResource, SlingMode, SlingResource
|
|
10
|
+
from dagster_sling.sling_replication import SlingReplicationParam
|
|
11
|
+
from dagster_sling.version import __version__
|
|
12
|
+
|
|
13
|
+
# Public API of the package: every name listed here is imported at the top of this module.
__all__ = [
    "DagsterSlingTranslator",
    "SlingConnectionResource",
    "SlingMode",
    "SlingReplicationCollectionComponent",
    "SlingReplicationParam",
    "SlingReplicationSpecModel",
    "SlingResource",
    "sling_assets",
]
|
|
23
|
+
|
|
24
|
+
# Runs at import time: records this library's name and version in DagsterLibraryRegistry.
DagsterLibraryRegistry.register("dagster-sling", __version__)
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
2
|
+
from copy import deepcopy
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
|
|
5
|
+
from dagster import (
|
|
6
|
+
AssetsDefinition,
|
|
7
|
+
BackfillPolicy,
|
|
8
|
+
PartitionsDefinition,
|
|
9
|
+
_check as check,
|
|
10
|
+
multi_asset,
|
|
11
|
+
)
|
|
12
|
+
from dagster._core.definitions.assets.definition.asset_spec import AssetSpec
|
|
13
|
+
from dagster._utils.merger import deep_merge_dicts
|
|
14
|
+
from dagster._utils.security import non_secure_md5_hash_str
|
|
15
|
+
|
|
16
|
+
from dagster_sling.dagster_sling_translator import DagsterSlingTranslator
|
|
17
|
+
from dagster_sling.sling_replication import SlingReplicationParam, validate_replication
|
|
18
|
+
|
|
19
|
+
# Metadata key under which get_sling_asset_specs stores the translator on every AssetSpec.
METADATA_KEY_TRANSLATOR = "dagster_sling/dagster_sling_translator"
|
|
20
|
+
# Metadata key under which get_sling_asset_specs stores the validated replication config.
METADATA_KEY_REPLICATION_CONFIG = "dagster_sling/sling_replication_config"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_streams_from_replication(
    replication_config: Mapping[str, Any],
) -> Iterable[Mapping[str, Any]]:
    """Yield the enabled streams of a Sling replication config.

    Args:
        replication_config: A parsed Sling replication config. Its ``streams`` entry, when
            present, maps each stream name to that stream's config, which may be empty.

    Yields:
        One ``{"name": <stream name>, "config": <stream config>}`` mapping for every stream
        whose config does not set a truthy ``disabled`` flag. Streams with an empty config
        are always yielded.
    """
    # A ``streams`` key that is present but empty (for example a bare ``streams:`` line in
    # YAML) is None rather than missing, so ``.get("streams", {})`` alone would still return
    # None and fail on ``.items()``. Treat it as "no streams".
    streams = replication_config.get("streams") or {}
    for stream, config in streams.items():
        if config and config.get("disabled", False):
            continue
        yield {"name": stream, "config": config}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def streams_with_default_dagster_meta(
    streams: Iterable[Mapping[str, Any]], replication_config: Mapping[str, Any]
) -> Iterable[Mapping[str, Any]]:
    """Apply the ``defaults.meta.dagster`` block of a replication config to every stream.

    Args:
        streams: Stream mappings of the form ``{"name": ..., "config": ...}``.
        replication_config: The replication config the streams were read from.

    Yields:
        The input streams unchanged when the config declares no default dagster meta.
        Otherwise, a new ``{"name": ..., "config": ...}`` mapping per stream whose
        ``config["meta"]`` combines the defaults with the stream's own ``meta``. The input
        stream configs are never mutated.
    """
    # Any of these sections may be present but empty (None), so fall back to {} at every
    # level instead of relying on the ``.get(key, {})`` default, which only covers a
    # missing key.
    defaults = replication_config.get("defaults") or {}
    default_dagster_meta = (defaults.get("meta") or {}).get("dagster") or {}

    if not default_dagster_meta:
        yield from streams
        return

    for stream in streams:
        config = deepcopy(stream["config"])
        if not config:
            # Copy the defaults so streams without a config do not all share (and
            # potentially mutate) the very same dict object.
            config = {"meta": {"dagster": deepcopy(default_dagster_meta)}}
        else:
            # NOTE(review): presumably the stream-level meta takes precedence over the
            # defaults here; confirm against deep_merge_dicts' argument order.
            config["meta"] = deep_merge_dicts(
                {"dagster": default_dagster_meta}, config.get("meta") or {}
            )
        yield {"name": stream["name"], "config": config}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def sling_assets(
    *,
    replication_config: SlingReplicationParam,
    dagster_sling_translator: Optional[DagsterSlingTranslator] = None,
    name: Optional[str] = None,
    partitions_def: Optional[PartitionsDefinition] = None,
    backfill_policy: Optional[BackfillPolicy] = None,
    op_tags: Optional[Mapping[str, Any]] = None,
    pool: Optional[str] = None,
) -> Callable[[Callable[..., Any]], AssetsDefinition]:
    """Create a definition for how to materialize a set of Sling replication streams as Dagster
    assets, as described by a Sling replication config. This will create one asset for every
    Sling target stream.

    A Sling replication config is a configuration that maps sources to destinations. For the full
    spec and descriptions, see `Sling's Documentation <https://docs.slingdata.io/sling-cli/run/configuration>`_.

    Args:
        replication_config (Union[Mapping[str, Any], str, Path]): A path to a Sling replication
            config, or a dictionary of a replication config.
        dagster_sling_translator (DagsterSlingTranslator): Allows customization of how to map a
            Sling stream to a Dagster AssetKey.
        name (Optional[str]): The name of the op.
        partitions_def (Optional[PartitionsDefinition]): The partitions definition for this asset.
        backfill_policy (Optional[BackfillPolicy]): The backfill policy for this asset.
        op_tags (Optional[Mapping[str, Any]]): The tags for the underlying op.
        pool (Optional[str]): A string that identifies the concurrency pool that governs the
            sling assets' execution.

    Examples:
        Running a sync by providing a path to a Sling replication config:

        .. code-block:: python

            from dagster import EnvVar
            from dagster_sling import sling_assets, SlingResource, SlingConnectionResource

            sling_resource = SlingResource(
                connections=[
                    SlingConnectionResource(
                        name="MY_POSTGRES", type="postgres", connection_string=EnvVar("POSTGRES_URL")
                    ),
                    SlingConnectionResource(
                        name="MY_DUCKDB",
                        type="duckdb",
                        connection_string="duckdb:///var/tmp/duckdb.db",
                    ),
                ]
            )

            config_path = "/path/to/replication.yaml"
            @sling_assets(replication_config=config_path)
            def my_assets(context, sling: SlingResource):
                yield from sling.replicate(context=context)
    """
    # Build the asset specs first, then hand everything to ``multi_asset``. The resulting
    # definition is always subsettable.
    asset_specs = get_sling_asset_specs(
        replication_config, dagster_sling_translator, partitions_def
    )
    return multi_asset(
        name=name,
        specs=asset_specs,
        partitions_def=partitions_def,
        backfill_policy=backfill_policy,
        op_tags=op_tags,
        pool=pool,
        can_subset=True,
    )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def get_sling_asset_specs(
    replication_config: SlingReplicationParam,
    dagster_sling_translator: Optional[DagsterSlingTranslator] = None,
    partitions_def: Optional[PartitionsDefinition] = None,
) -> list[AssetSpec]:
    """Build one ``AssetSpec`` per enabled stream of a Sling replication config.

    Args:
        replication_config: The replication config; it is passed through
            ``validate_replication`` before use.
        dagster_sling_translator: Translator used to turn each stream into an ``AssetSpec``.
            A default ``DagsterSlingTranslator`` is created when omitted.
        partitions_def: When truthy, replaces the partitions definition of every spec;
            otherwise each spec keeps its own.

    Returns:
        The specs, each carrying the translator and the validated config in its metadata,
        marked ``skippable``, and given a code version derived from the config when the
        translator did not set one.
    """
    resolved_config = validate_replication(replication_config)

    streams = streams_with_default_dagster_meta(
        get_streams_from_replication(resolved_config), resolved_config
    )

    # Fallback code version: a hash of the config's string representation.
    fallback_code_version = non_secure_md5_hash_str(str(resolved_config).encode())

    translator = check.opt_inst_param(
        dagster_sling_translator, "dagster_sling_translator", DagsterSlingTranslator
    )
    if not translator:
        translator = DagsterSlingTranslator()

    # First pass: translate every stream and attach the bookkeeping metadata.
    versioned_specs = []
    for stream in streams:
        translated = translator.get_asset_spec(stream).merge_attributes(
            metadata={
                METADATA_KEY_TRANSLATOR: translator,
                METADATA_KEY_REPLICATION_CONFIG: resolved_config,
            }
        )
        # Only fill in the code version when the translator left it unset.
        if translated.code_version is None:
            translated = translated.replace_attributes(code_version=fallback_code_version)
        versioned_specs.append(translated)

    # Second pass: apply the partitions definition override and mark specs as skippable.
    final_specs = []
    for spec in versioned_specs:
        effective_partitions_def = partitions_def or spec.partitions_def
        final_specs.append(
            spec.replace_attributes(partitions_def=effective_partitions_def, skippable=True)
        )
    return final_specs
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
from collections.abc import Iterator, Mapping, Sequence
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Annotated, Any, Literal, Optional, TypeAlias, Union
|
|
6
|
+
|
|
7
|
+
from dagster import Resolvable, Resolver
|
|
8
|
+
from dagster._annotations import public
|
|
9
|
+
from dagster._core.definitions.assets.definition.asset_spec import AssetSpec
|
|
10
|
+
from dagster._core.definitions.assets.definition.assets_definition import AssetsDefinition
|
|
11
|
+
from dagster._core.definitions.definitions_class import Definitions
|
|
12
|
+
from dagster._core.definitions.events import AssetMaterialization
|
|
13
|
+
from dagster._core.definitions.metadata.source_code import (
|
|
14
|
+
LocalFileCodeReference,
|
|
15
|
+
merge_code_references,
|
|
16
|
+
)
|
|
17
|
+
from dagster._core.definitions.result import MaterializeResult
|
|
18
|
+
from dagster.components.component.component import Component
|
|
19
|
+
from dagster.components.core.context import ComponentLoadContext
|
|
20
|
+
from dagster.components.resolved.context import ResolutionContext
|
|
21
|
+
from dagster.components.resolved.core_models import OpSpec
|
|
22
|
+
from dagster.components.scaffold.scaffold import scaffold_with
|
|
23
|
+
from dagster.components.utils.translation import (
|
|
24
|
+
ComponentTranslator,
|
|
25
|
+
TranslationFn,
|
|
26
|
+
TranslationFnResolver,
|
|
27
|
+
create_component_translator_cls,
|
|
28
|
+
)
|
|
29
|
+
from dagster_shared.utils.warnings import deprecation_warning
|
|
30
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
31
|
+
|
|
32
|
+
from dagster_sling.asset_decorator import sling_assets
|
|
33
|
+
from dagster_sling.components.sling_replication_collection.scaffolder import (
|
|
34
|
+
SlingReplicationComponentScaffolder,
|
|
35
|
+
)
|
|
36
|
+
from dagster_sling.dagster_sling_translator import DagsterSlingTranslator
|
|
37
|
+
from dagster_sling.resources import AssetExecutionContext, SlingConnectionResource, SlingResource
|
|
38
|
+
|
|
39
|
+
# The optional metadata add-ons that may be listed in a replication spec's `include_metadata`.
SlingMetadataAddons: TypeAlias = Literal["column_metadata", "row_count"]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Translation function that receives the stream definition as the ``stream_definition``
# template variable.
_StreamTranslationFn = Annotated[
    TranslationFn[Mapping[str, Any]],
    TranslationFnResolver(
        template_vars_for_translation_fn=lambda data: {"stream_definition": data}
    ),
]

# List of metadata add-ons, documented for the resolver with a description and examples.
_IncludeMetadata = Annotated[
    list[SlingMetadataAddons],
    Resolver.default(
        description="Optionally include additional metadata in materializations generated while executing your Sling models",
        examples=[
            ["row_count"],
            ["row_count", "column_metadata"],
        ],
    ),
]


@dataclass
class SlingReplicationSpecModel(Resolvable):
    """Describes a single Sling replication within a replication collection component."""

    # Location of the replication file; the component joins it onto the component's path.
    path: str
    # Optional op-level settings (name, tags, backfill policy) for the generated asset.
    op: Optional[OpSpec] = None
    # Optional per-stream hook applied to the translated asset spec.
    translation: Optional[_StreamTranslationFn] = None
    # Extra metadata to fetch while executing; empty by default.
    include_metadata: _IncludeMetadata = field(default_factory=list)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class SlingConnectionResourcePropertiesModel(Resolvable, BaseModel):
    """Properties of a Sling connection resource."""

    # Unknown keys are accepted because each connection type supports its own set of
    # additional properties.
    model_config = ConfigDict(extra="allow")

    # Required: the Sling connection type.
    type: str = Field(
        description="Type of the source connection, must match the Sling connection types. Use 'file' for local storage."
    )
    # Optional: omitted when the connection is described through keyword properties instead.
    connection_string: Optional[str] = Field(
        default=None,
        description="The optional connection string for the source database, if not using keyword arguments.",
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def resolve_connections(
    context: ResolutionContext,
    connections: Mapping[str, SlingConnectionResourcePropertiesModel],
) -> list[SlingConnectionResource]:
    """Turn a name-to-properties mapping into a list of ``SlingConnectionResource`` objects.

    Args:
        context: Resolution context used to resolve each connection's property values.
        connections: Connection properties keyed by connection name. Each value is either
            a plain dict or a model that is dumped to a dict first.

    Returns:
        One ``SlingConnectionResource`` per entry, named after its key, in mapping order.
    """
    resolved_connections = []
    for connection_name, properties in connections.items():
        if isinstance(properties, dict):
            raw_properties = properties
        else:
            raw_properties = properties.model_dump()
        resolved_connections.append(
            SlingConnectionResource(
                name=connection_name,
                **context.resolve_value(raw_properties),
            )
        )
    return resolved_connections
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Resolver that builds the connection list from a name-to-properties mapping.
_sling_connections_resolver = Resolver(
    resolve_connections,
    model_field_type=Mapping[str, SlingConnectionResourcePropertiesModel],
)

# Field type for a list of connections that is resolved through `resolve_connections`.
ResolvedSlingConnections: TypeAlias = Annotated[
    list[SlingConnectionResource], _sling_connections_resolver
]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def resolve_resource(
    context: ResolutionContext,
    sling,
) -> Optional[SlingResource]:
    """Resolve the deprecated ``sling`` field into a ``SlingResource``.

    Args:
        context: Resolution context used to resolve the dumped field values.
        sling: The value of the ``sling`` field, or a falsy value when it was not provided.

    Returns:
        ``None`` when ``sling`` is falsy; otherwise a ``SlingResource`` built from the
        resolved contents of ``sling.model_dump()``. A deprecation warning is emitted in
        the latter case.
    """
    if not sling:
        return None

    deprecation_warning(
        "The `sling` field is deprecated, use `connections` instead. This field will be removed in a future release.",
        "1.11.1",
    )
    resource_kwargs = context.resolve_value(sling.model_dump())
    return SlingResource(**resource_kwargs)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@public
@scaffold_with(SlingReplicationComponentScaffolder)
@dataclass
class SlingReplicationCollectionComponent(Component, Resolvable):
    """Expose one or more Sling replications to Dagster as assets.

    To get started, run:

    ``dg scaffold defs dagster_sling.SlingReplicationCollectionComponent {defs_path}``

    This will create a defs.yaml as well as a ``replication.yaml``, which is a Sling-specific
    configuration file. See Sling's
    `documentation <https://docs.slingdata.io/concepts/replication#overview>`_ on
    ``replication.yaml``.
    """

    # Connections used to build the default SlingResource.
    connections: ResolvedSlingConnections = field(default_factory=list)
    # One entry per replication file; each becomes its own assets definition.
    replications: Sequence[SlingReplicationSpecModel] = field(default_factory=list)
    # Explicit resource, populated from the deprecated `sling` field when it is set.
    resource: Annotated[
        Optional[SlingResource],
        Resolver(resolve_resource, model_field_name="sling"),
    ] = None

    @cached_property
    def sling_resource(self) -> SlingResource:
        """The explicit ``resource`` if one is set, else a resource built from ``connections``."""
        if self.resource:
            return self.resource
        return SlingResource(connections=self.connections)

    @cached_property
    def _base_translator(self) -> DagsterSlingTranslator:
        """A default translator instance, created once per component."""
        return DagsterSlingTranslator()

    @public
    def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetSpec:
        """Generates an AssetSpec for a given Sling stream definition.

        Subclasses can override this method to customize how Sling stream definitions are
        turned into Dagster asset specs. The default implementation delegates to a default
        DagsterSlingTranslator.

        Args:
            stream_definition: A dictionary representing a single stream from the Sling
                replication config, containing source and target information

        Returns:
            An AssetSpec that represents the Sling stream as a Dagster asset

        Example:
            Override this method to add custom metadata based on stream properties:

            .. code-block:: python

                from dagster_sling import SlingReplicationCollectionComponent
                from dagster import AssetSpec

                class CustomSlingComponent(SlingReplicationCollectionComponent):
                    def get_asset_spec(self, stream_definition):
                        base_spec = super().get_asset_spec(stream_definition)
                        return base_spec.replace_attributes(
                            metadata={
                                **base_spec.metadata,
                                "source": stream_definition.get("source"),
                                "target": stream_definition.get("target")
                            }
                        )
        """
        translator = self._base_translator
        return translator.get_asset_spec(stream_definition)

    def build_asset(
        self, context: ComponentLoadContext, replication_spec_model: SlingReplicationSpecModel
    ) -> AssetsDefinition:
        """Build the assets definition for a single replication spec.

        Args:
            context: The component load context; its ``path`` is the base for the
                replication file location.
            replication_spec_model: The replication to expose.

        Returns:
            An assets definition whose body runs ``execute`` with this component's
            ``sling_resource``.
        """
        relative_path = replication_spec_model.path
        op_settings = replication_spec_model.op or OpSpec()
        component_translator = SlingComponentTranslator(
            self, replication_spec_model, context.path
        )
        # The op name defaults to the replication file's stem.
        asset_name = op_settings.name or Path(relative_path).stem
        replication_file = context.path / relative_path

        @sling_assets(
            name=asset_name,
            op_tags=op_settings.tags,
            replication_config=replication_file,
            dagster_sling_translator=component_translator,
            backfill_policy=op_settings.backfill_policy,
        )
        def _asset(context: AssetExecutionContext):
            yield from self.execute(
                context=context,
                sling=self.sling_resource,
                replication_spec_model=replication_spec_model,
            )

        return _asset

    @public
    def execute(
        self,
        context: AssetExecutionContext,
        sling: SlingResource,
        replication_spec_model: SlingReplicationSpecModel,
    ) -> Iterator[Union[AssetMaterialization, MaterializeResult]]:
        """Executes a Sling replication for the selected streams.

        Subclasses can override this method to customize how the replication is executed,
        for example to add custom logging, change which metadata is collected, or handle
        results differently.

        Args:
            context: The asset execution context provided by Dagster
            sling: The SlingResource used to execute the replication
            replication_spec_model: The model containing replication configuration and metadata options

        Yields:
            AssetMaterialization or MaterializeResult events from the Sling replication

        Example:
            Override this method to add custom logging during replication:

            .. code-block:: python

                from dagster_sling import SlingReplicationCollectionComponent
                from dagster import AssetExecutionContext

                class CustomSlingComponent(SlingReplicationCollectionComponent):
                    def execute(self, context, sling, replication_spec_model):
                        context.log.info("Starting Sling replication")
                        yield from super().execute(context, sling, replication_spec_model)
                        context.log.info("Sling replication completed")
        """
        events = sling.replicate(context=context)
        requested_addons = replication_spec_model.include_metadata
        # Each requested add-on wraps the event iterator with an extra fetch step.
        if "column_metadata" in requested_addons:
            events = events.fetch_column_metadata()
        if "row_count" in requested_addons:
            events = events.fetch_row_count()
        yield from events

    def build_defs(self, context: ComponentLoadContext) -> Definitions:
        """Return definitions containing one assets definition per configured replication."""
        replication_assets = [
            self.build_asset(context, replication_spec) for replication_spec in self.replications
        ]
        return Definitions(assets=replication_assets)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class SlingComponentTranslator(
    create_component_translator_cls(SlingReplicationCollectionComponent, DagsterSlingTranslator),
    ComponentTranslator[SlingReplicationCollectionComponent],
):
    """Translator used by the component for a single replication spec.

    On top of the spec produced by its parent class, it applies the replication's optional
    ``translation`` function and attaches a code reference to the replication file.
    """

    def __init__(
        self,
        component: SlingReplicationCollectionComponent,
        replication_spec: SlingReplicationSpecModel,
        base_path: Path,
    ):
        """Store the owning component, the replication spec and the base path.

        Args:
            component: The component this translator belongs to.
            replication_spec: The replication whose streams are translated.
            base_path: Directory the replication spec's ``path`` is joined onto.
        """
        self._base_path = base_path
        self._replication_spec = replication_spec
        self._component = component

    def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetSpec:
        """Return the asset spec for a stream, with a code reference to the replication file.

        Args:
            stream_definition: The stream definition to translate.

        Returns:
            The parent class's spec, passed through the replication's ``translation``
            function when one is set, and merged with a local-file code reference.
        """
        asset_spec = super().get_asset_spec(stream_definition)

        translation_fn = self._replication_spec.translation
        if translation_fn is not None:
            asset_spec = translation_fn(asset_spec, stream_definition)

        # The code reference is added regardless of whether a translation was applied.
        replication_file = self._base_path / self._replication_spec.path
        reference = LocalFileCodeReference(file_path=str(replication_file))
        return merge_code_references(asset_spec, [reference])
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
from dagster.components.component.component_scaffolder import Scaffolder
|
|
3
|
+
from dagster.components.component_scaffolding import scaffold_component
|
|
4
|
+
from dagster.components.scaffold.scaffold import ScaffoldRequest
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SlingReplicationComponentScaffolder(Scaffolder):
    """Scaffolds a Sling replication component together with a starter ``replication.yaml``."""

    def scaffold(self, request: ScaffoldRequest) -> None:
        """Scaffold the component and write an empty replication file next to it.

        Args:
            request: The scaffold request; the replication file is written to
                ``request.target_path / "replication.yaml"``.
        """
        # The component is scaffolded with a single replication pointing at the file below.
        component_params = {"replications": [{"path": "replication.yaml"}]}
        scaffold_component(request, component_params)

        # Starter replication config with empty source, target and streams sections.
        empty_replication = {"source": {}, "target": {}, "streams": {}}
        with open(request.target_path / "replication.yaml", "w") as replication_file:
            yaml.dump(empty_replication, replication_file)
|