dagster-sling 0.25.9__tar.gz → 0.25.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dagster-sling might be problematic. Click here for more details.

Files changed (24)
  1. {dagster-sling-0.25.9/dagster_sling.egg-info → dagster-sling-0.25.11}/PKG-INFO +1 -1
  2. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling/asset_decorator.py +4 -15
  3. dagster-sling-0.25.11/dagster_sling/dagster_sling_translator.py +564 -0
  4. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling/resources.py +2 -2
  5. dagster-sling-0.25.11/dagster_sling/version.py +1 -0
  6. {dagster-sling-0.25.9 → dagster-sling-0.25.11/dagster_sling.egg-info}/PKG-INFO +1 -1
  7. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling.egg-info/requires.txt +1 -1
  8. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/setup.py +1 -1
  9. dagster-sling-0.25.9/dagster_sling/dagster_sling_translator.py +0 -297
  10. dagster-sling-0.25.9/dagster_sling/version.py +0 -1
  11. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/LICENSE +0 -0
  12. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/MANIFEST.in +0 -0
  13. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/README.md +0 -0
  14. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling/__init__.py +0 -0
  15. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling/asset_defs.py +0 -0
  16. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling/py.typed +0 -0
  17. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling/sling_event_iterator.py +0 -0
  18. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling/sling_replication.py +0 -0
  19. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling.egg-info/SOURCES.txt +0 -0
  20. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling.egg-info/dependency_links.txt +0 -0
  21. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling.egg-info/not-zip-safe +0 -0
  22. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/dagster_sling.egg-info/top_level.txt +0 -0
  23. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/integration.yaml +0 -0
  24. {dagster-sling-0.25.9 → dagster-sling-0.25.11}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-sling
3
- Version: 0.25.9
3
+ Version: 0.25.11
4
4
  Summary: Package for performing ETL/ELT tasks with Sling in Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-sling
6
6
  Author: Dagster Labs
@@ -4,7 +4,6 @@ from typing import Any, Callable, Optional
4
4
 
5
5
  from dagster import (
6
6
  AssetsDefinition,
7
- AssetSpec,
8
7
  BackfillPolicy,
9
8
  PartitionsDefinition,
10
9
  _check as check,
@@ -124,23 +123,13 @@ def sling_assets(
124
123
  op_tags=op_tags,
125
124
  backfill_policy=backfill_policy,
126
125
  specs=[
127
- AssetSpec(
128
- key=dagster_sling_translator.get_asset_key(stream),
129
- deps=dagster_sling_translator.get_deps_asset_key(stream),
130
- description=dagster_sling_translator.get_description(stream),
126
+ dagster_sling_translator.get_asset_spec(stream)
127
+ .replace_attributes(code_version=code_version)
128
+ .merge_attributes(
131
129
  metadata={
132
- **dagster_sling_translator.get_metadata(stream),
133
130
  METADATA_KEY_TRANSLATOR: dagster_sling_translator,
134
131
  METADATA_KEY_REPLICATION_CONFIG: replication_config,
135
- },
136
- tags=dagster_sling_translator.get_tags(stream),
137
- kinds=dagster_sling_translator.get_kinds(stream),
138
- group_name=dagster_sling_translator.get_group_name(stream),
139
- freshness_policy=dagster_sling_translator.get_freshness_policy(stream),
140
- auto_materialize_policy=dagster_sling_translator.get_auto_materialize_policy(
141
- stream
142
- ),
143
- code_version=code_version,
132
+ }
144
133
  )
145
134
  for stream in streams
146
135
  ],
@@ -0,0 +1,564 @@
1
+ import re
2
+ from collections.abc import Iterable, Mapping
3
+ from dataclasses import dataclass
4
+ from typing import Any, Callable, Optional
5
+
6
+ from dagster import AssetKey, AssetSpec, AutoMaterializePolicy, FreshnessPolicy, MetadataValue
7
+ from dagster._annotations import public, superseded
8
+ from dagster._utils.warnings import supersession_warning
9
+
10
+
11
+ @dataclass
12
+ class DagsterSlingTranslator:
13
+ target_prefix: str = "target"
14
+
15
+ @public
16
+ def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetSpec:
17
+ """A function that takes a stream definition from a Sling replication config and returns a
18
+ Dagster AssetSpec.
19
+
20
+ The stream definition is a dictionary key/value pair where the key is the stream name and
21
+ the value is a dictionary representing the Sling Replication Stream Config.
22
+ """
23
+ return AssetSpec(
24
+ key=self._resolve_back_compat_method(
25
+ "get_asset_key", self._default_asset_key_fn, stream_definition
26
+ ),
27
+ deps=self._resolve_back_compat_method(
28
+ "get_deps_asset_key", self._default_deps_fn, stream_definition
29
+ ),
30
+ description=self._resolve_back_compat_method(
31
+ "get_description", self._default_description_fn, stream_definition
32
+ ),
33
+ metadata=self._resolve_back_compat_method(
34
+ "get_metadata", self._default_metadata_fn, stream_definition
35
+ ),
36
+ tags=self._resolve_back_compat_method(
37
+ "get_tags", self._default_tags_fn, stream_definition
38
+ ),
39
+ kinds=self._resolve_back_compat_method(
40
+ "get_kinds", self._default_kinds_fn, stream_definition
41
+ ),
42
+ group_name=self._resolve_back_compat_method(
43
+ "get_group_name", self._default_group_name_fn, stream_definition
44
+ ),
45
+ freshness_policy=self._resolve_back_compat_method(
46
+ "get_freshness_policy", self._default_freshness_policy_fn, stream_definition
47
+ ),
48
+ auto_materialize_policy=self._resolve_back_compat_method(
49
+ "get_auto_materialize_policy",
50
+ self._default_auto_materialize_policy_fn,
51
+ stream_definition,
52
+ ),
53
+ )
54
+
55
+ def _resolve_back_compat_method(
56
+ self,
57
+ method_name: str,
58
+ default_fn: Callable[[Mapping[str, Any]], Any],
59
+ stream_definition: Mapping[str, Any],
60
+ ):
61
+ method = getattr(type(self), method_name)
62
+ base_method = getattr(DagsterSlingTranslator, method_name)
63
+ if method is not base_method: # user defined this
64
+ supersession_warning(
65
+ subject=method_name,
66
+ additional_warn_text=(
67
+ f"Instead of overriding DagsterSlingTranslator.{method_name}(), "
68
+ f"override DagsterSlingTranslator.get_asset_spec()."
69
+ ),
70
+ )
71
+ return method(self, stream_definition)
72
+ else:
73
+ return default_fn(stream_definition)
74
+
75
+ @public
76
+ def sanitize_stream_name(self, stream_name: str) -> str:
77
+ """A function that takes a stream name from a Sling replication config and returns a
78
+ sanitized name for the stream.
79
+
80
+ By default, this removes any non-alphanumeric characters from the stream name and replaces
81
+ them with underscores, while removing any double quotes.
82
+
83
+ Args:
84
+ stream_name (str): The name of the stream.
85
+
86
+ Examples:
87
+ Using a custom stream name sanitizer:
88
+
89
+ .. code-block:: python
90
+
91
+ class CustomSlingTranslator(DagsterSlingTranslator):
92
+ def sanitize_stream_name(self, stream_name: str) -> str:
93
+ return stream_name.replace(".", "")
94
+ """
95
+ return re.sub(r"[^a-zA-Z0-9_.]", "_", stream_name.replace('"', "").lower())
96
+
97
+ @superseded(
98
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).key` instead.",
99
+ )
100
+ @public
101
+ def get_asset_key(self, stream_definition: Mapping[str, Any]) -> AssetKey:
102
+ """A function that takes a stream definition from a Sling replication config and returns a
103
+ Dagster AssetKey.
104
+
105
+ The stream definition is a dictionary key/value pair where the key is the stream name and
106
+ the value is a dictionary representing the Sling Replication Stream Config.
107
+
108
+ For example:
109
+
110
+ .. code-block:: python
111
+
112
+ stream_definition = {"public.users":
113
+ {'sql': 'select all_user_id, name from public."all_Users"',
114
+ 'object': 'public.all_users'}
115
+ }
116
+
117
+ By default, this returns the class's target_prefix parameter concatenated with the stream name.
118
+ A stream named "public.accounts" will create an AssetKey named "target_public_accounts".
119
+
120
+ Override this function to customize how to map a Sling stream to a Dagster AssetKey.
121
+
122
+ Alternatively, you can provide metadata in your Sling replication config to specify the
123
+ Dagster AssetKey for a stream as follows:
124
+
125
+ .. code-block:: yaml
126
+
127
+ public.users:
128
+ meta:
129
+ dagster:
130
+ asset_key: "mydb_users"
131
+
132
+ Args:
133
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
134
+
135
+ Returns:
136
+ AssetKey: The Dagster AssetKey for the replication stream.
137
+
138
+ Examples:
139
+ Using a custom mapping for streams:
140
+
141
+ .. code-block:: python
142
+
143
+ class CustomSlingTranslator(DagsterSlingTranslator):
144
+ def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetKey:
145
+ default_spec = super().get_asset_spec(stream_definition)
146
+ map = {"stream1": "asset1", "stream2": "asset2"}
147
+ return default_spec.replace_attributes(key=AssetKey(map[stream_definition["name"]]))
148
+ """
149
+ return self._default_asset_key_fn(stream_definition)
150
+
151
+ def _default_asset_key_fn(self, stream_definition: Mapping[str, Any]) -> AssetKey:
152
+ """A function that takes a stream definition from a Sling replication config and returns a
153
+ Dagster AssetKey.
154
+
155
+ The stream definition is a dictionary key/value pair where the key is the stream name and
156
+ the value is a dictionary representing the Sling Replication Stream Config.
157
+
158
+ For example:
159
+
160
+ .. code-block:: python
161
+
162
+ stream_definition = {"public.users":
163
+ {'sql': 'select all_user_id, name from public."all_Users"',
164
+ 'object': 'public.all_users'}
165
+ }
166
+
167
+ This returns the class's target_prefix parameter concatenated with the stream name.
168
+ A stream named "public.accounts" will create an AssetKey named "target_public_accounts".
169
+
170
+ Alternatively, you can provide metadata in your Sling replication config to specify the
171
+ Dagster AssetKey for a stream as follows:
172
+
173
+ .. code-block:: yaml
174
+
175
+ public.users:
176
+ meta:
177
+ dagster:
178
+ asset_key: "mydb_users"
179
+
180
+ Args:
181
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
182
+
183
+ Returns:
184
+ AssetKey: The Dagster AssetKey for the replication stream.
185
+
186
+ Examples:
187
+ Using a custom mapping for streams:
188
+
189
+ .. code-block:: python
190
+
191
+ class CustomSlingTranslator(DagsterSlingTranslator):
192
+ def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetKey:
193
+ default_spec = super().get_asset_spec(stream_definition)
194
+ map = {"stream1": "asset1", "stream2": "asset2"}
195
+ return default_spec.replace_attributes(key=AssetKey(map[stream_definition["name"]]))
196
+ """
197
+ config = stream_definition.get("config", {}) or {}
198
+ object_key = config.get("object")
199
+ meta = config.get("meta", {})
200
+ asset_key = meta.get("dagster", {}).get("asset_key")
201
+
202
+ if asset_key:
203
+ if self.sanitize_stream_name(asset_key) != asset_key:
204
+ raise ValueError(
205
+ f"Asset key {asset_key} for stream {stream_definition['name']} is not "
206
+ "sanitized. Please use only alphanumeric characters and underscores."
207
+ )
208
+ return AssetKey(asset_key.split("."))
209
+
210
+ # You can override the Sling Replication default object with an object key
211
+ stream_name = object_key or stream_definition["name"]
212
+ sanitized_components = self.sanitize_stream_name(stream_name).split(".")
213
+ return AssetKey([self.target_prefix] + sanitized_components)
214
+
215
+ @superseded(
216
+ additional_warn_text=(
217
+ "Iterate over `DagsterSlingTranslator.get_asset_spec(...).deps` to access `AssetDep.asset_key` instead."
218
+ ),
219
+ )
220
+ @public
221
+ def get_deps_asset_key(self, stream_definition: Mapping[str, Any]) -> Iterable[AssetKey]:
222
+ """A function that takes a stream definition from a Sling replication config and returns a
223
+ Dagster AssetKey for each dependency of the replication stream.
224
+
225
+ By default, this returns the stream name. For example, a stream named "public.accounts"
226
+ will create an AssetKey named "target_public_accounts" and a dependency named "public_accounts".
227
+
228
+ Override this function to customize how to map a Sling stream to a Dagster dependency.
229
+ Alternatively, you can provide metadata in your Sling replication config to specify the
230
+ Dagster AssetKey for a stream as follows:
231
+
232
+ .. code-block:: yaml
233
+
234
+ public.users:
235
+ meta:
236
+ dagster:
237
+ deps: "sourcedb_users"
238
+
239
+ Args:
240
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
241
+
242
+ Returns:
243
+ Iterable[AssetKey]: A list of Dagster AssetKey for each dependency of the replication stream.
244
+ """
245
+ return self._default_deps_fn(stream_definition)
246
+
247
+ def _default_deps_fn(self, stream_definition: Mapping[str, Any]) -> Iterable[AssetKey]:
248
+ """A function that takes a stream definition from a Sling replication config and returns a
249
+ Dagster AssetKey for each dependency of the replication stream.
250
+
251
+ This returns the stream name. For example, a stream named "public.accounts"
252
+ will create an AssetKey named "target_public_accounts" and a dependency named "public_accounts".
253
+
254
+ Alternatively, you can provide metadata in your Sling replication config to specify the
255
+ Dagster AssetKey for a stream as follows:
256
+
257
+ .. code-block:: yaml
258
+
259
+ public.users:
260
+ meta:
261
+ dagster:
262
+ deps: "sourcedb_users"
263
+
264
+ Args:
265
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
266
+
267
+ Returns:
268
+ Iterable[AssetKey]: A list of Dagster AssetKey for each dependency of the replication stream.
269
+ """
270
+ config = stream_definition.get("config", {}) or {}
271
+ meta = config.get("meta", {})
272
+ deps = meta.get("dagster", {}).get("deps")
273
+ deps_out = []
274
+ if deps and isinstance(deps, str):
275
+ deps = [deps]
276
+ if deps:
277
+ assert isinstance(deps, list)
278
+ for asset_key in deps:
279
+ if self.sanitize_stream_name(asset_key) != asset_key:
280
+ raise ValueError(
281
+ f"Deps Asset key {asset_key} for stream {stream_definition['name']} is not "
282
+ "sanitized. Please use only alphanumeric characters and underscores."
283
+ )
284
+ deps_out.append(AssetKey(asset_key.split(".")))
285
+ return deps_out
286
+
287
+ stream_name = stream_definition["name"]
288
+ components = self.sanitize_stream_name(stream_name).split(".")
289
+ return [AssetKey(components)]
290
+
291
+ @superseded(
292
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).description` instead.",
293
+ )
294
+ @public
295
+ def get_description(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
296
+ """Retrieves the description for a given stream definition.
297
+
298
+ This method checks the provided stream definition for a description. It first looks
299
+ for an "sql" key in the configuration and returns its value if found. If not, it looks
300
+ for a description in the metadata under the "dagster" key.
301
+
302
+ Parameters:
303
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
304
+ which includes configuration details.
305
+
306
+ Returns:
307
+ Optional[str]: The description of the stream if found, otherwise None.
308
+ """
309
+ return self._default_description_fn(stream_definition)
310
+
311
+ def _default_description_fn(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
312
+ """Retrieves the description for a given stream definition.
313
+
314
+ This method checks the provided stream definition for a description. It first looks
315
+ for an "sql" key in the configuration and returns its value if found. If not, it looks
316
+ for a description in the metadata under the "dagster" key.
317
+
318
+ Parameters:
319
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
320
+ which includes configuration details.
321
+
322
+ Returns:
323
+ Optional[str]: The description of the stream if found, otherwise None.
324
+ """
325
+ config = stream_definition.get("config", {}) or {}
326
+ if "sql" in config:
327
+ return config["sql"]
328
+ meta = config.get("meta", {})
329
+ description = meta.get("dagster", {}).get("description")
330
+ return description
331
+
332
+ @superseded(
333
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).metadata` instead.",
334
+ )
335
+ @public
336
+ def get_metadata(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
337
+ """Retrieves the metadata for a given stream definition.
338
+
339
+ This method extracts the configuration from the provided stream definition and returns
340
+ it as a JSON metadata value.
341
+
342
+ Parameters:
343
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
344
+ which includes configuration details.
345
+
346
+ Returns:
347
+ Mapping[str, Any]: A dictionary containing the stream configuration as JSON metadata.
348
+ """
349
+ return self._default_metadata_fn(stream_definition)
350
+
351
+ def _default_metadata_fn(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
352
+ """Retrieves the metadata for a given stream definition.
353
+
354
+ This method extracts the configuration from the provided stream definition and returns
355
+ it as a JSON metadata value.
356
+
357
+ Parameters:
358
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
359
+ which includes configuration details.
360
+
361
+ Returns:
362
+ Mapping[str, Any]: A dictionary containing the stream configuration as JSON metadata.
363
+ """
364
+ return {"stream_config": MetadataValue.json(stream_definition.get("config", {}))}
365
+
366
+ @superseded(
367
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).tags` instead.",
368
+ )
369
+ @public
370
+ def get_tags(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
371
+ """Retrieves the tags for a given stream definition.
372
+
373
+ This method returns an empty dictionary, indicating that no tags are associated with
374
+ the stream definition by default. This method can be overridden to provide custom tags.
375
+
376
+ Parameters:
377
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
378
+ which includes configuration details.
379
+
380
+ Returns:
381
+ Mapping[str, Any]: An empty dictionary.
382
+ """
383
+ return self._default_tags_fn(stream_definition)
384
+
385
+ def _default_tags_fn(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
386
+ """Retrieves the tags for a given stream definition.
387
+
388
+ This method returns an empty dictionary, indicating that no tags are associated with
389
+ the stream definition by default. This method can be overridden to provide custom tags.
390
+
391
+ Parameters:
392
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
393
+ which includes configuration details.
394
+
395
+ Returns:
396
+ Mapping[str, Any]: An empty dictionary.
397
+ """
398
+ return {}
399
+
400
+ @superseded(
401
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).kinds` instead.",
402
+ )
403
+ @public
404
+ def get_kinds(self, stream_definition: Mapping[str, Any]) -> set[str]:
405
+ """Retrieves the kinds for a given stream definition.
406
+
407
+ This method returns "sling" by default. This method can be overridden to provide custom kinds.
408
+
409
+ Parameters:
410
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
411
+ which includes configuration details.
412
+
413
+ Returns:
414
+ Set[str]: A set containing kinds for the stream's assets.
415
+ """
416
+ return self._default_kinds_fn(stream_definition)
417
+
418
+ def _default_kinds_fn(self, stream_definition: Mapping[str, Any]) -> set[str]:
419
+ """Retrieves the kinds for a given stream definition.
420
+
421
+ This method returns "sling" by default. This method can be overridden to provide custom kinds.
422
+
423
+ Parameters:
424
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
425
+ which includes configuration details.
426
+
427
+ Returns:
428
+ Set[str]: A set containing kinds for the stream's assets.
429
+ """
430
+ return {"sling"}
431
+
432
+ @superseded(
433
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).group_name` instead.",
434
+ )
435
+ @public
436
+ def get_group_name(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
437
+ """Retrieves the group name for a given stream definition.
438
+
439
+ This method checks the provided stream definition for a group name in the metadata
440
+ under the "dagster" key.
441
+
442
+ Parameters:
443
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
444
+ which includes configuration details.
445
+
446
+ Returns:
447
+ Optional[str]: The group name if found, otherwise None.
448
+ """
449
+ return self._default_group_name_fn(stream_definition)
450
+
451
+ def _default_group_name_fn(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
452
+ """Retrieves the group name for a given stream definition.
453
+
454
+ This method checks the provided stream definition for a group name in the metadata
455
+ under the "dagster" key.
456
+
457
+ Parameters:
458
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
459
+ which includes configuration details.
460
+
461
+ Returns:
462
+ Optional[str]: The group name if found, otherwise None.
463
+ """
464
+ config = stream_definition.get("config", {}) or {}
465
+ meta = config.get("meta", {})
466
+ return meta.get("dagster", {}).get("group")
467
+
468
+ @superseded(
469
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).freshness_policy` instead.",
470
+ )
471
+ @public
472
+ def get_freshness_policy(
473
+ self, stream_definition: Mapping[str, Any]
474
+ ) -> Optional[FreshnessPolicy]:
475
+ """Retrieves the freshness policy for a given stream definition.
476
+
477
+ This method checks the provided stream definition for a specific configuration
478
+ indicating a freshness policy. If the configuration is found, it constructs and
479
+ returns a FreshnessPolicy object based on the provided parameters. Otherwise,
480
+ it returns None.
481
+
482
+ Parameters:
483
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
484
+ which includes configuration details.
485
+
486
+ Returns:
487
+ Optional[FreshnessPolicy]: A FreshnessPolicy object if the configuration is found,
488
+ otherwise None.
489
+ """
490
+ return self._default_freshness_policy_fn(stream_definition)
491
+
492
+ def _default_freshness_policy_fn(
493
+ self, stream_definition: Mapping[str, Any]
494
+ ) -> Optional[FreshnessPolicy]:
495
+ """Retrieves the freshness policy for a given stream definition.
496
+
497
+ This method checks the provided stream definition for a specific configuration
498
+ indicating a freshness policy. If the configuration is found, it constructs and
499
+ returns a FreshnessPolicy object based on the provided parameters. Otherwise,
500
+ it returns None.
501
+
502
+ Parameters:
503
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
504
+ which includes configuration details.
505
+
506
+ Returns:
507
+ Optional[FreshnessPolicy]: A FreshnessPolicy object if the configuration is found,
508
+ otherwise None.
509
+ """
510
+ config = stream_definition.get("config", {}) or {}
511
+ meta = config.get("meta", {})
512
+ freshness_policy_config = meta.get("dagster", {}).get("freshness_policy")
513
+ if freshness_policy_config:
514
+ return FreshnessPolicy(
515
+ maximum_lag_minutes=float(freshness_policy_config["maximum_lag_minutes"]),
516
+ cron_schedule=freshness_policy_config.get("cron_schedule"),
517
+ cron_schedule_timezone=freshness_policy_config.get("cron_schedule_timezone"),
518
+ )
519
+
520
+ @superseded(
521
+ additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).auto_materialize_policy` instead.",
522
+ )
523
+ @public
524
+ def get_auto_materialize_policy(
525
+ self, stream_definition: Mapping[str, Any]
526
+ ) -> Optional[AutoMaterializePolicy]:
527
+ """Defines the auto-materialize policy for a given stream definition.
528
+
529
+ This method checks the provided stream definition for a specific configuration
530
+ indicating an auto-materialize policy. If the configuration is found, it returns
531
+ an eager auto-materialize policy. Otherwise, it returns None.
532
+
533
+ Parameters:
534
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
535
+ which includes configuration details.
536
+
537
+ Returns:
538
+ Optional[AutoMaterializePolicy]: An eager auto-materialize policy if the configuration
539
+ is found, otherwise None.
540
+ """
541
+ return self._default_auto_materialize_policy_fn(stream_definition)
542
+
543
+ def _default_auto_materialize_policy_fn(
544
+ self, stream_definition: Mapping[str, Any]
545
+ ) -> Optional[AutoMaterializePolicy]:
546
+ """Defines the auto-materialize policy for a given stream definition.
547
+
548
+ This method checks the provided stream definition for a specific configuration
549
+ indicating an auto-materialize policy. If the configuration is found, it returns
550
+ an eager auto-materialize policy. Otherwise, it returns None.
551
+
552
+ Parameters:
553
+ stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
554
+ which includes configuration details.
555
+
556
+ Returns:
557
+ Optional[AutoMaterializePolicy]: An eager auto-materialize policy if the configuration
558
+ is found, otherwise None.
559
+ """
560
+ config = stream_definition.get("config", {}) or {}
561
+ meta = config.get("meta", {})
562
+ auto_materialize_policy_config = "auto_materialize_policy" in meta.get("dagster", {})
563
+ if auto_materialize_policy_config:
564
+ return AutoMaterializePolicy.eager()
@@ -198,7 +198,7 @@ class SlingResource(ConfigurableResource):
198
198
  streams = streams_with_default_dagster_meta(raw_streams, replication_config)
199
199
  selected_asset_keys = context.selected_asset_keys
200
200
  for stream in streams:
201
- asset_key = dagster_sling_translator.get_asset_key(stream)
201
+ asset_key = dagster_sling_translator.get_asset_spec(stream).key
202
202
  if asset_key in selected_asset_keys:
203
203
  context_streams.update({stream["name"]: stream["config"]})
204
204
 
@@ -401,7 +401,7 @@ class SlingResource(ConfigurableResource):
401
401
  # TODO: In the future, it'd be nice to yield these materializations as they come in
402
402
  # rather than waiting until the end of the replication
403
403
  for stream in stream_definitions:
404
- asset_key = dagster_sling_translator.get_asset_key(stream)
404
+ asset_key = dagster_sling_translator.get_asset_spec(stream).key
405
405
 
406
406
  object_key = (stream.get("config") or {}).get("object")
407
407
  destination_stream_name = object_key or stream["name"]
@@ -0,0 +1 @@
1
+ __version__ = "0.25.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-sling
3
- Version: 0.25.9
3
+ Version: 0.25.11
4
4
  Summary: Package for performing ETL/ELT tasks with Sling in Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-sling
6
6
  Author: Dagster Labs
@@ -1,4 +1,4 @@
1
- dagster==1.9.9
1
+ dagster==1.9.11
2
2
  sling>=1.1.5
3
3
 
4
4
  [test]
@@ -33,7 +33,7 @@ setup(
33
33
  packages=find_packages(exclude=["dagster_sling_tests*"]),
34
34
  include_package_data=True,
35
35
  python_requires=">=3.9,<3.13",
36
- install_requires=["dagster==1.9.9", "sling>=1.1.5"],
36
+ install_requires=["dagster==1.9.11", "sling>=1.1.5"],
37
37
  zip_safe=False,
38
38
  extras_require={
39
39
  "test": [
@@ -1,297 +0,0 @@
1
- import re
2
- from collections.abc import Iterable, Mapping
3
- from dataclasses import dataclass
4
- from typing import Any, Optional
5
-
6
- from dagster import AssetKey, AutoMaterializePolicy, FreshnessPolicy, MetadataValue
7
- from dagster._annotations import public
8
-
9
-
10
- @dataclass
11
- class DagsterSlingTranslator:
12
- target_prefix: str = "target"
13
-
14
- @public
15
- def sanitize_stream_name(self, stream_name: str) -> str:
16
- """A function that takes a stream name from a Sling replication config and returns a
17
- sanitized name for the stream.
18
-
19
- By default, this removes any non-alphanumeric characters from the stream name and replaces
20
- them with underscores, while removing any double quotes.
21
-
22
- Args:
23
- stream_name (str): The name of the stream.
24
-
25
- Examples:
26
- Using a custom stream name sanitizer:
27
-
28
- .. code-block:: python
29
-
30
- class CustomSlingTranslator(DagsterSlingTranslator):
31
- def sanitize_stream_name(self, stream_name: str) -> str:
32
- return stream_name.replace(".", "")
33
- """
34
- return re.sub(r"[^a-zA-Z0-9_.]", "_", stream_name.replace('"', "").lower())
35
-
36
- @public
37
- def get_asset_key(self, stream_definition: Mapping[str, Any]) -> AssetKey:
38
- """A function that takes a stream definition from a Sling replication config and returns a
39
- Dagster AssetKey.
40
-
41
- The stream definition is a dictionary key/value pair where the key is the stream name and
42
- the value is a dictionary representing the Sling Replication Stream Config.
43
-
44
- For example:
45
-
46
- .. code-block:: python
47
-
48
- stream_definition = {"public.users":
49
- {'sql': 'select all_user_id, name from public."all_Users"',
50
- 'object': 'public.all_users'}
51
- }
52
-
53
- By default, this returns the class's target_prefix parameter concatenated with the stream name.
54
- A stream named "public.accounts" will create an AssetKey named "target_public_accounts".
55
-
56
- Override this function to customize how to map a Sling stream to a Dagster AssetKey.
57
-
58
- Alternatively, you can provide metadata in your Sling replication config to specify the
59
- Dagster AssetKey for a stream as follows:
60
-
61
- .. code-block:: yaml
62
-
63
- public.users:
64
- meta:
65
- dagster:
66
- asset_key: "mydb_users"
67
-
68
- Args:
69
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
70
-
71
- Returns:
72
- AssetKey: The Dagster AssetKey for the replication stream.
73
-
74
- Examples:
75
- Using a custom mapping for streams:
76
-
77
- .. code-block:: python
78
-
79
- class CustomSlingTranslator(DagsterSlingTranslator):
80
- def get_asset_key(self, stream_definition) -> AssetKey:
80
- map = {"stream1": "asset1", "stream2": "asset2"}
81
- return AssetKey(map[stream_definition["name"]])
83
- """
84
- config = stream_definition.get("config", {}) or {}
85
- object_key = config.get("object")
86
- meta = config.get("meta", {})
87
- asset_key = meta.get("dagster", {}).get("asset_key")
88
-
89
- if asset_key:
90
- if self.sanitize_stream_name(asset_key) != asset_key:
91
- raise ValueError(
92
- f"Asset key {asset_key} for stream {stream_definition['name']} is not "
93
- "sanitized. Please use only alphanumeric characters and underscores."
94
- )
95
- return AssetKey(asset_key.split("."))
96
-
97
- # You can override the Sling Replication default object with an object key
98
- stream_name = object_key or stream_definition["name"]
99
- sanitized_components = self.sanitize_stream_name(stream_name).split(".")
100
- return AssetKey([self.target_prefix] + sanitized_components)
101
-
102
- @public
103
- def get_deps_asset_key(self, stream_definition: Mapping[str, Any]) -> Iterable[AssetKey]:
104
- """A function that takes a stream name from a Sling replication config and returns a
105
- Dagster AssetKey for the dependencies of the replication stream.
106
-
107
- By default, this returns the stream name. For example, a stream named "public.accounts"
108
- will create an AssetKey named "target_public_accounts" and a dependency named "public_accounts".
109
-
110
- Override this function to customize how to map a Sling stream to a Dagster dependency.
111
- Alternatively, you can provide metadata in your Sling replication config to specify the
112
- Dagster AssetKey for a stream as follows:
113
-
114
- .. code-block:: yaml
115
-
116
- public.users:
117
- meta:
118
- dagster:
119
- deps: "sourcedb_users"
120
-
121
- Args:
122
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
123
-
124
- Returns:
125
- AssetKey: The Dagster AssetKey dependency for the replication stream.
126
-
127
- Examples:
128
- Using a custom mapping for streams:
129
-
130
- .. code-block:: python
131
-
132
- class CustomSlingTranslator(DagsterSlingTranslator):
133
- def get_deps_asset_key(self, stream_name: str) -> AssetKey:
134
- map = {"stream1": "asset1", "stream2": "asset2"}
135
- return AssetKey(map[stream_name])
136
-
137
- """
138
- config = stream_definition.get("config", {}) or {}
139
- meta = config.get("meta", {})
140
- deps = meta.get("dagster", {}).get("deps")
141
- deps_out = []
142
- if deps and isinstance(deps, str):
143
- deps = [deps]
144
- if deps:
145
- assert isinstance(deps, list)
146
- for asset_key in deps:
147
- if self.sanitize_stream_name(asset_key) != asset_key:
148
- raise ValueError(
149
- f"Deps Asset key {asset_key} for stream {stream_definition['name']} is not "
150
- "sanitized. Please use only alphanumeric characters and underscores."
151
- )
152
- deps_out.append(AssetKey(asset_key.split(".")))
153
- return deps_out
154
-
155
- stream_name = stream_definition["name"]
156
- components = self.sanitize_stream_name(stream_name).split(".")
157
- return [AssetKey(components)]
158
-
159
- @public
160
- def get_description(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
161
- """Retrieves the description for a given stream definition.
162
-
163
- This method checks the provided stream definition for a description. It first looks
164
- for an "sql" key in the configuration and returns its value if found. If not, it looks
165
- for a description in the metadata under the "dagster" key.
166
-
167
- Parameters:
168
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
169
- which includes configuration details.
170
-
171
- Returns:
172
- Optional[str]: The description of the stream if found, otherwise None.
173
- """
174
- config = stream_definition.get("config", {}) or {}
175
- if "sql" in config:
176
- return config["sql"]
177
- meta = config.get("meta", {})
178
- description = meta.get("dagster", {}).get("description")
179
- return description
180
-
181
- @public
182
- def get_metadata(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
183
- """Retrieves the metadata for a given stream definition.
184
-
185
- This method extracts the configuration from the provided stream definition and returns
186
- it as a JSON metadata value.
187
-
188
- Parameters:
189
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
190
- which includes configuration details.
191
-
192
- Returns:
193
- Mapping[str, Any]: A dictionary containing the stream configuration as JSON metadata.
194
- """
195
- return {"stream_config": MetadataValue.json(stream_definition.get("config", {}))}
196
-
197
- @public
198
- def get_tags(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
199
- """Retrieves the tags for a given stream definition.
200
-
201
- This method returns an empty dictionary, indicating that no tags are associated with
202
- the stream definition by default. This method can be overridden to provide custom tags.
203
-
204
- Parameters:
205
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
206
- which includes configuration details.
207
-
208
- Returns:
209
- Mapping[str, Any]: An empty dictionary.
210
- """
211
- return {}
212
-
213
- @public
214
- def get_kinds(self, stream_definition: Mapping[str, Any]) -> set[str]:
215
- """Retrieves the kinds for a given stream definition.
216
-
217
- This method returns "sling" by default. This method can be overridden to provide custom kinds.
218
-
219
- Parameters:
220
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
221
- which includes configuration details.
222
-
223
- Returns:
224
- Set[str]: A set containing kinds for the stream's assets.
225
- """
226
- return {"sling"}
227
-
228
- @public
229
- def get_group_name(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
230
- """Retrieves the group name for a given stream definition.
231
-
232
- This method checks the provided stream definition for a group name in the metadata
233
- under the "dagster" key.
234
-
235
- Parameters:
236
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
237
- which includes configuration details.
238
-
239
- Returns:
240
- Optional[str]: The group name if found, otherwise None.
241
- """
242
- config = stream_definition.get("config", {}) or {}
243
- meta = config.get("meta", {})
244
- return meta.get("dagster", {}).get("group")
245
-
246
- @public
247
- def get_freshness_policy(
248
- self, stream_definition: Mapping[str, Any]
249
- ) -> Optional[FreshnessPolicy]:
250
- """Retrieves the freshness policy for a given stream definition.
251
-
252
- This method checks the provided stream definition for a specific configuration
253
- indicating a freshness policy. If the configuration is found, it constructs and
254
- returns a FreshnessPolicy object based on the provided parameters. Otherwise,
255
- it returns None.
256
-
257
- Parameters:
258
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
259
- which includes configuration details.
260
-
261
- Returns:
262
- Optional[FreshnessPolicy]: A FreshnessPolicy object if the configuration is found,
263
- otherwise None.
264
- """
265
- config = stream_definition.get("config", {}) or {}
266
- meta = config.get("meta", {})
267
- freshness_policy_config = meta.get("dagster", {}).get("freshness_policy")
268
- if freshness_policy_config:
269
- return FreshnessPolicy(
270
- maximum_lag_minutes=float(freshness_policy_config["maximum_lag_minutes"]),
271
- cron_schedule=freshness_policy_config.get("cron_schedule"),
272
- cron_schedule_timezone=freshness_policy_config.get("cron_schedule_timezone"),
273
- )
274
-
275
- @public
276
- def get_auto_materialize_policy(
277
- self, stream_definition: Mapping[str, Any]
278
- ) -> Optional[AutoMaterializePolicy]:
279
- """Defines the auto-materialize policy for a given stream definition.
280
-
281
- This method checks the provided stream definition for a specific configuration
282
- indicating an auto-materialize policy. If the configuration is found, it returns
283
- an eager auto-materialize policy. Otherwise, it returns None.
284
-
285
- Parameters:
286
- stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
287
- which includes configuration details.
288
-
289
- Returns:
290
- Optional[AutoMaterializePolicy]: An eager auto-materialize policy if the configuration
291
- is found, otherwise None.
292
- """
293
- config = stream_definition.get("config", {}) or {}
294
- meta = config.get("meta", {})
295
- auto_materialize_policy_config = "auto_materialize_policy" in meta.get("dagster", {})
296
- if auto_materialize_policy_config:
297
- return AutoMaterializePolicy.eager()
@@ -1 +0,0 @@
1
- __version__ = "0.25.9"
File without changes