dagster-sling 0.28.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,509 @@
1
+ from collections.abc import Callable, Iterable, Mapping
2
+ from dataclasses import dataclass
3
+ from typing import Any, Optional
4
+
5
+ from dagster import AssetKey, AssetSpec, AutoMaterializePolicy, MetadataValue
6
+ from dagster._annotations import public, superseded
7
+ from dagster._utils.names import clean_name_lower_with_dots
8
+ from dagster._utils.warnings import supersession_warning
9
+
10
+
11
@dataclass
class DagsterSlingTranslator:
    """Translates Sling replication stream definitions into Dagster asset attributes.

    Every method receives a ``stream_definition`` mapping. The default implementations read
    two of its keys:

    * ``"name"``: the stream name (for example ``"public.users"``).
    * ``"config"``: the Sling replication stream config. It may be missing or ``None``.

    Per-stream Dagster overrides are read from ``config["meta"]["dagster"]``.

    Attributes:
        target_prefix (str): First component of the asset key path produced by default.
    """

    target_prefix: str = "target"

    @public
    def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetSpec:
        """Build the Dagster AssetSpec for one stream of a Sling replication config.

        Each attribute of the spec is resolved independently: when a subclass overrides the
        matching superseded ``get_*`` method, that override is used (and a supersession
        warning is emitted); otherwise the built-in default is used.

        Override this method to customize how a Sling stream maps to a Dagster asset.

        Args:
            stream_definition (Mapping[str, Any]): The stream definition, with the stream
                name under ``"name"`` and the stream config under ``"config"``.

        Returns:
            AssetSpec: The spec describing the asset produced by the stream.
        """
        return AssetSpec(
            key=self._resolve_back_compat_method(
                "get_asset_key", self._default_asset_key_fn, stream_definition
            ),
            deps=self._resolve_back_compat_method(
                "get_deps_asset_key", self._default_deps_fn, stream_definition
            ),
            description=self._resolve_back_compat_method(
                "get_description", self._default_description_fn, stream_definition
            ),
            metadata=self._resolve_back_compat_method(
                "get_metadata", self._default_metadata_fn, stream_definition
            ),
            tags=self._resolve_back_compat_method(
                "get_tags", self._default_tags_fn, stream_definition
            ),
            kinds=self._resolve_back_compat_method(
                "get_kinds", self._default_kinds_fn, stream_definition
            ),
            group_name=self._resolve_back_compat_method(
                "get_group_name", self._default_group_name_fn, stream_definition
            ),
            auto_materialize_policy=self._resolve_back_compat_method(
                "get_auto_materialize_policy",
                self._default_auto_materialize_policy_fn,
                stream_definition,
            ),
        )

    def _resolve_back_compat_method(
        self,
        method_name: str,
        default_fn: Callable[[Mapping[str, Any]], Any],
        stream_definition: Mapping[str, Any],
    ) -> Any:
        """Compute one spec attribute, honoring subclass overrides of superseded methods.

        Args:
            method_name (str): Name of the superseded public method (e.g. ``"get_tags"``).
            default_fn (Callable[[Mapping[str, Any]], Any]): Built-in implementation to use
                when the method has not been overridden.
            stream_definition (Mapping[str, Any]): The stream definition to translate.

        Returns:
            Any: The value returned by the override if there is one, else by ``default_fn``.
        """
        # Compare the functions found on the classes (not bound methods) so that identity
        # tells us whether a subclass replaced the base implementation.
        method = getattr(type(self), method_name)
        base_method = getattr(DagsterSlingTranslator, method_name)
        if method is not base_method:  # user defined this
            supersession_warning(
                subject=method_name,
                additional_warn_text=(
                    f"Instead of overriding DagsterSlingTranslator.{method_name}(), "
                    f"override DagsterSlingTranslator.get_asset_spec()."
                ),
            )
            return method(self, stream_definition)
        else:
            return default_fn(stream_definition)

    @staticmethod
    def _dagster_meta(stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the ``config["meta"]["dagster"]`` mapping of a stream, or an empty mapping.

        Any level that is missing or ``None`` (for example an empty ``meta:`` key in the
        replication YAML) is treated as empty instead of raising ``AttributeError``.
        """
        config = stream_definition.get("config") or {}
        meta = config.get("meta") or {}
        return meta.get("dagster") or {}

    @public
    def sanitize_stream_name(self, stream_name: str) -> str:
        """Return a sanitized version of a stream name from a Sling replication config.

        By default, double quotes are removed and the result is passed to
        ``clean_name_lower_with_dots``, which (as its name indicates) lower-cases the name
        and keeps dots; see that helper for the exact replacement rules. The default asset
        key logic splits the sanitized name on ``.`` to build the key path.

        Args:
            stream_name (str): The name of the stream.

        Returns:
            str: The sanitized stream name.

        Examples:
            Using a custom stream name sanitizer:

            .. code-block:: python

                class CustomSlingTranslator(DagsterSlingTranslator):
                    def sanitize_stream_name(self, stream_name: str) -> str:
                        return stream_name.replace(".", "")
        """
        return clean_name_lower_with_dots(name=stream_name.replace('"', ""))

    @superseded(
        additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).key` instead.",
    )
    @public
    def get_asset_key(self, stream_definition: Mapping[str, Any]) -> AssetKey:
        """Return the Dagster AssetKey for a stream of a Sling replication config.

        Superseded: override ``get_asset_spec`` instead of this method.

        The stream definition holds the stream name under ``"name"`` and the Sling
        replication stream config under ``"config"``. For example:

        .. code-block:: python

            stream_definition = {
                "name": "public.users",
                "config": {
                    "sql": 'select all_user_id, name from public."all_Users"',
                    "object": "public.all_users",
                },
            }

        By default, the key path is ``target_prefix`` followed by the dot-separated
        components of the sanitized ``object`` value (or of the stream name when no
        ``object`` is configured). With the default prefix, a stream named
        "public.accounts" yields ``AssetKey(["target", "public", "accounts"])``.

        Alternatively, you can provide metadata in your Sling replication config to specify
        the Dagster AssetKey for a stream as follows:

        .. code-block:: yaml

            public.users:
                meta:
                    dagster:
                        asset_key: "mydb_users"

        Args:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream
                definition.

        Returns:
            AssetKey: The Dagster AssetKey for the replication stream.

        Raises:
            ValueError: If the configured ``asset_key`` is not already in sanitized form.

        Examples:
            Using a custom mapping for streams:

            .. code-block:: python

                class CustomSlingTranslator(DagsterSlingTranslator):
                    def get_asset_spec(self, stream_definition: Mapping[str, Any]) -> AssetSpec:
                        default_spec = super().get_asset_spec(stream_definition)
                        key_map = {"stream1": "asset1", "stream2": "asset2"}
                        return default_spec.replace_attributes(
                            key=AssetKey(key_map[stream_definition["name"]])
                        )
        """
        return self._default_asset_key_fn(stream_definition)

    def _default_asset_key_fn(self, stream_definition: Mapping[str, Any]) -> AssetKey:
        """Default asset key resolution used by ``get_asset_spec`` and ``get_asset_key``.

        Resolution order:

        1. ``config["meta"]["dagster"]["asset_key"]``, split on ``.``. It must already be
           in the form produced by ``sanitize_stream_name``.
        2. Otherwise ``target_prefix`` followed by the ``.``-separated components of the
           sanitized ``config["object"]``, falling back to the stream name.

        Args:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream
                definition.

        Returns:
            AssetKey: The Dagster AssetKey for the replication stream.

        Raises:
            ValueError: If the configured ``asset_key`` is not already in sanitized form.
        """
        config = stream_definition.get("config") or {}
        asset_key = self._dagster_meta(stream_definition).get("asset_key")

        if asset_key:
            if self.sanitize_stream_name(asset_key) != asset_key:
                raise ValueError(
                    f"Asset key {asset_key} for stream {stream_definition['name']} is not "
                    "sanitized. Please use only alphanumeric characters and underscores."
                )
            return AssetKey(asset_key.split("."))

        # You can override the Sling Replication default object with an object key
        stream_name = config.get("object") or stream_definition["name"]
        sanitized_components = self.sanitize_stream_name(stream_name).split(".")
        return AssetKey([self.target_prefix, *sanitized_components])

    @superseded(
        additional_warn_text=(
            "Iterate over `DagsterSlingTranslator.get_asset_spec(...).deps` to access `AssetDep.asset_key` instead."
        ),
    )
    @public
    def get_deps_asset_key(self, stream_definition: Mapping[str, Any]) -> Iterable[AssetKey]:
        """Return the AssetKey of each upstream dependency of a replication stream.

        Superseded: override ``get_asset_spec`` instead of this method.

        By default, there is a single dependency whose key path is made of the
        dot-separated components of the sanitized stream name. For example, a stream named
        "public.accounts" depends on ``AssetKey(["public", "accounts"])``.

        Alternatively, you can provide metadata in your Sling replication config to specify
        the dependencies of a stream, either as one string or as a list of strings:

        .. code-block:: yaml

            public.users:
                meta:
                    dagster:
                        deps: "sourcedb_users"

        Args:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream
                definition.

        Returns:
            Iterable[AssetKey]: A list of Dagster AssetKey, one per dependency of the
            replication stream.

        Raises:
            ValueError: If a configured dependency key is not already in sanitized form.
        """
        return self._default_deps_fn(stream_definition)

    def _default_deps_fn(self, stream_definition: Mapping[str, Any]) -> Iterable[AssetKey]:
        """Default dependency resolution used by ``get_asset_spec``.

        Uses ``config["meta"]["dagster"]["deps"]`` (a string or a list of strings) when it
        is set and non-empty; otherwise falls back to a single key derived from the
        sanitized stream name.

        Args:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream
                definition.

        Returns:
            Iterable[AssetKey]: A list of Dagster AssetKey, one per dependency of the
            replication stream.

        Raises:
            ValueError: If a configured dependency key is not already in sanitized form.
        """
        deps = self._dagster_meta(stream_definition).get("deps")
        if deps and isinstance(deps, str):
            # A single dependency may be given as a bare string.
            deps = [deps]

        if deps:
            assert isinstance(deps, list), (
                f"Expected `deps` to be a string or a list of strings, got {type(deps).__name__}."
            )
            deps_out = []
            for asset_key in deps:
                if self.sanitize_stream_name(asset_key) != asset_key:
                    raise ValueError(
                        f"Deps Asset key {asset_key} for stream {stream_definition['name']} is not "
                        "sanitized. Please use only alphanumeric characters and underscores."
                    )
                deps_out.append(AssetKey(asset_key.split(".")))
            return deps_out

        stream_name = stream_definition["name"]
        components = self.sanitize_stream_name(stream_name).split(".")
        return [AssetKey(components)]

    @superseded(
        additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).description` instead.",
    )
    @public
    def get_description(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
        """Retrieves the description for a given stream definition.

        Superseded: override ``get_asset_spec`` instead of this method.

        If the stream config has an "sql" key, its value is used as the description.
        Otherwise the description is read from the metadata under the "dagster" key.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Optional[str]: The description of the stream if found, otherwise None.
        """
        return self._default_description_fn(stream_definition)

    def _default_description_fn(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
        """Default description used by ``get_asset_spec``.

        The value of ``config["sql"]`` takes precedence whenever the key is present;
        otherwise ``config["meta"]["dagster"]["description"]`` is used.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Optional[str]: The description of the stream if found, otherwise None.
        """
        config = stream_definition.get("config") or {}
        if "sql" in config:
            return config["sql"]
        return self._dagster_meta(stream_definition).get("description")

    @superseded(
        additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).metadata` instead.",
    )
    @public
    def get_metadata(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
        """Retrieves the metadata for a given stream definition.

        Superseded: override ``get_asset_spec`` instead of this method.

        This method extracts the configuration from the provided stream definition and returns
        it as a JSON metadata value.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Mapping[str, Any]: A dictionary containing the stream configuration as JSON metadata.
        """
        return self._default_metadata_fn(stream_definition)

    def _default_metadata_fn(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
        """Default metadata used by ``get_asset_spec``.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Mapping[str, Any]: A single ``"stream_config"`` entry wrapping the stream's
            ``"config"`` value as JSON metadata.
        """
        return {"stream_config": MetadataValue.json(stream_definition.get("config", {}))}

    @superseded(
        additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).tags` instead.",
    )
    @public
    def get_tags(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
        """Retrieves the tags for a given stream definition.

        Superseded: override ``get_asset_spec`` instead of this method.

        This method returns an empty dictionary, indicating that no tags are associated with
        the stream definition by default.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Mapping[str, Any]: An empty dictionary.
        """
        return self._default_tags_fn(stream_definition)

    def _default_tags_fn(self, stream_definition: Mapping[str, Any]) -> Mapping[str, Any]:
        """Default tags used by ``get_asset_spec``: none.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
                (unused).

        Returns:
            Mapping[str, Any]: An empty dictionary.
        """
        return {}

    @superseded(
        additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).kinds` instead.",
    )
    @public
    def get_kinds(self, stream_definition: Mapping[str, Any]) -> set[str]:
        """Retrieves the kinds for a given stream definition.

        Superseded: override ``get_asset_spec`` instead of this method.

        This method returns "sling" by default.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Set[str]: A set containing kinds for the stream's assets.
        """
        return self._default_kinds_fn(stream_definition)

    def _default_kinds_fn(self, stream_definition: Mapping[str, Any]) -> set[str]:
        """Default kinds used by ``get_asset_spec``: just "sling".

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition
                (unused).

        Returns:
            Set[str]: A set containing kinds for the stream's assets.
        """
        return {"sling"}

    @superseded(
        additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).group_name` instead.",
    )
    @public
    def get_group_name(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
        """Retrieves the group name for a given stream definition.

        Superseded: override ``get_asset_spec`` instead of this method.

        This method checks the provided stream definition for a "group" entry in the
        metadata under the "dagster" key.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Optional[str]: The group name if found, otherwise None.
        """
        return self._default_group_name_fn(stream_definition)

    def _default_group_name_fn(self, stream_definition: Mapping[str, Any]) -> Optional[str]:
        """Default group name used by ``get_asset_spec``.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Optional[str]: The value of ``config["meta"]["dagster"]["group"]`` if present,
            otherwise None.
        """
        return self._dagster_meta(stream_definition).get("group")

    @superseded(
        additional_warn_text="Use `DagsterSlingTranslator.get_asset_spec(...).auto_materialize_policy` instead.",
    )
    @public
    def get_auto_materialize_policy(
        self, stream_definition: Mapping[str, Any]
    ) -> Optional[AutoMaterializePolicy]:
        """Defines the auto-materialize policy for a given stream definition.

        Superseded: override ``get_asset_spec`` instead of this method.

        If an "auto_materialize_policy" key is present in the metadata under the "dagster"
        key (whatever its value), an eager auto-materialize policy is returned. Otherwise,
        None is returned.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Optional[AutoMaterializePolicy]: An eager auto-materialize policy if the configuration
                is found, otherwise None.
        """
        return self._default_auto_materialize_policy_fn(stream_definition)

    def _default_auto_materialize_policy_fn(
        self, stream_definition: Mapping[str, Any]
    ) -> Optional[AutoMaterializePolicy]:
        """Default auto-materialize policy used by ``get_asset_spec``.

        Only the presence of the "auto_materialize_policy" key under
        ``config["meta"]["dagster"]`` is checked; its value is not inspected.

        Parameters:
            stream_definition (Mapping[str, Any]): A dictionary representing the stream definition,
                which includes configuration details.

        Returns:
            Optional[AutoMaterializePolicy]: An eager auto-materialize policy if the configuration
                is found, otherwise None.
        """
        if "auto_materialize_policy" in self._dagster_meta(stream_definition):
            return AutoMaterializePolicy.eager()
        return None
dagster_sling/py.typed ADDED
@@ -0,0 +1 @@
1
+ partial