acryl-datahub-cloud 0.3.11.1rc7__py3-none-any.whl → 0.3.12rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (37) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/_sdk_extras/__init__.py +17 -2
  3. acryl_datahub_cloud/_sdk_extras/assertion.py +603 -8
  4. acryl_datahub_cloud/_sdk_extras/assertion_input.py +1074 -0
  5. acryl_datahub_cloud/_sdk_extras/assertions_client.py +705 -11
  6. acryl_datahub_cloud/_sdk_extras/entities/__init__.py +0 -0
  7. acryl_datahub_cloud/_sdk_extras/entities/assertion.py +425 -0
  8. acryl_datahub_cloud/_sdk_extras/entities/monitor.py +291 -0
  9. acryl_datahub_cloud/_sdk_extras/entities/subscription.py +84 -0
  10. acryl_datahub_cloud/_sdk_extras/errors.py +34 -0
  11. acryl_datahub_cloud/_sdk_extras/resolver_client.py +39 -0
  12. acryl_datahub_cloud/_sdk_extras/subscription_client.py +565 -0
  13. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  14. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2023 -2023
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
  17. acryl_datahub_cloud/metadata/schema.avsc +24889 -25252
  18. acryl_datahub_cloud/metadata/schema_classes.py +1133 -1008
  19. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +189 -201
  20. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +9 -1
  21. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  22. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +9 -1
  23. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  24. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  25. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
  26. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +27 -0
  27. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  28. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +12 -4
  29. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  30. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  31. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  32. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  33. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/METADATA +46 -46
  34. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/RECORD +37 -28
  35. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/WHEEL +1 -1
  36. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/entry_points.txt +0 -0
  37. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,565 @@
1
+ import logging
2
+ from datetime import datetime, timezone
3
+ from typing import Dict, List, Optional, Tuple, Union
4
+
5
+ from typing_extensions import TypeAlias
6
+
7
+ import datahub.metadata.schema_classes as models
8
+ from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion
9
+ from acryl_datahub_cloud._sdk_extras.entities.subscription import Subscription
10
+ from datahub.emitter.enum_helpers import get_enum_options
11
+ from datahub.emitter.mce_builder import make_ts_millis
12
+ from datahub.errors import SdkUsageError
13
+ from datahub.metadata.urns import AssertionUrn, CorpGroupUrn, CorpUserUrn, DatasetUrn
14
+ from datahub.sdk._utils import DEFAULT_ACTOR_URN
15
+ from datahub.sdk.main_client import DataHubClient
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ SubscriberInputType: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
20
+
21
+
22
# Change types that report the outcome of an assertion run.
ASSERTION_RELATED_ENTITY_CHANGE_TYPES = {
    models.EntityChangeTypeClass.ASSERTION_PASSED,
    models.EntityChangeTypeClass.ASSERTION_FAILED,
    models.EntityChangeTypeClass.ASSERTION_ERROR,
}

# Every public, string-valued attribute found on EntityChangeTypeClass.
# Names starting with "_" are filtered out before getattr is called.
ALL_EXISTING_ENTITY_CHANGE_TYPES = {
    value
    for value in (
        getattr(models.EntityChangeTypeClass, name)
        for name in dir(models.EntityChangeTypeClass)
        if not name.startswith("_")
    )
    if isinstance(value, str)
}
34
+
35
+
36
+ class SubscriptionClient:
37
+ """
38
+ A client for managing subscriptions to entity changes in Acryl DataHub Cloud.
39
+
40
+ Subscriptions can be created at two granularity levels for different entity change types:
41
+ - Dataset level: Affects all assertions associated with the dataset
42
+ - Assertion level: Affects only the specific assertion and overrides any dataset-level subscriptions
43
+
44
+ Notes:
45
+ - This implementation currently returns low-level Subscription entities from the entities
46
+ submodule. In future versions, this may be replaced with a higher-level Subscription abstraction
47
+ for improved usability.
48
+ - The client is designed to work with both datasets and assertions, but currently only supports
49
+ datasets. Assertions will be supported in future versions.
50
+ - Only the ENTITY_CHANGE subscription type is supported.
51
+ - No notificationConfig is set
52
+ - This client is experimental and under heavy development. Expect breaking changes.
53
+ """
54
+
55
def __init__(self, client: DataHubClient):
    """Store the DataHub client and emit the experimental-API notice.

    Args:
        client: The DataHubClient used for all subsequent calls.
    """
    self.client = client
    # Every construction announces that this API is experimental.
    _print_experimental_warning()
58
+
59
def subscribe(
    self,
    *,
    urn: Union[str, DatasetUrn, AssertionUrn],
    subscriber_urn: SubscriberInputType,
    entity_change_types: Optional[List[str]] = None,
) -> None:
    """
    Subscribe a user or group to entity-change notifications.

    The target URN is resolved to a dataset plus an optional assertion. When the
    subscriber has no subscription on that dataset, a new one is created. When
    exactly one exists, the requested change types are merged into it and the
    subscription is upserted.

    Args:
        urn: The URN (string or URN object) of the dataset or assertion to subscribe to.
        subscriber_urn: The URN of the user or group that will receive notifications.
        entity_change_types: Specific change types to subscribe to. If None, defaults are:
            - Dataset: all existing change types
            - Assertion: assertion-related types (ASSERTION_PASSED,
              ASSERTION_FAILED, ASSERTION_ERROR)

    Returns:
        None

    Raises:
        SdkUsageError: If the URN type is unsupported, the assertion is not found,
            the change types list is empty or contains invalid values, or more
            than one subscription already exists for the dataset and subscriber.
    """
    _print_experimental_warning()

    # Accept either a URN object or its string form.
    target_urn = self._maybe_parse_urn(urn)

    dataset_urn: DatasetUrn
    assertion_urn: Optional[AssertionUrn]
    if isinstance(target_urn, DatasetUrn):
        dataset_urn, assertion_urn = target_urn, None
    else:
        dataset_urn, assertion_urn = self._fetch_dataset_from_assertion(target_urn)

    logger.info(
        f"Subscribing to dataset={dataset_urn} assertion={assertion_urn} for subscriber={subscriber_urn} with change types: {entity_change_types}"
    )

    # Validate the requested change types, or fall back to the scope defaults.
    requested_change_types = self._get_entity_change_types(
        assertion_scope=assertion_urn is not None,
        entity_change_types=entity_change_types,
    )

    matching_subscription_urns = self.client.resolve.subscription(  # type: ignore[attr-defined]
        entity_urn=dataset_urn.urn(),
        actor_urn=subscriber_urn.urn(),
    )

    if not matching_subscription_urns:
        # No subscription yet for this (dataset, subscriber) pair: create one.
        if isinstance(subscriber_urn, CorpUserUrn):
            actor_type = CorpUserUrn.ENTITY_TYPE
        else:
            actor_type = CorpGroupUrn.ENTITY_TYPE

        subscription_info = models.SubscriptionInfoClass(
            entityUrn=dataset_urn.urn(),
            actorUrn=subscriber_urn.urn(),
            actorType=actor_type,
            types=[models.SubscriptionTypeClass.ENTITY_CHANGE],
            entityChangeTypes=self._merge_entity_change_types(
                existing_change_types=None,
                new_change_type_strs=requested_change_types,
                new_assertion_urn=assertion_urn,
            ),
            createdOn=self._create_audit_stamp(),
            updatedOn=self._create_audit_stamp(),
        )
        new_subscription = Subscription(info=subscription_info)
        self.client.entities.upsert(new_subscription)
        logger.info(f"Subscription created: {new_subscription.urn}")
        return

    if len(matching_subscription_urns) != 1:
        # More than one subscription for the same pair is not expected.
        raise SdkUsageError(
            f"We have a mesh here - {len(matching_subscription_urns)} subscriptions found for dataset={dataset_urn} assertion={assertion_urn} and subscriber={subscriber_urn}!"
        )

    # Exactly one subscription exists: merge the new change types into it.
    subscription_urn = matching_subscription_urns[0]
    current_subscription = self.client.entities.get(subscription_urn)
    assert isinstance(current_subscription, Subscription), (
        f"Expected Subscription entity type for subscription urn={subscription_urn}"
    )
    logger.info(
        f"Found existing subscription to be updated: {current_subscription.urn}"
    )
    merged_change_types = self._merge_entity_change_types(
        existing_change_types=current_subscription.info.entityChangeTypes,
        new_change_type_strs=requested_change_types,
        new_assertion_urn=assertion_urn,
    )
    current_subscription.info.entityChangeTypes = merged_change_types
    current_subscription.info.updatedOn = self._create_audit_stamp()
    self.client.entities.upsert(current_subscription)
    logger.info(f"Subscription updated: {current_subscription.urn}")
    return
161
+
162
def list_subscriptions(
    self,
    *,
    urn: Union[str, DatasetUrn, AssertionUrn],
    entity_change_types: Optional[List[models.EntityChangeTypeClass]] = None,
    subscriber_urn: Optional[SubscriberInputType] = None,
) -> List[Subscription]:
    """
    Retrieve existing subscriptions for a dataset or assertion.

    NOTE: retrieval is not implemented yet. The arguments are only logged and
    the return value is a one-element list holding a Subscription built with
    no arguments.

    Args:
        urn: The URN of the dataset or assertion to query subscriptions for.
        entity_change_types: Intended filter on change types (currently unused
            beyond logging).
        subscriber_urn: Intended filter on the subscriber (currently unused
            beyond logging).

    Returns:
        List[Subscription]: A single placeholder Subscription.
    """
    _print_experimental_warning()
    logger.info(
        f"Listing subscriptions for {urn} with change types: {entity_change_types} and subscriber: {subscriber_urn}"
    )
    # TODO: Implement the actual logic to retrieve subscriptions.
    placeholder = Subscription()
    return [placeholder]
188
+
189
def unsubscribe(
    self,
    *,
    urn: Union[str, DatasetUrn, AssertionUrn],
    subscriber_urn: SubscriberInputType,
    entity_change_types: Optional[List[models.EntityChangeTypeClass]] = None,
) -> None:
    """
    Remove entity-change types from a subscriber's dataset subscription.

    Only dataset URNs are handled at the moment; an assertion URN raises
    SdkUsageError.

    **Dataset unsubscription:**
    - Removes the specified change types from the subscription
    - If entity_change_types is None, removes all change types
    - Deletes the entire subscription if no change types remain
    - Does nothing (beyond logging) when the subscriber has no subscription

    Args:
        urn: URN (string or URN object) of the dataset to unsubscribe from.
        subscriber_urn: User or group URN to unsubscribe.
        entity_change_types: Specific change types to remove. None means
            "all change types". An empty list is rejected rather than being
            treated as None, so it can never delete a subscription by accident.

    Returns:
        None

    Raises:
        SdkUsageError: If the URN type is unsupported, an assertion URN is given,
            more than one subscription matches, the change types list is empty
            or contains invalid values, or the subscription has no change types.

    Note:
        This method is experimental and may change in future versions.
    """
    _print_experimental_warning()

    # Parse URN string if needed
    parsed_urn = self._maybe_parse_urn(urn)

    # Assertion-level unsubscription is not implemented yet: fail explicitly.
    if isinstance(parsed_urn, AssertionUrn):
        raise SdkUsageError(
            "Assertion unsubscription is not yet implemented. Only dataset unsubscription is currently supported."
        )

    dataset_urn: DatasetUrn
    assertion_urn: Optional[AssertionUrn]
    dataset_urn, assertion_urn = (
        (parsed_urn, None)
        if isinstance(parsed_urn, DatasetUrn)
        else self._fetch_dataset_from_assertion(parsed_urn)  # type: ignore[arg-type] # TODO: Remove when assertion unsubscribe is implemented
    )

    logger.info(
        f"Unsubscribing from dataset={dataset_urn} for subscriber={subscriber_urn} with change types: {entity_change_types}"
    )

    # Find existing subscription
    existing_subscription_urns = self.client.resolve.subscription(  # type: ignore[attr-defined]
        entity_urn=dataset_urn.urn(),
        actor_urn=subscriber_urn.urn(),
    )

    if not existing_subscription_urns:
        logger.info(
            f"No subscription found for dataset={dataset_urn} and subscriber={subscriber_urn}"
        )
        return
    elif len(existing_subscription_urns) > 1:
        raise SdkUsageError(
            f"Multiple subscriptions found for dataset={dataset_urn} and subscriber={subscriber_urn}. "
            f"Expected at most 1, got {len(existing_subscription_urns)}"
        )

    subscription_urn = existing_subscription_urns[0]
    subscription_entity = self.client.entities.get(subscription_urn)
    assert isinstance(subscription_entity, Subscription), (
        f"Expected Subscription entity type for subscription urn={subscription_urn}"
    )
    logger.info(
        f"Found existing subscription to be updated: {subscription_entity.urn}"
    )

    # Get the change types to remove (validated input or defaults).
    # Test against None explicitly: an empty list must reach the validator
    # (which rejects it) instead of silently becoming "remove everything".
    change_types_to_remove = self._get_entity_change_types(
        assertion_scope=assertion_urn is not None,
        entity_change_types=[str(ect) for ect in entity_change_types]
        if entity_change_types is not None
        else None,
    )

    # Remove the specified change types
    if subscription_entity.info.entityChangeTypes is None:
        raise SdkUsageError(
            f"Subscription {subscription_entity.urn} has no change types to remove"
        )
    updated_change_types = self._remove_change_types(
        subscription_entity.info.entityChangeTypes, change_types_to_remove
    )

    # If no change types remain, delete the subscription
    if not updated_change_types:
        logger.info(
            f"No change types remain, deleting subscription: {subscription_entity.urn}"
        )
        self.client.entities.delete(subscription_entity.urn)
        return

    # Update the subscription with remaining change types
    subscription_entity.info.entityChangeTypes = updated_change_types
    subscription_entity.info.updatedOn = self._create_audit_stamp()
    self.client.entities.upsert(subscription_entity)
    logger.info(f"Subscription updated: {subscription_entity.urn}")
314
+
315
def _get_entity_change_types(
    self,
    assertion_scope: bool,
    entity_change_types: Optional[List[str]] = None,
) -> List[str]:
    """Validate the requested change types, or supply the scope defaults.

    Args:
        assertion_scope: True for assertion subscriptions, False for dataset subscriptions.
        entity_change_types: Change types to validate. None selects the defaults.

    Returns:
        The input list unchanged when it passes validation. Otherwise the defaults:
        - Dataset: ALL_EXISTING_ENTITY_CHANGE_TYPES as a list
        - Assertion: ASSERTION_RELATED_ENTITY_CHANGE_TYPES as a list

    Raises:
        SdkUsageError: If entity_change_types is an empty list.
        SdkUsageError: If any value is not a known change type, or if assertion
            scope receives a non-assertion change type.
    """
    if entity_change_types is None:
        # Nothing requested: fall back to the defaults for the scope.
        defaults = (
            ASSERTION_RELATED_ENTITY_CHANGE_TYPES
            if assertion_scope
            else ALL_EXISTING_ENTITY_CHANGE_TYPES
        )
        return list(defaults)

    if len(entity_change_types) == 0:
        raise SdkUsageError("Entity change types cannot be an empty list.")

    all_options = get_enum_options(models.EntityChangeTypeClass)
    if not all(requested in all_options for requested in entity_change_types):
        raise SdkUsageError(
            f"Invalid entity change types provided: {entity_change_types}. "
            f"Valid options are: {all_options}"
        )

    if assertion_scope:
        # Assertion subscriptions only accept assertion-related change types.
        invalid_types = []
        for requested in entity_change_types:
            if requested not in ASSERTION_RELATED_ENTITY_CHANGE_TYPES:
                invalid_types.append(requested)
        if invalid_types:
            raise SdkUsageError(
                f"For assertion subscriptions, only assertion-related change types are allowed. "
                f"Invalid types: {invalid_types}. "
                f"Valid types: {list(ASSERTION_RELATED_ENTITY_CHANGE_TYPES)}"
            )

    return entity_change_types
368
+
369
def _create_audit_stamp(self) -> models.AuditStampClass:
    """Build an audit stamp for the current UTC time with the default actor."""
    now_utc = datetime.now(tz=timezone.utc)
    timestamp_millis = make_ts_millis(now_utc)
    return models.AuditStampClass(
        timestamp_millis,
        actor=DEFAULT_ACTOR_URN,  # TODO: Replace with actual actor URN from token if available
    )
375
+
376
def _merge_entity_change_types(
    self,
    existing_change_types: Optional[List[models.EntityChangeDetailsClass]],
    new_change_type_strs: List[str],
    new_assertion_urn: Optional[AssertionUrn] = None,
) -> List[models.EntityChangeDetailsClass]:
    """Merge existing entity change types with new ones, avoiding duplicates.

    The result is ordered deterministically: change types already present come
    first (in their existing order), followed by newly requested ones in the
    order given. Earlier versions iterated a set, so the order could differ
    between runs.

    Args:
        existing_change_types: Existing entity change types from the subscription.
            Can be None when creating a new subscription.
        new_change_type_strs: New entity change type strings to add (must not be empty).
        new_assertion_urn: Optional Assertion URN to associate with the new change types.

    Returns:
        A new list of EntityChangeDetailsClass with one entry per distinct change type.

    Note:
        The returned list and its EntityChangeDetailsClass entries are new objects,
        but each existing entry's filter is handed to
        _merge_entity_change_types_filter and reused in the result.
    """
    assert len(new_change_type_strs) > 0, (
        "new_change_type_strs cannot be empty, worse case we have the default values"
    )

    # ect.entityChangeType: Union[str, EntityChangeTypeClass]; normalise to str keys.
    existing_filters: Dict[str, Optional[models.EntityChangeDetailsFilterClass]] = {
        str(details.entityChangeType): details.filter
        for details in (existing_change_types or [])
    }

    # Set for O(1) membership tests; dict.fromkeys for an ordered de-duplication.
    requested = set(new_change_type_strs)
    ordered_change_types = list(
        dict.fromkeys([*existing_filters, *new_change_type_strs])
    )

    return [
        models.EntityChangeDetailsClass(
            entityChangeType=change_type,
            filter=self._merge_entity_change_types_filter(
                existing_filter=existing_filters.get(change_type),
                # Apply the assertion URN only to change types that are being
                # newly added or re-specified in this call.
                new_assertion_urn=new_assertion_urn
                if change_type in requested
                else None,
            ),
        )
        for change_type in ordered_change_types
    ]
430
+
431
def _merge_entity_change_types_filter(
    self,
    existing_filter: Optional[models.EntityChangeDetailsFilterClass],
    new_assertion_urn: Optional[AssertionUrn] = None,
) -> Optional[models.EntityChangeDetailsFilterClass]:
    """Combine an existing filter with an optional new assertion URN.

    Args:
        existing_filter: Existing filter from the subscription, if any.
        new_assertion_urn: Assertion URN to add to the filter, if any.

    Returns:
        - None when there is neither a filter nor an assertion URN.
        - A new filter containing only the assertion when no filter existed.
        - Otherwise existing_filter itself; when an assertion URN is given it is
          added to existing_filter.includeAssertions (in place) if not present.
    """
    if not existing_filter:
        # No prior filter: either start one for the assertion or stay unfiltered.
        if new_assertion_urn:
            return models.EntityChangeDetailsFilterClass(
                includeAssertions=[new_assertion_urn.urn()]
            )
        return None

    if not new_assertion_urn:
        # Nothing to add, hand the existing filter back untouched.
        return existing_filter

    assert existing_filter is not None and new_assertion_urn is not None

    currently_included = existing_filter.includeAssertions
    if currently_included is None or len(currently_included) == 0:
        # A filter without included assertions is unusual, but handled anyway.
        existing_filter.includeAssertions = [new_assertion_urn.urn()]
        return existing_filter

    assert len(existing_filter.includeAssertions) > 0

    # Append only when the assertion is not already listed.
    already_listed = new_assertion_urn.urn() in existing_filter.includeAssertions
    if not already_listed:
        existing_filter.includeAssertions.append(new_assertion_urn.urn())

    return existing_filter
476
+
477
def _remove_change_types(
    self,
    existing_change_types: List[models.EntityChangeDetailsClass],
    change_types_to_remove: List[str],
) -> List[models.EntityChangeDetailsClass]:
    """Return the subscription's change types minus the ones to remove.

    Args:
        existing_change_types: Current entity change types from the subscription
            (must not be empty).
        change_types_to_remove: Change type strings to drop (must not be empty).

    Returns:
        A new list holding the existing entries whose change type is not in
        change_types_to_remove. The input list is left untouched.

    Note:
        Requested change types that are not present in the subscription are
        reported with a warning log and otherwise ignored.
    """
    assert len(change_types_to_remove) > 0, (
        "change_types_to_remove cannot be empty, worse case we have the default values"
    )
    assert len(existing_change_types) > 0, (
        "Subscription must have at least one change type (no model restriction but business rule)"
    )

    removal_set = set(change_types_to_remove)
    present_types = {str(details.entityChangeType) for details in existing_change_types}

    # Report removals that target change types the subscription does not have.
    unknown_types = removal_set.difference(present_types)
    if unknown_types:
        logger.warning(
            f"The following change types do not exist in the subscription and will be ignored: {sorted(unknown_types)}"
        )

    remaining: List[models.EntityChangeDetailsClass] = []
    for details in existing_change_types:
        if str(details.entityChangeType) in removal_set:
            continue
        remaining.append(details)
    return remaining
521
+
522
def _maybe_parse_urn(
    self, urn: Union[str, DatasetUrn, AssertionUrn]
) -> Union[DatasetUrn, AssertionUrn]:
    """Parse URN string into appropriate URN object if needed.

    The entity type is taken from the URN's prefix ("urn:li:dataset:" or
    "urn:li:assertion:") rather than from a substring search. A URN that only
    embeds a dataset URN (for example a schemaField URN) is therefore rejected
    with SdkUsageError instead of being passed to DatasetUrn.from_string.

    Args:
        urn: String URN or URN object (DatasetUrn or AssertionUrn)

    Returns:
        Parsed URN object (DatasetUrn or AssertionUrn). URN objects are
        returned as-is.

    Raises:
        SdkUsageError: If the value is not a string/URN object, or the URN is
            not a dataset or assertion URN.
    """
    if isinstance(urn, (DatasetUrn, AssertionUrn)):
        return urn

    # Dispatch on the leading entity type only; the remainder of the URN may
    # legitimately contain other URNs.
    if isinstance(urn, str):
        if urn.startswith("urn:li:dataset:"):
            return DatasetUrn.from_string(urn)
        if urn.startswith("urn:li:assertion:"):
            return AssertionUrn.from_string(urn)

    raise SdkUsageError(
        f"Unsupported URN type. Only dataset and assertion URNs are supported, got: {urn}"
    )
548
+
549
def _fetch_dataset_from_assertion(
    self, assertion_urn: AssertionUrn
) -> Tuple[DatasetUrn, AssertionUrn]:
    """Look up an assertion and return its dataset together with the assertion URN.

    Args:
        assertion_urn: URN of the assertion to fetch through the client.

    Returns:
        A (dataset, assertion_urn) tuple, where dataset is the fetched
        assertion's `dataset` attribute.

    Raises:
        SdkUsageError: If the client returns None for the assertion.
    """
    fetched_entity = self.client.entities.get(assertion_urn)
    if fetched_entity is None:
        raise SdkUsageError(f"Assertion {assertion_urn} not found.")

    assert isinstance(fetched_entity, Assertion), (
        f"Expected Assertion entity type for assertion urn={assertion_urn}"
    )
    owning_dataset = fetched_entity.dataset
    return owning_dataset, assertion_urn
560
+
561
+
562
def _print_experimental_warning() -> None:
    """Print the experimental-API notice for the subscriptions client to stdout."""
    notice = "Warning: The subscriptions client is experimental and under heavy development. Expect breaking changes."
    print(notice)
@@ -1,6 +1,14 @@
1
1
  import logging
2
+ import time
2
3
  from typing import Dict, Iterable, List, Optional
3
4
 
5
+ from tenacity import (
6
+ retry,
7
+ retry_if_exception_type,
8
+ stop_after_attempt,
9
+ wait_exponential,
10
+ )
11
+
4
12
  from datahub.configuration import ConfigModel
5
13
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
6
14
  from datahub.ingestion.api.common import PipelineContext
@@ -21,7 +29,7 @@ logger = logging.getLogger(__name__)
21
29
 
22
30
 
23
31
  class ActionRequestOwnerSourceConfig(ConfigModel):
24
- batch_size: int = 20
32
+ batch_size: int = 200
25
33
 
26
34
 
27
35
  class ActionRequestOwnerSourceReport(SourceReport):
@@ -30,7 +38,7 @@ class ActionRequestOwnerSourceReport(SourceReport):
30
38
  correct_assignees_not_found = 0
31
39
  correct_proposal_owners = 0
32
40
  incorrect_proposal_owners = 0
33
- missing_entity_owners = 0
41
+ missing_entity = 0
34
42
  action_request_info_not_found = 0
35
43
 
36
44
 
@@ -73,6 +81,14 @@ class ActionRequestOwnerSource(Source):
73
81
  self.report = ActionRequestOwnerSourceReport()
74
82
  self.graph = ctx.require_graph("Proposal Owner source")
75
83
  self.event_not_produced_warn = False
84
+ self.last_print_time = time.time()
85
+
86
def _print_report(self) -> None:
    """Log the report, at most once every two minutes.

    Compares the current time with self.last_print_time. When more than 120
    seconds (after rounding to one decimal) have passed, the timestamp is
    reset and the report is logged at INFO level.
    """
    elapsed_seconds = round(time.time() - self.last_print_time, 1)
    if elapsed_seconds <= 120:
        # Still inside the 2-minute window: stay quiet.
        return
    self.last_print_time = time.time()
    logger.info(f"\n{self.report.as_string()}")
76
92
 
77
93
  def _process_action_request(
78
94
  self, action_request: Dict
@@ -82,8 +98,12 @@ class ActionRequestOwnerSource(Source):
82
98
  action_type = action_request.get("type")
83
99
  action_request_entity = action_request.get("entity")
84
100
  if action_request_entity is None:
85
- logger.error(f"Action request entity not found for {action_request_urn}")
86
- self.report.missing_entity_owners += 1
101
+ self.report.failure(
102
+ title="Action request entity not found",
103
+ message="Action request entity not found",
104
+ context=str(action_request_urn),
105
+ )
106
+ self.report.missing_entity += 1
87
107
  return None
88
108
  resource_urn = action_request_entity.get("urn")
89
109
  sub_resource = action_request.get("subResource")
@@ -134,8 +154,10 @@ class ActionRequestOwnerSource(Source):
134
154
  )
135
155
  if action_request_info is None:
136
156
  self.report.action_request_info_not_found += 1
137
- logger.error(
138
- f"Action request info not found for action request {action_request_urn}"
157
+ self.report.failure(
158
+ title="Action request info not found for action request",
159
+ message="Action request info not found for action request",
160
+ context=str(action_request_urn),
139
161
  )
140
162
  return None
141
163
  action_request_info.assignedUsers = correct_users
@@ -145,6 +167,12 @@ class ActionRequestOwnerSource(Source):
145
167
  entityUrn=action_request_urn, aspect=action_request_info
146
168
  )
147
169
 
170
+ @retry(
171
+ retry=retry_if_exception_type(ConnectionError),
172
+ stop=stop_after_attempt(3),
173
+ wait=wait_exponential(multiplier=1, min=4, max=10),
174
+ reraise=True,
175
+ )
148
176
  def _get_action_requests(self, start: int) -> List:
149
177
  list_action_requests = self.graph.execute_graphql(
150
178
  query=ACTION_REQUESTS,
@@ -166,10 +194,12 @@ class ActionRequestOwnerSource(Source):
166
194
  def get_workunits(self) -> Iterable[MetadataWorkUnit]:
167
195
  start = 0
168
196
  while True:
197
+ logger.info(f"Fetching action requests starting from {start}")
169
198
  action_requests = self._get_action_requests(start)
170
199
  if len(action_requests) == 0:
171
200
  break
172
201
  for action_request in action_requests:
202
+ self._print_report()
173
203
  result = self._process_action_request(action_request)
174
204
  if result is not None:
175
205
  yield result.as_workunit()