acryl-datahub-cloud 0.3.11.1rc7__py3-none-any.whl → 0.3.12rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/_sdk_extras/__init__.py +17 -2
- acryl_datahub_cloud/_sdk_extras/assertion.py +603 -8
- acryl_datahub_cloud/_sdk_extras/assertion_input.py +1074 -0
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +705 -11
- acryl_datahub_cloud/_sdk_extras/entities/__init__.py +0 -0
- acryl_datahub_cloud/_sdk_extras/entities/assertion.py +425 -0
- acryl_datahub_cloud/_sdk_extras/entities/monitor.py +291 -0
- acryl_datahub_cloud/_sdk_extras/entities/subscription.py +84 -0
- acryl_datahub_cloud/_sdk_extras/errors.py +34 -0
- acryl_datahub_cloud/_sdk_extras/resolver_client.py +39 -0
- acryl_datahub_cloud/_sdk_extras/subscription_client.py +565 -0
- acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2023 -2023
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
- acryl_datahub_cloud/metadata/schema.avsc +24889 -25252
- acryl_datahub_cloud/metadata/schema_classes.py +1133 -1008
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +189 -201
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +27 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +12 -4
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/METADATA +46 -46
- {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/RECORD +37 -28
- {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
from typing_extensions import TypeAlias
|
|
6
|
+
|
|
7
|
+
import datahub.metadata.schema_classes as models
|
|
8
|
+
from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion
|
|
9
|
+
from acryl_datahub_cloud._sdk_extras.entities.subscription import Subscription
|
|
10
|
+
from datahub.emitter.enum_helpers import get_enum_options
|
|
11
|
+
from datahub.emitter.mce_builder import make_ts_millis
|
|
12
|
+
from datahub.errors import SdkUsageError
|
|
13
|
+
from datahub.metadata.urns import AssertionUrn, CorpGroupUrn, CorpUserUrn, DatasetUrn
|
|
14
|
+
from datahub.sdk._utils import DEFAULT_ACTOR_URN
|
|
15
|
+
from datahub.sdk.main_client import DataHubClient
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
SubscriberInputType: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Change types that are accepted for assertion-scoped subscriptions and used
# as the default when an assertion subscription names no change types.
ASSERTION_RELATED_ENTITY_CHANGE_TYPES = {
    models.EntityChangeTypeClass.ASSERTION_ERROR,
    models.EntityChangeTypeClass.ASSERTION_FAILED,
    models.EntityChangeTypeClass.ASSERTION_PASSED,
}

# Every public, string-valued attribute of EntityChangeTypeClass; used as the
# default for dataset-scoped subscriptions.
ALL_EXISTING_ENTITY_CHANGE_TYPES = {
    value
    for value in (
        getattr(models.EntityChangeTypeClass, name)
        for name in dir(models.EntityChangeTypeClass)
        if not name.startswith("_")
    )
    if isinstance(value, str)
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SubscriptionClient:
|
|
37
|
+
"""
|
|
38
|
+
A client for managing subscriptions to entity changes in Acryl DataHub Cloud.
|
|
39
|
+
|
|
40
|
+
Subscriptions can be created at two granularity levels for different entity change types:
|
|
41
|
+
- Dataset level: Affects all assertions associated with the dataset
|
|
42
|
+
- Assertion level: Affects only the specific assertion and overrides any dataset-level subscriptions
|
|
43
|
+
|
|
44
|
+
Notes:
|
|
45
|
+
- This implementation currently returns low-level Subscription entities from the entities
|
|
46
|
+
submodule. In future versions, this may be replaced with a higher-level Subscription abstraction
|
|
47
|
+
for improved usability.
|
|
48
|
+
- The client is designed to work with both datasets and assertions, but currently only supports
|
|
49
|
+
datasets. Assertions will be supported in future versions.
|
|
50
|
+
- Only the ENTITY_CHANGE subscription type is supported.
|
|
51
|
+
- No notificationConfig is set
|
|
52
|
+
- This client is experimental and under heavy development. Expect breaking changes.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(self, client: DataHubClient):
    """Keep a reference to the DataHub client and print the experimental notice.

    Args:
        client: The DataHub client used for all entity reads and writes.
    """
    _print_experimental_warning()
    self.client = client
|
|
58
|
+
|
|
59
|
+
def subscribe(
    self,
    *,
    urn: Union[str, DatasetUrn, AssertionUrn],
    subscriber_urn: SubscriberInputType,
    entity_change_types: Optional[List[str]] = None,
) -> None:
    """
    Create a subscription to receive notifications for entity changes.

    The subscription is always stored against a dataset. When ``urn`` refers to
    an assertion, the assertion is fetched to find its dataset and the assertion
    URN is recorded in the filter of each requested change type.

    If the subscriber has no subscription on the dataset, a new one is created.
    If exactly one exists, the requested change types are merged into it and its
    ``updatedOn`` stamp is refreshed.

    Args:
        urn: The URN (string or URN object) of the dataset or assertion to subscribe to.
            For datasets: subscription applies to all assertions on the dataset.
            For assertions: subscription applies only to that specific assertion.
        subscriber_urn: The URN of the user or group that will receive notifications.
        entity_change_types: Specific change types to subscribe to. If None, defaults are:
            - Dataset: all existing change types
            - Assertion: assertion-related types (ASSERTION_PASSED,
              ASSERTION_FAILED, ASSERTION_ERROR)

    Returns:
        None

    Raises:
        SdkUsageError: If the URN is neither a dataset nor an assertion URN, the
            assertion cannot be found, or the change types list is empty or
            contains unknown values.
        SdkUsageError: For assertion subscription - if non-assertion-related change types
            are provided (only ASSERTION_PASSED, ASSERTION_FAILED, ASSERTION_ERROR allowed).
        SdkUsageError: If more than one subscription already exists for the
            dataset and subscriber.
    """
    _print_experimental_warning()

    # Parse URN string if needed
    parsed_urn = self._maybe_parse_urn(urn)

    dataset_urn: DatasetUrn
    assertion_urn: Optional[AssertionUrn]
    if isinstance(parsed_urn, DatasetUrn):
        dataset_urn, assertion_urn = parsed_urn, None
    else:
        dataset_urn, assertion_urn = self._fetch_dataset_from_assertion(parsed_urn)

    logger.info(
        f"Subscribing to dataset={dataset_urn} assertion={assertion_urn} for subscriber={subscriber_urn} with change types: {entity_change_types}"
    )

    # Validate the requested change types, or fall back to the scope defaults.
    entity_change_type_strs = self._get_entity_change_types(
        assertion_scope=assertion_urn is not None,
        entity_change_types=entity_change_types,
    )

    existing_subscriptions = self.client.resolve.subscription(  # type: ignore[attr-defined]
        entity_urn=dataset_urn.urn(),
        actor_urn=subscriber_urn.urn(),
    )

    if not existing_subscriptions:
        # No subscription yet for this dataset/subscriber pair: create one.
        actor_type = (
            CorpUserUrn.ENTITY_TYPE
            if isinstance(subscriber_urn, CorpUserUrn)
            else CorpGroupUrn.ENTITY_TYPE
        )
        subscription = Subscription(
            info=models.SubscriptionInfoClass(
                entityUrn=dataset_urn.urn(),
                actorUrn=subscriber_urn.urn(),
                actorType=actor_type,
                types=[
                    models.SubscriptionTypeClass.ENTITY_CHANGE,
                ],
                entityChangeTypes=self._merge_entity_change_types(
                    existing_change_types=None,
                    new_change_type_strs=entity_change_type_strs,
                    new_assertion_urn=assertion_urn,
                ),
                createdOn=self._create_audit_stamp(),
                updatedOn=self._create_audit_stamp(),
            ),
        )
        self.client.entities.upsert(subscription)
        logger.info(f"Subscription created: {subscription.urn}")
        return

    if len(existing_subscriptions) > 1:
        # Same condition and wording as unsubscribe(); the assertion is included
        # because it is part of what the caller asked for.
        raise SdkUsageError(
            f"Multiple subscriptions found for dataset={dataset_urn} assertion={assertion_urn} and subscriber={subscriber_urn}. "
            f"Expected at most 1, got {len(existing_subscriptions)}"
        )

    # Exactly one subscription exists: merge the new change types into it.
    subscription_urn = existing_subscriptions[0]
    existing_subscription_entity = self.client.entities.get(subscription_urn)
    assert isinstance(existing_subscription_entity, Subscription), (
        f"Expected Subscription entity type for subscription urn={subscription_urn}"
    )
    logger.info(
        f"Found existing subscription to be updated: {existing_subscription_entity.urn}"
    )
    existing_subscription_entity.info.entityChangeTypes = self._merge_entity_change_types(
        existing_change_types=existing_subscription_entity.info.entityChangeTypes,
        new_change_type_strs=entity_change_type_strs,
        new_assertion_urn=assertion_urn,
    )
    existing_subscription_entity.info.updatedOn = self._create_audit_stamp()
    self.client.entities.upsert(existing_subscription_entity)
    logger.info(f"Subscription updated: {existing_subscription_entity.urn}")
|
|
161
|
+
|
|
162
|
+
def list_subscriptions(
    self,
    *,
    urn: Union[str, DatasetUrn, AssertionUrn],
    entity_change_types: Optional[List[models.EntityChangeTypeClass]] = None,
    subscriber_urn: Optional[SubscriberInputType] = None,
) -> List[Subscription]:
    """
    Retrieve existing subscriptions for a dataset or assertion.

    NOTE(review): retrieval is not implemented yet. The arguments are only
    logged, and the return value is a single ``Subscription`` constructed with
    no arguments rather than data read from the server - confirm this
    placeholder is what callers should receive until the TODO is resolved.

    Args:
        urn: The URN of the dataset or assertion to query subscriptions for.
        entity_change_types: Intended filter on change types (currently unused
            apart from logging).
        subscriber_urn: Intended filter on the subscribing user or group
            (currently unused apart from logging).

    Returns:
        List[Subscription]: A one-element list holding the placeholder entity.
    """
    _print_experimental_warning()
    logger.info(
        f"Listing subscriptions for {urn} with change types: {entity_change_types} and subscriber: {subscriber_urn}"
    )
    # TODO: Implement the actual logic to retrieve subscriptions.
    placeholder = Subscription()
    return [placeholder]
|
|
188
|
+
|
|
189
|
+
def unsubscribe(
    self,
    *,
    urn: Union[str, DatasetUrn, AssertionUrn],
    subscriber_urn: SubscriberInputType,
    entity_change_types: Optional[List[models.EntityChangeTypeClass]] = None,
) -> None:
    """
    Remove subscriptions for entity change notifications.

    Only dataset URNs are supported at the moment; passing an assertion URN
    raises ``SdkUsageError``.

    **Dataset unsubscription:**
    - Removes specified change types from the subscription
    - If no change types specified (``None``), removes all known change types
    - Deletes entire subscription if no change types remain
    - Does nothing (beyond logging) if the subscriber has no subscription on
      the dataset

    **Assertion unsubscription (not yet implemented):**
    - Planned: remove the assertion from the specified change type filters,
      defaulting to the assertion-related change types
      (ASSERTION_PASSED, ASSERTION_FAILED, ASSERTION_ERROR)

    Args:
        urn: URN (string or URN object) of the dataset to unsubscribe from.
        subscriber_urn: User or group URN to unsubscribe.
        entity_change_types: Specific change types to remove. ``None`` means
            "use the defaults"; an explicit empty list is rejected.

    Returns:
        None

    Raises:
        SdkUsageError: If the URN is an assertion URN or an unsupported URN type.
        SdkUsageError: If ``entity_change_types`` is an empty list or contains
            unknown change types.
        SdkUsageError: If more than one subscription exists for the dataset and
            subscriber, or the subscription has no change types to remove.

    Note:
        This method is experimental and may change in future versions.
    """
    _print_experimental_warning()

    # Parse URN string if needed
    parsed_urn = self._maybe_parse_urn(urn)

    # For assertion case, fail as requested
    if isinstance(parsed_urn, AssertionUrn):
        raise SdkUsageError(
            "Assertion unsubscription is not yet implemented. Only dataset unsubscription is currently supported."
        )

    # _maybe_parse_urn returns either a DatasetUrn or an AssertionUrn, and the
    # assertion case was rejected above, so this is a dataset URN.
    dataset_urn: DatasetUrn = parsed_urn
    # TODO: Resolve from the assertion once assertion unsubscribe is implemented.
    assertion_urn: Optional[AssertionUrn] = None

    logger.info(
        f"Unsubscribing from dataset={dataset_urn} for subscriber={subscriber_urn} with change types: {entity_change_types}"
    )

    # Find existing subscription
    existing_subscription_urns = self.client.resolve.subscription(  # type: ignore[attr-defined]
        entity_urn=dataset_urn.urn(),
        actor_urn=subscriber_urn.urn(),
    )

    if not existing_subscription_urns:
        logger.info(
            f"No subscription found for dataset={dataset_urn} and subscriber={subscriber_urn}"
        )
        return
    if len(existing_subscription_urns) > 1:
        raise SdkUsageError(
            f"Multiple subscriptions found for dataset={dataset_urn} and subscriber={subscriber_urn}. "
            f"Expected at most 1, got {len(existing_subscription_urns)}"
        )

    subscription_urn = existing_subscription_urns[0]
    subscription_entity = self.client.entities.get(subscription_urn)
    assert isinstance(subscription_entity, Subscription), (
        f"Expected Subscription entity type for subscription urn={subscription_urn}"
    )
    logger.info(
        f"Found existing subscription to be updated: {subscription_entity.urn}"
    )

    # Distinguish "not provided" (None -> defaults) from "provided but empty".
    # A truthiness test here would turn [] into None and silently remove
    # every change type instead of raising the empty-list error.
    requested_change_types = (
        None
        if entity_change_types is None
        else [str(ect) for ect in entity_change_types]
    )

    # Get the change types to remove (validated input or defaults)
    change_types_to_remove = self._get_entity_change_types(
        assertion_scope=assertion_urn is not None,
        entity_change_types=requested_change_types,
    )

    # Remove the specified change types
    if subscription_entity.info.entityChangeTypes is None:
        raise SdkUsageError(
            f"Subscription {subscription_entity.urn} has no change types to remove"
        )
    updated_change_types = self._remove_change_types(
        subscription_entity.info.entityChangeTypes, change_types_to_remove
    )

    # If no change types remain, delete the subscription
    if not updated_change_types:
        logger.info(
            f"No change types remain, deleting subscription: {subscription_entity.urn}"
        )
        self.client.entities.delete(subscription_entity.urn)
        return

    # Update the subscription with remaining change types
    subscription_entity.info.entityChangeTypes = updated_change_types
    subscription_entity.info.updatedOn = self._create_audit_stamp()
    self.client.entities.upsert(subscription_entity)
    logger.info(f"Subscription updated: {subscription_entity.urn}")
|
|
314
|
+
|
|
315
|
+
def _get_entity_change_types(
    self,
    assertion_scope: bool,
    entity_change_types: Optional[List[str]] = None,
) -> List[str]:
    """Validate the requested change types, or supply the defaults for the scope.

    Args:
        assertion_scope: True for assertion subscriptions, False for dataset subscriptions.
        entity_change_types: Change types to validate. ``None`` selects the defaults.

    Returns:
        The input list unchanged when it passes validation. Otherwise the defaults:
        - Dataset: all available change types
        - Assertion: assertion-related types (ASSERTION_PASSED, ASSERTION_FAILED, ASSERTION_ERROR)

    Raises:
        SdkUsageError: If entity_change_types is an empty list.
        SdkUsageError: If an unknown change type is provided, or assertion scope
            receives non-assertion change types.
    """
    # Nothing requested: hand back a fresh list of the scope's defaults.
    if entity_change_types is None:
        defaults = (
            ASSERTION_RELATED_ENTITY_CHANGE_TYPES
            if assertion_scope
            else ALL_EXISTING_ENTITY_CHANGE_TYPES
        )
        return list(defaults)

    if not entity_change_types:
        raise SdkUsageError("Entity change types cannot be an empty list.")

    all_options = get_enum_options(models.EntityChangeTypeClass)
    for candidate in entity_change_types:
        if candidate not in all_options:
            raise SdkUsageError(
                f"Invalid entity change types provided: {entity_change_types}. "
                f"Valid options are: {all_options}"
            )

    if assertion_scope:
        # Assertion subscriptions accept only the assertion-related change types.
        invalid_types = [
            candidate
            for candidate in entity_change_types
            if candidate not in ASSERTION_RELATED_ENTITY_CHANGE_TYPES
        ]
        if invalid_types:
            raise SdkUsageError(
                f"For assertion subscriptions, only assertion-related change types are allowed. "
                f"Invalid types: {invalid_types}. "
                f"Valid types: {list(ASSERTION_RELATED_ENTITY_CHANGE_TYPES)}"
            )

    return entity_change_types
|
|
368
|
+
|
|
369
|
+
def _create_audit_stamp(self) -> models.AuditStampClass:
    """Build an audit stamp from the current UTC time and the default actor."""
    now_utc = datetime.now(tz=timezone.utc)
    stamp_time = make_ts_millis(now_utc)
    # TODO: Replace with actual actor URN from token if available
    return models.AuditStampClass(stamp_time, actor=DEFAULT_ACTOR_URN)
|
|
375
|
+
|
|
376
|
+
def _merge_entity_change_types(
    self,
    existing_change_types: Optional[List[models.EntityChangeDetailsClass]],
    new_change_type_strs: List[str],
    new_assertion_urn: Optional[AssertionUrn] = None,
) -> List[models.EntityChangeDetailsClass]:
    """Merge existing entity change types with new ones, avoiding duplicates.

    The result lists the existing change types first, in their stored order,
    followed by the newly requested ones in the order given. The order is
    therefore stable across runs instead of depending on set iteration.

    Args:
        existing_change_types: Existing entity change types from the subscription.
            Can be None when creating a new subscription.
        new_change_type_strs: New entity change type strings to add (must not be empty).
        new_assertion_urn: Optional assertion URN to associate with the change
            types named in ``new_change_type_strs``.

    Returns:
        A new list of EntityChangeDetailsClass covering the union of both inputs.

    Note:
        The list ``existing_change_types`` itself is not modified, but the
        filter objects it references are passed to
        ``_merge_entity_change_types_filter``, which may update their
        ``includeAssertions`` in place.
    """
    assert len(new_change_type_strs) > 0, (
        "new_change_type_strs cannot be empty, worse case we have the default values"
    )

    # ect.entityChangeType: Union[str, EntityChangeTypeClass]; EntityChangeTypeClass is cosmetic, just a decorator
    existing_filters: Dict[str, Optional[models.EntityChangeDetailsFilterClass]] = {
        str(ect.entityChangeType): ect.filter
        for ect in (existing_change_types or [])
    }

    # Set for O(1) membership tests inside the comprehension below.
    requested = set(new_change_type_strs)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    merged_change_types = dict.fromkeys([*existing_filters, *new_change_type_strs])

    return [
        models.EntityChangeDetailsClass(
            entityChangeType=ect,
            filter=self._merge_entity_change_types_filter(
                existing_filter=existing_filters.get(ect),
                # Apply new assertion URN to change types that are being newly added or re-specified
                new_assertion_urn=new_assertion_urn if ect in requested else None,
            ),
        )
        for ect in merged_change_types
    ]
|
|
430
|
+
|
|
431
|
+
def _merge_entity_change_types_filter(
    self,
    existing_filter: Optional[models.EntityChangeDetailsFilterClass],
    new_assertion_urn: Optional[AssertionUrn] = None,
) -> Optional[models.EntityChangeDetailsFilterClass]:
    """Combine a change type's existing filter with a newly subscribed assertion.

    Args:
        existing_filter: Filter currently stored for the change type, if any.
        new_assertion_urn: Assertion URN to add to the filter, if any.

    Returns:
        - ``None`` when there is neither a filter nor an assertion.
        - A new filter holding only the assertion when there was no filter.
        - ``existing_filter`` itself otherwise; when an assertion is given, its
          ``includeAssertions`` is updated in place to contain that assertion.
    """
    if not existing_filter:
        # No filter yet: create one only if there is an assertion to put in it.
        if new_assertion_urn:
            return models.EntityChangeDetailsFilterClass(
                includeAssertions=[new_assertion_urn.urn()]
            )
        return None

    if not new_assertion_urn:
        # Nothing to add, so the stored filter is returned untouched.
        return existing_filter

    assert existing_filter is not None and new_assertion_urn is not None

    assertion_urn_str = new_assertion_urn.urn()

    if not existing_filter.includeAssertions:
        # An existing filter with empty includeAssertions is weird, but we handle it just in case
        existing_filter.includeAssertions = [assertion_urn_str]
    elif assertion_urn_str not in existing_filter.includeAssertions:
        # Only added if not present already
        existing_filter.includeAssertions.append(assertion_urn_str)

    return existing_filter
|
|
476
|
+
|
|
477
|
+
def _remove_change_types(
    self,
    existing_change_types: List[models.EntityChangeDetailsClass],
    change_types_to_remove: List[str],
) -> List[models.EntityChangeDetailsClass]:
    """Return the subscription's change types minus the ones asked to be removed.

    Requested change types that the subscription does not contain are reported
    with a warning and otherwise ignored.

    Args:
        existing_change_types: Current entity change types from the subscription
            (must not be empty).
        change_types_to_remove: Change type strings to remove (must not be empty).

    Returns:
        A new list with the entries whose change type was not requested for
        removal, in their original order. The input list is left unmodified.
    """
    assert len(change_types_to_remove) > 0, (
        "change_types_to_remove cannot be empty, worse case we have the default values"
    )
    assert len(existing_change_types) > 0, (
        "Subscription must have at least one change type (no model restriction but business rule)"
    )

    removal_keys = set(change_types_to_remove)
    # Pair every entry with its change type rendered as a string, once.
    keyed_entries = [
        (str(details.entityChangeType), details) for details in existing_change_types
    ]

    # Warn about change types that don't exist in the subscription
    unknown_keys = removal_keys.difference(key for key, _ in keyed_entries)
    if unknown_keys:
        logger.warning(
            f"The following change types do not exist in the subscription and will be ignored: {sorted(unknown_keys)}"
        )

    return [details for key, details in keyed_entries if key not in removal_keys]
|
|
521
|
+
|
|
522
|
+
def _maybe_parse_urn(
    self, urn: Union[str, DatasetUrn, AssertionUrn]
) -> Union[DatasetUrn, AssertionUrn]:
    """Parse URN string into appropriate URN object if needed.

    The entity type is taken from the start of the string
    (``urn:li:<entityType>:``), not from a substring match, so URNs of other
    entity types that merely embed a dataset URN are rejected instead of
    being handed to the dataset parser.

    Args:
        urn: String URN or URN object (DatasetUrn or AssertionUrn)

    Returns:
        Parsed URN object (DatasetUrn or AssertionUrn). URN objects are
        returned as-is.

    Raises:
        SdkUsageError: If ``urn`` is not a dataset or assertion URN, including
            values that are neither a string nor one of the two URN types.
    """
    if isinstance(urn, (DatasetUrn, AssertionUrn)):
        return urn

    if isinstance(urn, str):
        if urn.startswith(f"urn:li:{DatasetUrn.ENTITY_TYPE}:"):
            return DatasetUrn.from_string(urn)
        if urn.startswith(f"urn:li:{AssertionUrn.ENTITY_TYPE}:"):
            return AssertionUrn.from_string(urn)

    raise SdkUsageError(
        f"Unsupported URN type. Only dataset and assertion URNs are supported, got: {urn}"
    )
|
|
548
|
+
|
|
549
|
+
def _fetch_dataset_from_assertion(
    self, assertion_urn: AssertionUrn
) -> Tuple[DatasetUrn, AssertionUrn]:
    """Look up an assertion and return the dataset it reports together with its URN.

    Args:
        assertion_urn: URN of the assertion to fetch through the client.

    Returns:
        A ``(dataset_urn, assertion_urn)`` pair, where ``dataset_urn`` is the
        fetched assertion's ``dataset`` attribute.

    Raises:
        SdkUsageError: If the client returns no entity for the URN.
    """
    entity = self.client.entities.get(assertion_urn)
    if entity is None:
        raise SdkUsageError(f"Assertion {assertion_urn} not found.")

    assert isinstance(entity, Assertion), (
        f"Expected Assertion entity type for assertion urn={assertion_urn}"
    )
    return entity.dataset, assertion_urn
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def _print_experimental_warning() -> None:
|
|
563
|
+
print(
|
|
564
|
+
"Warning: The subscriptions client is experimental and under heavy development. Expect breaking changes."
|
|
565
|
+
)
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import time
|
|
2
3
|
from typing import Dict, Iterable, List, Optional
|
|
3
4
|
|
|
5
|
+
from tenacity import (
|
|
6
|
+
retry,
|
|
7
|
+
retry_if_exception_type,
|
|
8
|
+
stop_after_attempt,
|
|
9
|
+
wait_exponential,
|
|
10
|
+
)
|
|
11
|
+
|
|
4
12
|
from datahub.configuration import ConfigModel
|
|
5
13
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
6
14
|
from datahub.ingestion.api.common import PipelineContext
|
|
@@ -21,7 +29,7 @@ logger = logging.getLogger(__name__)
|
|
|
21
29
|
|
|
22
30
|
|
|
23
31
|
class ActionRequestOwnerSourceConfig(ConfigModel):
|
|
24
|
-
batch_size: int =
|
|
32
|
+
batch_size: int = 200
|
|
25
33
|
|
|
26
34
|
|
|
27
35
|
class ActionRequestOwnerSourceReport(SourceReport):
|
|
@@ -30,7 +38,7 @@ class ActionRequestOwnerSourceReport(SourceReport):
|
|
|
30
38
|
correct_assignees_not_found = 0
|
|
31
39
|
correct_proposal_owners = 0
|
|
32
40
|
incorrect_proposal_owners = 0
|
|
33
|
-
|
|
41
|
+
missing_entity = 0
|
|
34
42
|
action_request_info_not_found = 0
|
|
35
43
|
|
|
36
44
|
|
|
@@ -73,6 +81,14 @@ class ActionRequestOwnerSource(Source):
|
|
|
73
81
|
self.report = ActionRequestOwnerSourceReport()
|
|
74
82
|
self.graph = ctx.require_graph("Proposal Owner source")
|
|
75
83
|
self.event_not_produced_warn = False
|
|
84
|
+
self.last_print_time = time.time()
|
|
85
|
+
|
|
86
|
+
def _print_report(self) -> None:
|
|
87
|
+
time_taken = round(time.time() - self.last_print_time, 1)
|
|
88
|
+
# Print report every 2 minutes
|
|
89
|
+
if time_taken > 120:
|
|
90
|
+
self.last_print_time = time.time()
|
|
91
|
+
logger.info(f"\n{self.report.as_string()}")
|
|
76
92
|
|
|
77
93
|
def _process_action_request(
|
|
78
94
|
self, action_request: Dict
|
|
@@ -82,8 +98,12 @@ class ActionRequestOwnerSource(Source):
|
|
|
82
98
|
action_type = action_request.get("type")
|
|
83
99
|
action_request_entity = action_request.get("entity")
|
|
84
100
|
if action_request_entity is None:
|
|
85
|
-
|
|
86
|
-
|
|
101
|
+
self.report.failure(
|
|
102
|
+
title="Action request entity not found",
|
|
103
|
+
message="Action request entity not found",
|
|
104
|
+
context=str(action_request_urn),
|
|
105
|
+
)
|
|
106
|
+
self.report.missing_entity += 1
|
|
87
107
|
return None
|
|
88
108
|
resource_urn = action_request_entity.get("urn")
|
|
89
109
|
sub_resource = action_request.get("subResource")
|
|
@@ -134,8 +154,10 @@ class ActionRequestOwnerSource(Source):
|
|
|
134
154
|
)
|
|
135
155
|
if action_request_info is None:
|
|
136
156
|
self.report.action_request_info_not_found += 1
|
|
137
|
-
|
|
138
|
-
|
|
157
|
+
self.report.failure(
|
|
158
|
+
title="Action request info not found for action request",
|
|
159
|
+
message="Action request info not found for action request",
|
|
160
|
+
context=str(action_request_urn),
|
|
139
161
|
)
|
|
140
162
|
return None
|
|
141
163
|
action_request_info.assignedUsers = correct_users
|
|
@@ -145,6 +167,12 @@ class ActionRequestOwnerSource(Source):
|
|
|
145
167
|
entityUrn=action_request_urn, aspect=action_request_info
|
|
146
168
|
)
|
|
147
169
|
|
|
170
|
+
@retry(
|
|
171
|
+
retry=retry_if_exception_type(ConnectionError),
|
|
172
|
+
stop=stop_after_attempt(3),
|
|
173
|
+
wait=wait_exponential(multiplier=1, min=4, max=10),
|
|
174
|
+
reraise=True,
|
|
175
|
+
)
|
|
148
176
|
def _get_action_requests(self, start: int) -> List:
|
|
149
177
|
list_action_requests = self.graph.execute_graphql(
|
|
150
178
|
query=ACTION_REQUESTS,
|
|
@@ -166,10 +194,12 @@ class ActionRequestOwnerSource(Source):
|
|
|
166
194
|
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
|
|
167
195
|
start = 0
|
|
168
196
|
while True:
|
|
197
|
+
logger.info(f"Fetching action requests starting from {start}")
|
|
169
198
|
action_requests = self._get_action_requests(start)
|
|
170
199
|
if len(action_requests) == 0:
|
|
171
200
|
break
|
|
172
201
|
for action_request in action_requests:
|
|
202
|
+
self._print_report()
|
|
173
203
|
result = self._process_action_request(action_request)
|
|
174
204
|
if result is not None:
|
|
175
205
|
yield result.as_workunit()
|