acryl-datahub-cloud 0.3.14.1rc4__py3-none-any.whl → 0.3.15rc0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. More details are available on the original page.

Files changed (40)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +1 -1
  3. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +30 -7
  4. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +1 -1
  5. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +22 -18
  6. acryl_datahub_cloud/elasticsearch/graph_service.py +23 -9
  7. acryl_datahub_cloud/lineage_features/source.py +77 -6
  8. acryl_datahub_cloud/metadata/_urns/urn_defs.py +60 -0
  9. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
  10. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  11. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  12. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  14. acryl_datahub_cloud/metadata/schema.avsc +420 -21
  15. acryl_datahub_cloud/metadata/schema_classes.py +521 -8
  16. acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
  17. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +37 -15
  18. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +18 -15
  19. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +19 -15
  20. acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  21. acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +230 -0
  22. acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +21 -0
  23. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +3 -1
  24. acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
  25. acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
  26. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
  27. acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
  28. acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +2 -1
  29. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +1 -1
  30. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +24 -15
  31. acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  32. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +22 -6
  33. acryl_datahub_cloud/sdk/assertions_client.py +35 -7
  34. acryl_datahub_cloud/sdk/entities/subscription.py +22 -6
  35. acryl_datahub_cloud/sdk/subscription_client.py +8 -2
  36. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/METADATA +39 -42
  37. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/RECORD +40 -36
  38. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/WHEEL +0 -0
  39. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/entry_points.txt +0 -0
  40. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/top_level.txt +0 -0
@@ -460,6 +460,78 @@
460
460
  "default": null,
461
461
  "doc": "SSO integrations between DataHub and identity providers"
462
462
  },
463
+ {
464
+ "type": [
465
+ "null",
466
+ {
467
+ "type": "record",
468
+ "name": "OAuthSettings",
469
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
470
+ "fields": [
471
+ {
472
+ "type": {
473
+ "type": "array",
474
+ "items": {
475
+ "type": "record",
476
+ "name": "OAuthProvider",
477
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
478
+ "fields": [
479
+ {
480
+ "type": "boolean",
481
+ "name": "enabled",
482
+ "doc": "Whether this OAuth provider is enabled."
483
+ },
484
+ {
485
+ "type": "string",
486
+ "name": "name",
487
+ "doc": "The name of this OAuth provider. This is used for display purposes only."
488
+ },
489
+ {
490
+ "type": [
491
+ "null",
492
+ "string"
493
+ ],
494
+ "name": "jwksUri",
495
+ "default": null,
496
+ "doc": "The URI of the JSON Web Key Set (JWKS) endpoint for this OAuth provider."
497
+ },
498
+ {
499
+ "type": "string",
500
+ "name": "issuer",
501
+ "doc": "The expected issuer (iss) claim in the JWTs issued by this OAuth provider."
502
+ },
503
+ {
504
+ "type": "string",
505
+ "name": "audience",
506
+ "doc": "The expected audience (aud) claim in the JWTs issued by this OAuth provider."
507
+ },
508
+ {
509
+ "type": "string",
510
+ "name": "algorithm",
511
+ "default": "RS256",
512
+ "doc": "The JWT signing algorithm required for this provider.\nPrevents algorithm confusion attacks. Common values: RS256, RS384, RS512, PS256, ES256"
513
+ },
514
+ {
515
+ "type": "string",
516
+ "name": "userIdClaim",
517
+ "default": "sub",
518
+ "doc": "The JWT claim to use as the user identifier for this provider.\nDifferent providers use different claims (sub, email, preferred_username, etc.)"
519
+ }
520
+ ],
521
+ "doc": "An OAuth Provider. This provides information required to validate inbound\nrequests with OAuth 2.0 bearer tokens."
522
+ }
523
+ },
524
+ "name": "providers",
525
+ "doc": "Trusted OAuth Providers"
526
+ }
527
+ ],
528
+ "doc": "Trust oauth providers to use for authentication."
529
+ }
530
+ ],
531
+ "name": "oauth",
532
+ "default": null,
533
+ "doc": "Settings related to the oauth authentication provider"
534
+ },
463
535
  {
464
536
  "type": [
465
537
  "null",
@@ -25,51 +25,76 @@
25
25
  "doc": "Cached number of entities related by lineage, downstream."
26
26
  },
27
27
  {
28
- "type": {
29
- "type": "record",
30
- "name": "AuditStamp",
31
- "namespace": "com.linkedin.pegasus2avro.common",
32
- "fields": [
33
- {
34
- "type": "long",
35
- "name": "time",
36
- "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
37
- },
38
- {
39
- "java": {
40
- "class": "com.linkedin.pegasus2avro.common.urn.Urn"
28
+ "Searchable": {
29
+ "addToFilters": true,
30
+ "fieldName": "hasAssetLevelLineageFeature",
31
+ "fieldType": "BOOLEAN",
32
+ "filterNameOverride": "Has Asset-Level Lineage"
33
+ },
34
+ "type": [
35
+ "null",
36
+ "boolean"
37
+ ],
38
+ "name": "hasAssetLevelLineage",
39
+ "default": null,
40
+ "doc": "Whether upstreamCount > 0 OR downstreamCount > 0\nstored to make filtering easier"
41
+ },
42
+ {
43
+ "Searchable": {
44
+ "/time": {
45
+ "fieldName": "lineageFeaturesComputedAt",
46
+ "fieldType": "DATETIME"
47
+ }
48
+ },
49
+ "type": [
50
+ "null",
51
+ {
52
+ "type": "record",
53
+ "name": "AuditStamp",
54
+ "namespace": "com.linkedin.pegasus2avro.common",
55
+ "fields": [
56
+ {
57
+ "type": "long",
58
+ "name": "time",
59
+ "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
41
60
  },
42
- "type": "string",
43
- "name": "actor",
44
- "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
45
- "Urn": "Urn"
46
- },
47
- {
48
- "java": {
49
- "class": "com.linkedin.pegasus2avro.common.urn.Urn"
61
+ {
62
+ "java": {
63
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
64
+ },
65
+ "type": "string",
66
+ "name": "actor",
67
+ "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
68
+ "Urn": "Urn"
50
69
  },
51
- "type": [
52
- "null",
53
- "string"
54
- ],
55
- "name": "impersonator",
56
- "default": null,
57
- "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
58
- "Urn": "Urn"
59
- },
60
- {
61
- "type": [
62
- "null",
63
- "string"
64
- ],
65
- "name": "message",
66
- "default": null,
67
- "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
68
- }
69
- ],
70
- "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
71
- },
70
+ {
71
+ "java": {
72
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
73
+ },
74
+ "type": [
75
+ "null",
76
+ "string"
77
+ ],
78
+ "name": "impersonator",
79
+ "default": null,
80
+ "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
81
+ "Urn": "Urn"
82
+ },
83
+ {
84
+ "type": [
85
+ "null",
86
+ "string"
87
+ ],
88
+ "name": "message",
89
+ "default": null,
90
+ "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
91
+ }
92
+ ],
93
+ "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
94
+ }
95
+ ],
72
96
  "name": "computedAt",
97
+ "default": null,
73
98
  "doc": "Record of when and how lineage features were computed."
74
99
  }
75
100
  ]
@@ -140,5 +140,6 @@
140
140
  "name": "parent",
141
141
  "default": null
142
142
  }
143
- ]
143
+ ],
144
+ "doc": "Relates a physical asset to a logical model."
144
145
  }
@@ -2743,7 +2743,7 @@
2743
2743
  },
2744
2744
  "type": "string",
2745
2745
  "name": "pictureLink",
2746
- "default": "https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/default_avatar.png",
2746
+ "default": "assets/platforms/default_avatar.png",
2747
2747
  "doc": "A URL which points to a picture which user wants to set as a profile photo"
2748
2748
  },
2749
2749
  {
@@ -171,6 +171,12 @@
171
171
  "namespace": "com.linkedin.pegasus2avro.monitor",
172
172
  "fields": [
173
173
  {
174
+ "Searchable": {
175
+ "/*/assertion": {
176
+ "fieldName": "assertionUrn",
177
+ "fieldType": "URN"
178
+ }
179
+ },
174
180
  "type": {
175
181
  "type": "array",
176
182
  "items": {
@@ -628,6 +634,7 @@
628
634
  },
629
635
  {
630
636
  "Searchable": {
637
+ "addToFilters": true,
631
638
  "fieldName": "assertionType",
632
639
  "fieldType": "KEYWORD"
633
640
  },
@@ -990,9 +997,6 @@
990
997
  ],
991
998
  "name": "Asserts"
992
999
  },
993
- "Searchable": {
994
- "fieldType": "URN"
995
- },
996
1000
  "java": {
997
1001
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
998
1002
  },
@@ -1383,9 +1387,6 @@
1383
1387
  ],
1384
1388
  "name": "Asserts"
1385
1389
  },
1386
- "Searchable": {
1387
- "fieldType": "URN"
1388
- },
1389
1390
  "java": {
1390
1391
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
1391
1392
  },
@@ -1675,9 +1676,6 @@
1675
1676
  ],
1676
1677
  "name": "Asserts"
1677
1678
  },
1678
- "Searchable": {
1679
- "fieldType": "URN"
1680
- },
1681
1679
  "java": {
1682
1680
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
1683
1681
  },
@@ -1757,9 +1755,6 @@
1757
1755
  ],
1758
1756
  "name": "Asserts"
1759
1757
  },
1760
- "Searchable": {
1761
- "fieldType": "URN"
1762
- },
1763
1758
  "java": {
1764
1759
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
1765
1760
  },
@@ -2000,9 +1995,6 @@
2000
1995
  ],
2001
1996
  "name": "Asserts"
2002
1997
  },
2003
- "Searchable": {
2004
- "fieldType": "URN"
2005
- },
2006
1998
  "java": {
2007
1999
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
2008
2000
  },
@@ -3285,6 +3277,23 @@
3285
3277
  "name": "note",
3286
3278
  "default": null,
3287
3279
  "doc": "An optional note to give technical owners more context about the assertion, and how to troubleshoot it.\nThe UI will render this in markdown format."
3280
+ },
3281
+ {
3282
+ "Searchable": {
3283
+ "fieldName": "entity",
3284
+ "fieldType": "URN"
3285
+ },
3286
+ "java": {
3287
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
3288
+ },
3289
+ "type": [
3290
+ "null",
3291
+ "string"
3292
+ ],
3293
+ "name": "entity",
3294
+ "default": null,
3295
+ "doc": "The entity targeted by this assertion. Newly added field, automatically set by mutation",
3296
+ "Urn": "Urn"
3288
3297
  }
3289
3298
  ],
3290
3299
  "doc": "Information about an assertion\n\nAcryl Only: Did you update AssertionWithoutAnnotations.pdl? If not, please update it."
@@ -33,6 +33,15 @@
33
33
  "default": false,
34
34
  "doc": "Whether or not this asset should be displayed in the asset sidebar"
35
35
  },
36
+ {
37
+ "Searchable": {
38
+ "fieldType": "BOOLEAN"
39
+ },
40
+ "type": "boolean",
41
+ "name": "hideInAssetSummaryWhenEmpty",
42
+ "default": false,
43
+ "doc": "Whether or not this asset should be hidden in the asset sidebar (showInAssetSummary should be enabled)\nwhen its value is empty"
44
+ },
36
45
  {
37
46
  "Searchable": {
38
47
  "fieldType": "BOOLEAN"
@@ -3,13 +3,24 @@ This file contains the AssertionInput class and related classes, which are used
3
3
  validate and represent the input for creating an Assertion in DataHub.
4
4
  """
5
5
 
6
+ import inspect
6
7
  import random
7
8
  import string
8
9
  from abc import ABC, abstractmethod
9
10
  from dataclasses import dataclass
10
11
  from datetime import datetime
11
12
  from enum import Enum
12
- from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
13
+ from typing import (
14
+ Callable,
15
+ Collection,
16
+ Literal,
17
+ Optional,
18
+ Type,
19
+ TypeAlias,
20
+ TypeVar,
21
+ Union,
22
+ cast,
23
+ )
13
24
 
14
25
  import pydantic
15
26
  import pytz
@@ -305,7 +316,9 @@ class DetectionMechanism:
305
316
  return_value = getattr(
306
317
  DetectionMechanism, detection_mechanism_config.upper()
307
318
  )
308
- if isinstance(return_value, pydantic.main.ModelMetaclass):
319
+ if inspect.isclass(return_value) and issubclass(
320
+ return_value, pydantic.BaseModel
321
+ ):
309
322
  try:
310
323
  # We try to instantiate here to let pydantic raise a helpful error
311
324
  # about which parameters are missing
@@ -1147,10 +1160,13 @@ class _AssertionInput(ABC):
1147
1160
  examples={
1148
1161
  "Tags from string (tag name)": "my_tag_1",
1149
1162
  "Tags from string (tag URN)": "urn:li:tag:my_tag_1",
1150
- "Tags from list (mixed)": [
1151
- "my_tag_1",
1152
- "urn:li:tag:my_tag_2",
1153
- ],
1163
+ "Tags from list (mixed)": cast(
1164
+ Collection[str],
1165
+ [
1166
+ "my_tag_1",
1167
+ "urn:li:tag:my_tag_2",
1168
+ ],
1169
+ ),
1154
1170
  },
1155
1171
  )
1156
1172
 
@@ -67,6 +67,7 @@ from acryl_datahub_cloud.sdk.errors import SDKUsageError
67
67
  from datahub.errors import ItemNotFoundError
68
68
  from datahub.metadata import schema_classes as models
69
69
  from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, MonitorUrn
70
+ from datahub.sdk.search_filters import FilterDsl
70
71
 
71
72
  if TYPE_CHECKING:
72
73
  from datahub.sdk.main_client import DataHubClient
@@ -321,6 +322,7 @@ class AssertionsClient:
321
322
  incident_behavior=incident_behavior,
322
323
  tags=tags,
323
324
  created_by=updated_by,
325
+ enabled=enabled if enabled is not None else True,
324
326
  )
325
327
 
326
328
  # 3. Check for any issues e.g. different dataset urns
@@ -407,6 +409,7 @@ class AssertionsClient:
407
409
  tags=tags,
408
410
  created_by=updated_by,
409
411
  schedule=schedule,
412
+ enabled=enabled if enabled is not None else True,
410
413
  )
411
414
 
412
415
  # 3. Check for any issues e.g. different dataset urns
@@ -494,6 +497,7 @@ class AssertionsClient:
494
497
  schedule=schedule,
495
498
  freshness_schedule_check_type=freshness_schedule_check_type,
496
499
  lookback_window=lookback_window,
500
+ enabled=enabled if enabled is not None else True,
497
501
  )
498
502
 
499
503
  # 3. Check for any issues e.g. different dataset urns
@@ -584,6 +588,7 @@ class AssertionsClient:
584
588
  schedule=schedule,
585
589
  criteria_condition=parsed_criteria.condition,
586
590
  criteria_parameters=parsed_criteria.parameters,
591
+ enabled=enabled if enabled is not None else True,
587
592
  )
588
593
 
589
594
  # 3. Check for any issues e.g. different dataset urns
@@ -686,6 +691,7 @@ class AssertionsClient:
686
691
  tags=tags,
687
692
  created_by=updated_by,
688
693
  schedule=schedule,
694
+ enabled=enabled if enabled is not None else True,
689
695
  )
690
696
 
691
697
  # 3. Check for any issues e.g. different dataset urns
@@ -759,17 +765,37 @@ class AssertionsClient:
759
765
  except ItemNotFoundError:
760
766
  pass
761
767
 
762
- # Get monitor entity
763
- monitor_urn = Monitor._ensure_id(id=(dataset_urn, urn))
768
+ # Get monitor entity by searching for monitors where assertionUrn equals the assertion urn
769
+ monitor_urn: Optional[MonitorUrn] = None
764
770
  maybe_monitor_entity: Optional[Monitor] = None
765
771
  try:
766
- entity = self.client.entities.get(monitor_urn)
767
- if entity is not None:
768
- assert isinstance(entity, Monitor)
769
- maybe_monitor_entity = entity
770
- except ItemNotFoundError:
772
+ # Search for monitor entities with assertionUrn matching the assertion urn
773
+ monitor_filter = FilterDsl.and_(
774
+ FilterDsl.entity_type("monitor"),
775
+ FilterDsl.custom_filter("assertionUrn", "EQUAL", [str(urn)]),
776
+ )
777
+ monitor_urns = list(self.client.search.get_urns(filter=monitor_filter))
778
+
779
+ if monitor_urns:
780
+ # Log if there are multiple monitors found, because this is unexpected
781
+ if len(monitor_urns) > 1:
782
+ logger.warning(
783
+ f"Multiple monitors found for assertion {urn}, which should never happen: {monitor_urns}"
784
+ )
785
+ # Use the first matching monitor
786
+ monitor_urn = MonitorUrn.from_string(str(monitor_urns[0]))
787
+ entity = self.client.entities.get(monitor_urn)
788
+ if entity is not None:
789
+ assert isinstance(entity, Monitor)
790
+ maybe_monitor_entity = entity
791
+ except ItemNotFoundError as e:
792
+ logger.debug(f"Could not find monitor for assertion {urn}: {e}")
771
793
  pass
772
794
 
795
+ # If no monitor found via search, fall back to creating a new monitor with the dataset urn and assertion urn
796
+ if monitor_urn is None:
797
+ monitor_urn = Monitor._ensure_id(id=(dataset_urn, urn))
798
+
773
799
  return maybe_assertion_entity, monitor_urn, maybe_monitor_entity
774
800
 
775
801
  def _merge_smart_freshness_input(
@@ -2749,6 +2775,7 @@ class AssertionsClient:
2749
2775
  incident_behavior=incident_behavior,
2750
2776
  tags=tags,
2751
2777
  created_by=updated_by,
2778
+ enabled=enabled if enabled is not None else True,
2752
2779
  )
2753
2780
 
2754
2781
  # 3. Check for any issues e.g. different dataset urns
@@ -3704,6 +3731,7 @@ class AssertionsClient:
3704
3731
  incident_behavior=incident_behavior,
3705
3732
  tags=tags,
3706
3733
  created_by=updated_by,
3734
+ enabled=enabled if enabled is not None else True,
3707
3735
  )
3708
3736
 
3709
3737
  # 3. Check for any issues e.g. different dataset urns
@@ -1,4 +1,3 @@
1
- import uuid
2
1
  from typing import (
3
2
  Type,
4
3
  Union,
@@ -9,6 +8,7 @@ from typing_extensions import (
9
8
  assert_never,
10
9
  )
11
10
 
11
+ from datahub.emitter.mcp_builder import DatahubKey
12
12
  from datahub.metadata import schema_classes as models
13
13
  from datahub.metadata.urns import (
14
14
  SubscriptionUrn,
@@ -17,6 +17,24 @@ from datahub.metadata.urns import (
17
17
  from datahub.sdk.entity import Entity
18
18
 
19
19
 
20
+ class SubscriptionKey(DatahubKey):
21
+ """
22
+ Key class for generating stable subscription identifiers.
23
+
24
+ The main goal is to have stable IDs that are deterministic based on the
25
+ entity and actor URNs, which helps prevent duplicate subscriptions during
26
+ eventual consistency scenarios when multiple subscription requests happen
27
+ in quick succession.
28
+
29
+ This implementation matches the behavior expected in the backend when a
30
+ subscription is created, ensuring consistent ID generation between the
31
+ Python SDK and Java backend services.
32
+ """
33
+
34
+ entity_urn: str
35
+ actor_urn: str
36
+
37
+
20
38
  class Subscription(Entity):
21
39
  """
22
40
  Subscription entity class.
@@ -37,8 +55,8 @@ class Subscription(Entity):
37
55
  self,
38
56
  # SubscriptionInfo
39
57
  info: models.SubscriptionInfoClass,
40
- # Identity; it is automatically generated if not provided
41
- id: Union[str, SubscriptionUrn, None] = None,
58
+ # Identity
59
+ id: Union[str, SubscriptionUrn],
42
60
  ):
43
61
  """
44
62
  Initialize the Subscription entity.
@@ -61,13 +79,11 @@ class Subscription(Entity):
61
79
  return self._urn
62
80
 
63
81
  @classmethod
64
- def _ensure_id(cls, id: Union[str, SubscriptionUrn, None]) -> SubscriptionUrn:
82
+ def _ensure_id(cls, id: Union[str, SubscriptionUrn]) -> SubscriptionUrn:
65
83
  if isinstance(id, str):
66
84
  return SubscriptionUrn.from_string(id)
67
85
  elif isinstance(id, SubscriptionUrn):
68
86
  return id
69
- elif id is None:
70
- return SubscriptionUrn.from_string(f"urn:li:subscription:{uuid.uuid4()}")
71
87
  else:
72
88
  assert_never(id)
73
89
 
@@ -6,7 +6,7 @@ from typing_extensions import TypeAlias
6
6
 
7
7
  import datahub.metadata.schema_classes as models
8
8
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion
9
- from acryl_datahub_cloud.sdk.entities.subscription import Subscription
9
+ from acryl_datahub_cloud.sdk.entities.subscription import Subscription, SubscriptionKey
10
10
  from datahub.emitter.enum_helpers import get_enum_options
11
11
  from datahub.emitter.mce_builder import make_ts_millis
12
12
  from datahub.emitter.rest_emitter import EmitMode
@@ -120,8 +120,14 @@ class SubscriptionClient:
120
120
  skip_cache=True,
121
121
  )
122
122
  if not existing_subscriptions:
123
- # new subscription
123
+ # new subscription - use stable ID generation
124
+ subscription_key = SubscriptionKey(
125
+ entity_urn=dataset_urn.urn(),
126
+ actor_urn=parsed_subscriber_urn.urn(),
127
+ )
128
+
124
129
  subscription = Subscription(
130
+ id=f"urn:li:subscription:{subscription_key.guid()}",
125
131
  info=models.SubscriptionInfoClass(
126
132
  entityUrn=dataset_urn.urn(),
127
133
  actorUrn=parsed_subscriber_urn.urn(),