acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (205) hide show
  1. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
  2. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
  3. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +4 -3
  7. datahub/api/entities/dataset/dataset.py +731 -42
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/cli/check_cli.py +72 -19
  10. datahub/cli/docker_cli.py +3 -3
  11. datahub/cli/iceberg_cli.py +31 -7
  12. datahub/cli/ingest_cli.py +30 -93
  13. datahub/cli/lite_cli.py +4 -2
  14. datahub/cli/specific/dataproduct_cli.py +1 -1
  15. datahub/cli/specific/dataset_cli.py +128 -14
  16. datahub/configuration/common.py +10 -2
  17. datahub/configuration/git.py +1 -3
  18. datahub/configuration/kafka.py +1 -1
  19. datahub/emitter/mce_builder.py +28 -13
  20. datahub/emitter/mcp_builder.py +4 -1
  21. datahub/emitter/response_helper.py +145 -0
  22. datahub/emitter/rest_emitter.py +323 -10
  23. datahub/ingestion/api/decorators.py +1 -1
  24. datahub/ingestion/api/source_helpers.py +4 -0
  25. datahub/ingestion/fs/s3_fs.py +2 -2
  26. datahub/ingestion/glossary/classification_mixin.py +1 -5
  27. datahub/ingestion/graph/client.py +41 -22
  28. datahub/ingestion/graph/entity_versioning.py +3 -3
  29. datahub/ingestion/graph/filters.py +64 -37
  30. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
  31. datahub/ingestion/run/pipeline.py +112 -148
  32. datahub/ingestion/run/sink_callback.py +77 -0
  33. datahub/ingestion/sink/datahub_rest.py +8 -0
  34. datahub/ingestion/source/abs/config.py +2 -4
  35. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  36. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
  37. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
  38. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
  39. datahub/ingestion/source/cassandra/cassandra.py +152 -233
  40. datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
  41. datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
  42. datahub/ingestion/source/common/subtypes.py +12 -0
  43. datahub/ingestion/source/csv_enricher.py +3 -3
  44. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  45. datahub/ingestion/source/dbt/dbt_common.py +8 -5
  46. datahub/ingestion/source/dbt/dbt_core.py +11 -9
  47. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  48. datahub/ingestion/source/delta_lake/config.py +8 -1
  49. datahub/ingestion/source/delta_lake/report.py +4 -2
  50. datahub/ingestion/source/delta_lake/source.py +20 -5
  51. datahub/ingestion/source/dremio/dremio_api.py +4 -8
  52. datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
  53. datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
  54. datahub/ingestion/source/elastic_search.py +26 -6
  55. datahub/ingestion/source/feast.py +27 -8
  56. datahub/ingestion/source/file.py +6 -3
  57. datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
  58. datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
  59. datahub/ingestion/source/ge_data_profiler.py +12 -15
  60. datahub/ingestion/source/iceberg/iceberg.py +46 -12
  61. datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
  62. datahub/ingestion/source/identity/okta.py +37 -7
  63. datahub/ingestion/source/kafka/kafka.py +1 -1
  64. datahub/ingestion/source/kafka_connect/common.py +2 -7
  65. datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
  66. datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
  67. datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
  68. datahub/ingestion/source/looker/looker_common.py +6 -5
  69. datahub/ingestion/source/looker/looker_file_loader.py +2 -2
  70. datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
  71. datahub/ingestion/source/looker/looker_source.py +1 -1
  72. datahub/ingestion/source/looker/looker_template_language.py +4 -2
  73. datahub/ingestion/source/looker/lookml_source.py +3 -2
  74. datahub/ingestion/source/metabase.py +57 -35
  75. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  76. datahub/ingestion/source/metadata/lineage.py +2 -2
  77. datahub/ingestion/source/mlflow.py +365 -35
  78. datahub/ingestion/source/mode.py +18 -8
  79. datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
  80. datahub/ingestion/source/nifi.py +37 -11
  81. datahub/ingestion/source/openapi.py +1 -1
  82. datahub/ingestion/source/openapi_parser.py +49 -17
  83. datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
  84. datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
  85. datahub/ingestion/source/powerbi/powerbi.py +1 -3
  86. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
  87. datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
  88. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
  89. datahub/ingestion/source/preset.py +7 -4
  90. datahub/ingestion/source/pulsar.py +3 -2
  91. datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
  92. datahub/ingestion/source/redash.py +31 -7
  93. datahub/ingestion/source/redshift/config.py +4 -0
  94. datahub/ingestion/source/redshift/datashares.py +236 -0
  95. datahub/ingestion/source/redshift/lineage.py +6 -2
  96. datahub/ingestion/source/redshift/lineage_v2.py +24 -9
  97. datahub/ingestion/source/redshift/profile.py +1 -1
  98. datahub/ingestion/source/redshift/query.py +133 -33
  99. datahub/ingestion/source/redshift/redshift.py +46 -73
  100. datahub/ingestion/source/redshift/redshift_schema.py +186 -6
  101. datahub/ingestion/source/redshift/report.py +3 -0
  102. datahub/ingestion/source/s3/config.py +5 -5
  103. datahub/ingestion/source/s3/source.py +20 -41
  104. datahub/ingestion/source/salesforce.py +550 -275
  105. datahub/ingestion/source/schema_inference/object.py +1 -1
  106. datahub/ingestion/source/sigma/sigma.py +1 -1
  107. datahub/ingestion/source/slack/slack.py +31 -10
  108. datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
  109. datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
  110. datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
  111. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
  112. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  113. datahub/ingestion/source/sql/athena.py +10 -16
  114. datahub/ingestion/source/sql/druid.py +1 -5
  115. datahub/ingestion/source/sql/hive.py +15 -6
  116. datahub/ingestion/source/sql/hive_metastore.py +3 -2
  117. datahub/ingestion/source/sql/mssql/job_models.py +29 -0
  118. datahub/ingestion/source/sql/mssql/source.py +11 -5
  119. datahub/ingestion/source/sql/oracle.py +127 -63
  120. datahub/ingestion/source/sql/sql_common.py +16 -18
  121. datahub/ingestion/source/sql/sql_types.py +2 -2
  122. datahub/ingestion/source/sql/teradata.py +19 -5
  123. datahub/ingestion/source/sql/trino.py +2 -2
  124. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  125. datahub/ingestion/source/superset.py +222 -62
  126. datahub/ingestion/source/tableau/tableau.py +22 -6
  127. datahub/ingestion/source/tableau/tableau_common.py +3 -2
  128. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  129. datahub/ingestion/source/unity/source.py +11 -1
  130. datahub/ingestion/source/vertexai.py +697 -0
  131. datahub/ingestion/source_config/pulsar.py +3 -1
  132. datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
  133. datahub/lite/duckdb_lite.py +3 -10
  134. datahub/lite/lite_local.py +1 -1
  135. datahub/lite/lite_util.py +4 -3
  136. datahub/metadata/_schema_classes.py +714 -417
  137. datahub/metadata/_urns/urn_defs.py +1673 -1649
  138. datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
  139. datahub/metadata/schema.avsc +16438 -16603
  140. datahub/metadata/schemas/AssertionInfo.avsc +3 -1
  141. datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  142. datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
  143. datahub/metadata/schemas/ChartInfo.avsc +1 -0
  144. datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
  145. datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
  146. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  147. datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
  148. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  149. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  150. datahub/metadata/schemas/DataProcessKey.avsc +2 -1
  151. datahub/metadata/schemas/DataProductKey.avsc +2 -1
  152. datahub/metadata/schemas/DomainKey.avsc +2 -1
  153. datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  154. datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
  155. datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
  156. datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
  157. datahub/metadata/schemas/IncidentInfo.avsc +130 -46
  158. datahub/metadata/schemas/InputFields.avsc +3 -1
  159. datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
  160. datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
  161. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  162. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  163. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  164. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
  165. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
  166. datahub/metadata/schemas/PostKey.avsc +2 -1
  167. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  168. datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
  169. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
  170. datahub/metadata/schemas/VersionProperties.avsc +18 -0
  171. datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
  172. datahub/pydantic/__init__.py +0 -0
  173. datahub/pydantic/compat.py +58 -0
  174. datahub/sdk/__init__.py +30 -12
  175. datahub/sdk/_all_entities.py +1 -1
  176. datahub/sdk/_attribution.py +4 -0
  177. datahub/sdk/_shared.py +258 -16
  178. datahub/sdk/_utils.py +35 -0
  179. datahub/sdk/container.py +30 -6
  180. datahub/sdk/dataset.py +118 -20
  181. datahub/sdk/{_entity.py → entity.py} +24 -1
  182. datahub/sdk/entity_client.py +1 -1
  183. datahub/sdk/main_client.py +23 -0
  184. datahub/sdk/resolver_client.py +17 -29
  185. datahub/sdk/search_client.py +50 -0
  186. datahub/sdk/search_filters.py +374 -0
  187. datahub/specific/dataset.py +3 -4
  188. datahub/sql_parsing/_sqlglot_patch.py +2 -10
  189. datahub/sql_parsing/schema_resolver.py +1 -1
  190. datahub/sql_parsing/split_statements.py +220 -126
  191. datahub/sql_parsing/sql_parsing_common.py +7 -0
  192. datahub/sql_parsing/sqlglot_lineage.py +1 -1
  193. datahub/sql_parsing/sqlglot_utils.py +1 -4
  194. datahub/testing/check_sql_parser_result.py +5 -6
  195. datahub/testing/compare_metadata_json.py +7 -6
  196. datahub/testing/pytest_hooks.py +56 -0
  197. datahub/upgrade/upgrade.py +2 -2
  198. datahub/utilities/file_backed_collections.py +3 -14
  199. datahub/utilities/ingest_utils.py +106 -0
  200. datahub/utilities/mapping.py +1 -1
  201. datahub/utilities/memory_footprint.py +3 -2
  202. datahub/utilities/sentinels.py +22 -0
  203. datahub/utilities/unified_diff.py +5 -1
  204. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
  205. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/sdk/_shared.py CHANGED
@@ -1,14 +1,18 @@
1
+ from __future__ import annotations
2
+
1
3
  import warnings
2
4
  from datetime import datetime
3
5
  from typing import (
4
6
  TYPE_CHECKING,
7
+ Callable,
5
8
  List,
6
9
  Optional,
10
+ Sequence,
7
11
  Tuple,
8
12
  Union,
9
13
  )
10
14
 
11
- from typing_extensions import TypeAlias
15
+ from typing_extensions import TypeAlias, assert_never
12
16
 
13
17
  import datahub.metadata.schema_classes as models
14
18
  from datahub.emitter.mce_builder import (
@@ -20,6 +24,7 @@ from datahub.emitter.mce_builder import (
20
24
  from datahub.emitter.mcp_builder import ContainerKey
21
25
  from datahub.errors import MultipleSubtypesWarning, SdkUsageError
22
26
  from datahub.metadata.urns import (
27
+ ContainerUrn,
23
28
  CorpGroupUrn,
24
29
  CorpUserUrn,
25
30
  DataJobUrn,
@@ -32,7 +37,8 @@ from datahub.metadata.urns import (
32
37
  TagUrn,
33
38
  Urn,
34
39
  )
35
- from datahub.sdk._entity import Entity
40
+ from datahub.sdk._utils import add_list_unique, remove_list_unique
41
+ from datahub.sdk.entity import Entity
36
42
  from datahub.utilities.urns.error import InvalidUrnError
37
43
 
38
44
  if TYPE_CHECKING:
@@ -44,6 +50,8 @@ DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
44
50
 
45
51
  ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
46
52
 
53
+ _DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
54
+
47
55
 
48
56
  def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
49
57
  if ts is None:
@@ -83,6 +91,13 @@ class HasPlatformInstance(Entity):
83
91
  )
84
92
  )
85
93
 
94
@property
def platform(self) -> Optional[DataPlatformUrn]:
    """Return the platform urn stored on the DataPlatformInstance aspect.

    Returns None when the aspect is absent or its ``platform`` field is falsy.
    """
    instance_aspect = self._get_aspect(models.DataPlatformInstanceClass)
    if not instance_aspect or not instance_aspect.platform:
        return None
    return DataPlatformUrn.from_string(instance_aspect.platform)
100
+
86
101
  @property
87
102
  def platform_instance(self) -> Optional[DataPlatformInstanceUrn]:
88
103
  dataPlatformInstance = self._get_aspect(models.DataPlatformInstanceClass)
@@ -112,11 +127,11 @@ class HasSubtype(Entity):
112
127
  self._set_aspect(models.SubTypesClass(typeNames=[subtype]))
113
128
 
114
129
 
130
+ # TODO: Reference OwnershipTypeClass as the valid ownership type enum.
115
131
  OwnershipTypeType: TypeAlias = Union[str, OwnershipTypeUrn]
116
132
  OwnerInputType: TypeAlias = Union[
117
- str,
118
133
  ActorUrn,
119
- Tuple[Union[str, ActorUrn], OwnershipTypeType],
134
+ Tuple[ActorUrn, OwnershipTypeType],
120
135
  models.OwnerClass,
121
136
  ]
122
137
  OwnersInputType: TypeAlias = List[OwnerInputType]
@@ -126,15 +141,17 @@ class HasOwnership(Entity):
126
141
  __slots__ = ()
127
142
 
128
143
  @staticmethod
129
- def _parse_owner_class(owner: OwnerInputType) -> models.OwnerClass:
144
+ def _parse_owner_class(owner: OwnerInputType) -> Tuple[models.OwnerClass, bool]:
130
145
  if isinstance(owner, models.OwnerClass):
131
- return owner
146
+ return owner, False
132
147
 
148
+ was_type_specified = False
133
149
  owner_type = models.OwnershipTypeClass.TECHNICAL_OWNER
134
150
  owner_type_urn = None
135
151
 
136
152
  if isinstance(owner, tuple):
137
153
  raw_owner, raw_owner_type = owner
154
+ was_type_specified = True
138
155
 
139
156
  if isinstance(raw_owner_type, OwnershipTypeUrn):
140
157
  owner_type = models.OwnershipTypeClass.CUSTOM
@@ -151,17 +168,15 @@ class HasOwnership(Entity):
151
168
  owner=make_user_urn(raw_owner),
152
169
  type=owner_type,
153
170
  typeUrn=owner_type_urn,
154
- )
171
+ ), was_type_specified
155
172
  elif isinstance(raw_owner, Urn):
156
173
  return models.OwnerClass(
157
174
  owner=str(raw_owner),
158
175
  type=owner_type,
159
176
  typeUrn=owner_type_urn,
160
- )
177
+ ), was_type_specified
161
178
  else:
162
- raise SdkUsageError(
163
- f"Invalid owner {owner}: {type(owner)} is not a valid owner type"
164
- )
179
+ assert_never(raw_owner)
165
180
 
166
181
  # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
167
182
  # Ideally we'd also use first-class ownership type urns here, not strings.
@@ -173,21 +188,74 @@ class HasOwnership(Entity):
173
188
 
174
189
  def set_owners(self, owners: OwnersInputType) -> None:
175
190
  # TODO: add docs on the default parsing + default ownership type
176
- parsed_owners = [self._parse_owner_class(owner) for owner in owners]
191
+ parsed_owners = [self._parse_owner_class(owner)[0] for owner in owners]
177
192
  self._set_aspect(models.OwnershipClass(owners=parsed_owners))
178
193
 
194
@classmethod
def _owner_key_method(
    cls, consider_owner_type: bool
) -> Callable[[models.OwnerClass], Tuple[str, ...]]:
    """Pick the function used to compute an owner's uniqueness key.

    With ``consider_owner_type`` set, the key includes the ownership type;
    otherwise only the owner urn is used.
    """
    return cls._typed_owner_key if consider_owner_type else cls._simple_owner_key
179
202
 
180
- ContainerInputType: TypeAlias = Union["Container", ContainerKey]
203
@classmethod
def _typed_owner_key(cls, owner: models.OwnerClass) -> Tuple[str, str]:
    """Key an owner by (owner urn, ownership type).

    The type component is ``typeUrn`` when it is set, else ``str(owner.type)``.
    """
    owner_urn = owner.owner
    type_component = owner.typeUrn or str(owner.type)
    return (owner_urn, type_component)
206
+
207
@classmethod
def _simple_owner_key(cls, owner: models.OwnerClass) -> Tuple[str,]:
    """Key an owner by its urn alone, ignoring the ownership type."""
    owner_urn = owner.owner
    return (owner_urn,)
210
+
211
def _ensure_owners(self) -> List[models.OwnerClass]:
    """Return the owners list of the ownership aspect.

    The aspect is obtained through ``_setdefault_aspect`` with an empty
    ``OwnershipClass`` as the default value.
    """
    ownership = self._setdefault_aspect(models.OwnershipClass(owners=[]))
    return ownership.owners
214
+
215
def add_owner(self, owner: OwnerInputType) -> None:
    """Add an owner, replacing any existing entry with the same owner and type."""
    # Tricky: when adding an owner, we always use the ownership type.
    # For removals, we only use it if it was explicitly specified.
    new_owner = self._parse_owner_class(owner)[0]
    current_owners = self._ensure_owners()
    add_list_unique(current_owners, key=self._typed_owner_key, item=new_owner)
224
+
225
def remove_owner(self, owner: OwnerInputType) -> None:
    """Remove an owner from the ownership aspect.

    The ownership type takes part in matching only when the parser reports
    that it was explicitly specified; otherwise matching is by owner urn alone.
    """
    target, type_given = self._parse_owner_class(owner)
    # NOTE(review): this goes through _ensure_owners, which presumably creates an
    # empty ownership aspect when none exists - confirm that is intended here.
    current_owners = self._ensure_owners()
    key_fn = self._owner_key_method(type_given)
    remove_list_unique(current_owners, key=key_fn, item=target)
232
+
233
+
234
+ # If you pass in a container object, we can build on top of its browse path.
235
+ # If you pass in a ContainerKey, we can use parent_key() to build the browse path.
236
+ # If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
237
+ # will be treated as raw ids.
238
+ ParentContainerInputType: TypeAlias = Union["Container", ContainerKey, List[UrnOrStr]]
181
239
 
182
240
 
183
241
  class HasContainer(Entity):
184
242
  __slots__ = ()
185
243
 
186
- def _set_container(self, container: Optional[ContainerInputType]) -> None:
244
@staticmethod
def _maybe_parse_as_urn(urn: UrnOrStr) -> UrnOrStr:
    """Parse strings that start with ``urn:li:`` into Urn objects.

    Urn instances and any other strings are returned unchanged.
    """
    if not isinstance(urn, Urn) and urn.startswith("urn:li:"):
        return Urn.from_string(urn)
    return urn
252
+
253
+ def _set_container(self, container: Optional[ParentContainerInputType]) -> None:
187
254
  # We need to allow container to be None. It won't happen for datasets much, but
188
255
  # will be required for root containers.
189
256
  from datahub.sdk.container import Container
190
257
 
258
+ container_urn: Optional[str]
191
259
  browse_path: List[Union[str, models.BrowsePathEntryClass]] = []
192
260
  if isinstance(container, Container):
193
261
  container_urn = container.urn.urn()
@@ -204,6 +272,29 @@ class HasContainer(Entity):
204
272
  urn=container_urn,
205
273
  ),
206
274
  ]
275
+ elif isinstance(container, list):
276
+ parsed_path = [self._maybe_parse_as_urn(entry) for entry in container]
277
+
278
+ # Use the last container in the path as the container urn.
279
+ container_urns = [
280
+ urn.urn() for urn in parsed_path if isinstance(urn, ContainerUrn)
281
+ ]
282
+ container_urn = container_urns[-1] if container_urns else None
283
+
284
+ browse_path = [
285
+ (
286
+ models.BrowsePathEntryClass(
287
+ id=str(entry),
288
+ urn=str(entry),
289
+ )
290
+ if isinstance(entry, Urn)
291
+ else models.BrowsePathEntryClass(
292
+ id=entry,
293
+ urn=None,
294
+ )
295
+ )
296
+ for entry in parsed_path
297
+ ]
207
298
  elif container is not None:
208
299
  container_urn = container.as_urn()
209
300
 
@@ -212,6 +303,13 @@ class HasContainer(Entity):
212
303
  while parent_key is not None:
213
304
  browse_path_reversed.append(parent_key.as_urn())
214
305
  parent_key = parent_key.parent_key()
306
+ if container.instance is not None:
307
+ browse_path_reversed.append(
308
+ DataPlatformInstanceUrn(
309
+ container.platform, container.instance
310
+ ).urn()
311
+ )
312
+
215
313
  browse_path = list(reversed(browse_path_reversed))
216
314
  else:
217
315
  container_urn = None
@@ -236,6 +334,24 @@ class HasContainer(Entity):
236
334
  )
237
335
  )
238
336
 
337
@property
def parent_container(self) -> Optional[ContainerUrn]:
    """Return the urn stored in the container aspect, or None if there is none."""
    container_aspect = self._get_aspect(models.ContainerClass)
    if not container_aspect:
        return None
    return ContainerUrn.from_string(container_aspect.container)
342
+
343
@property
def browse_path(self) -> Optional[List[UrnOrStr]]:
    """Return the browse path as a list of urns and raw ids.

    Entries that carry a urn are returned as parsed Urn objects; the rest are
    returned as their ``id`` string. Returns None when the BrowsePathsV2
    aspect is missing.
    """
    aspect = self._get_aspect(models.BrowsePathsV2Class)
    if not aspect:
        return None
    entries: List[UrnOrStr] = [
        Urn.from_string(entry.urn) if entry.urn else entry.id
        for entry in aspect.path
    ]
    return entries
354
+
239
355
 
240
356
  TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
241
357
  TagsInputType: TypeAlias = List[TagInputType]
@@ -244,6 +360,9 @@ TagsInputType: TypeAlias = List[TagInputType]
244
360
  class HasTags(Entity):
245
361
  __slots__ = ()
246
362
 
363
def _ensure_tags(self) -> List[models.TagAssociationClass]:
    """Return the tags list of the global-tags aspect.

    The aspect is obtained through ``_setdefault_aspect`` with an empty
    ``GlobalTagsClass`` as the default value.
    """
    global_tags = self._setdefault_aspect(models.GlobalTagsClass(tags=[]))
    return global_tags.tags
365
+
247
366
  # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
248
367
  @property
249
368
  def tags(self) -> Optional[List[models.TagAssociationClass]]:
@@ -268,6 +387,24 @@ class HasTags(Entity):
268
387
  )
269
388
  )
270
389
 
390
@classmethod
def _tag_key(cls, tag: models.TagAssociationClass) -> str:
    """Key a tag association by its ``tag`` field."""
    tag_urn = tag.tag
    return tag_urn
393
+
394
def add_tag(self, tag: TagInputType) -> None:
    """Add a tag, replacing any existing association with the same tag key."""
    # The aspect list is fetched before parsing, matching the original call order.
    current_tags = self._ensure_tags()
    key_fn = self._tag_key
    new_tag = self._parse_tag_association_class(tag)
    add_list_unique(current_tags, key_fn, new_tag)
400
+
401
def remove_tag(self, tag: TagInputType) -> None:
    """Remove the tag association(s) whose tag key matches the given tag."""
    # NOTE(review): this goes through _ensure_tags, which presumably creates an
    # empty tags aspect when none exists - confirm that is intended here.
    current_tags = self._ensure_tags()
    key_fn = self._tag_key
    target = self._parse_tag_association_class(tag)
    remove_list_unique(current_tags, key_fn, target)
407
+
271
408
 
272
409
  TermInputType: TypeAlias = Union[
273
410
  str, GlossaryTermUrn, models.GlossaryTermAssociationClass
@@ -278,6 +415,11 @@ TermsInputType: TypeAlias = List[TermInputType]
278
415
  class HasTerms(Entity):
279
416
  __slots__ = ()
280
417
 
418
def _ensure_terms(self) -> List[models.GlossaryTermAssociationClass]:
    """Return the terms list of the glossary-terms aspect.

    The aspect is obtained through ``_setdefault_aspect``; the default value
    is an empty ``GlossaryTermsClass`` stamped with ``_terms_audit_stamp()``.
    """
    default_aspect = models.GlossaryTermsClass(
        terms=[], auditStamp=self._terms_audit_stamp()
    )
    glossary_terms = self._setdefault_aspect(default_aspect)
    return glossary_terms.terms
422
+
281
423
  # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
282
424
  @property
283
425
  def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
@@ -299,8 +441,7 @@ class HasTerms(Entity):
299
441
  def _terms_audit_stamp(self) -> models.AuditStampClass:
300
442
  return models.AuditStampClass(
301
443
  time=0,
302
- # TODO figure out what to put here
303
- actor=CorpUserUrn("__ingestion").urn(),
444
+ actor=_DEFAULT_ACTOR_URN,
304
445
  )
305
446
 
306
447
  def set_terms(self, terms: TermsInputType) -> None:
@@ -313,6 +454,24 @@ class HasTerms(Entity):
313
454
  )
314
455
  )
315
456
 
457
@classmethod
def _terms_key(cls, term: models.GlossaryTermAssociationClass) -> str:
    """Key a glossary term association by its ``urn`` field."""
    # The first parameter of a classmethod is the class, so it is named `cls`.
    return term.urn
460
+
461
def add_term(self, term: TermInputType) -> None:
    """Add a glossary term, replacing any existing association with the same urn."""
    # The aspect list is fetched before parsing, matching the original call order.
    current_terms = self._ensure_terms()
    key_fn = self._terms_key
    new_term = self._parse_glossary_term_association_class(term)
    add_list_unique(current_terms, key_fn, new_term)
467
+
468
def remove_term(self, term: TermInputType) -> None:
    """Remove the glossary term association(s) whose urn matches the given term."""
    # NOTE(review): this goes through _ensure_terms, which presumably creates an
    # empty terms aspect when none exists - confirm that is intended here.
    current_terms = self._ensure_terms()
    key_fn = self._terms_key
    target = self._parse_glossary_term_association_class(term)
    remove_list_unique(current_terms, key_fn, target)
474
+
316
475
 
317
476
  DomainInputType: TypeAlias = Union[str, DomainUrn]
318
477
 
@@ -336,3 +495,86 @@ class HasDomain(Entity):
336
495
  def set_domain(self, domain: DomainInputType) -> None:
337
496
  domain_urn = DomainUrn.from_string(domain) # basically a type assertion
338
497
  self._set_aspect(models.DomainsClass(domains=[str(domain_urn)]))
498
+
499
+
500
+ LinkInputType: TypeAlias = Union[
501
+ str,
502
+ Tuple[str, str], # url, description
503
+ models.InstitutionalMemoryMetadataClass,
504
+ ]
505
+ LinksInputType: TypeAlias = Sequence[LinkInputType]
506
+
507
+
508
class HasInstitutionalMemory(Entity):
    """Mixin that exposes an entity's links (the institutionalMemory aspect)."""

    __slots__ = ()

    # Internally the aspect is called institutionalMemory, and so much of the code
    # uses that name. However, the public-facing API is called "links", since
    # that's what we call these in the UI.

    def _ensure_institutional_memory(
        self,
    ) -> List[models.InstitutionalMemoryMetadataClass]:
        """Return the elements list of the institutionalMemory aspect.

        The aspect is obtained through ``_setdefault_aspect`` with an empty
        ``InstitutionalMemoryClass`` as the default value.
        """
        return self._setdefault_aspect(
            models.InstitutionalMemoryClass(elements=[])
        ).elements

    @property
    def links(self) -> Optional[List[models.InstitutionalMemoryMetadataClass]]:
        """Return the link elements, or None when the aspect is missing."""
        if institutional_memory := self._get_aspect(models.InstitutionalMemoryClass):
            return institutional_memory.elements
        return None

    @classmethod
    def _institutional_memory_audit_stamp(cls) -> models.AuditStampClass:
        """Build the audit stamp used for links: time 0 and the default actor."""
        return models.AuditStampClass(
            time=0,
            actor=_DEFAULT_ACTOR_URN,
        )

    @classmethod
    def _parse_link_association_class(
        cls, link: LinkInputType
    ) -> models.InstitutionalMemoryMetadataClass:
        """Convert a link input into an InstitutionalMemoryMetadataClass.

        Accepted inputs:
        - an InstitutionalMemoryMetadataClass, returned unchanged;
        - a string, used as both the url and the description;
        - a ``(url, description)`` 2-tuple.

        Anything else falls through to ``assert_never``.
        """
        if isinstance(link, models.InstitutionalMemoryMetadataClass):
            return link
        elif isinstance(link, str):
            return models.InstitutionalMemoryMetadataClass(
                url=link,
                description=link,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        elif isinstance(link, tuple) and len(link) == 2:
            url, description = link
            return models.InstitutionalMemoryMetadataClass(
                url=url,
                description=description,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        else:
            assert_never(link)

    def set_links(self, links: LinksInputType) -> None:
        """Replace the institutionalMemory aspect with the parsed ``links``."""
        self._set_aspect(
            models.InstitutionalMemoryClass(
                elements=[self._parse_link_association_class(link) for link in links]
            )
        )

    @classmethod
    def _link_key(cls, link: models.InstitutionalMemoryMetadataClass) -> str:
        """Key a link by its url."""
        return link.url

    def add_link(self, link: LinkInputType) -> None:
        """Add a link, replacing any existing element with the same url."""
        add_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )

    def remove_link(self, link: LinkInputType) -> None:
        """Remove the link element(s) whose url matches the given link."""
        # NOTE(review): this goes through _ensure_institutional_memory, which
        # presumably creates an empty aspect when none exists - confirm intent.
        remove_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )
datahub/sdk/_utils.py ADDED
@@ -0,0 +1,35 @@
1
+ from typing import Any, Callable, List, Protocol, TypeVar
2
+
3
+ from datahub.errors import ItemNotFoundError
4
+
5
+
6
class _SupportsEq(Protocol):
    """Structural type for values that can be compared with ``==``."""

    def __eq__(self, other: Any) -> bool:
        ...
8
+
9
+
10
+ T = TypeVar("T")
11
+ K = TypeVar("K", bound=_SupportsEq)
12
+
13
+
14
def add_list_unique(lst: List[T], key: Callable[[T], K], item: T) -> None:
    """Insert ``item`` into ``lst`` in place, keeping keys unique.

    If an element with the same key already exists, the first such element is
    overwritten with ``item``; otherwise ``item`` is appended.
    """
    wanted = key(item)
    position = next(
        (idx for idx, current in enumerate(lst) if key(current) == wanted),
        None,
    )
    if position is None:
        lst.append(item)
    else:
        lst[position] = item
21
+
22
+
23
def remove_list_unique(
    lst: List[T], key: Callable[[T], K], item: T, *, missing_ok: bool = True
) -> None:
    """Remove every element of ``lst`` whose key equals the key of ``item``.

    The list is modified in place, so callers holding a reference to it see
    the change.

    Args:
        lst: The list to remove from.
        key: Function that computes the comparison key of an element.
        item: The item whose key selects the elements to remove.
        missing_ok: If False, raise when no element matched.

    Raises:
        ItemNotFoundError: If nothing matched and ``missing_ok`` is False.
    """
    # Poor man's patch implementation.
    item_key = key(item)

    # Tricky: in case there are already duplicates, we want to remove all of them.
    # Build the filtered list first instead of popping while iterating: popping
    # shifts later elements left, so the element after each removal is skipped.
    kept = [existing for existing in lst if not (key(existing) == item_key)]

    if len(kept) != len(lst):
        lst[:] = kept  # slice assignment keeps the same list object
    elif not missing_ok:
        raise ItemNotFoundError(f"Cannot remove item {item} from list: not found")
datahub/sdk/container.py CHANGED
@@ -16,22 +16,26 @@ from datahub.metadata.urns import (
16
16
  ContainerUrn,
17
17
  Urn,
18
18
  )
19
- from datahub.sdk._entity import Entity
20
19
  from datahub.sdk._shared import (
21
20
  DomainInputType,
22
21
  HasContainer,
23
22
  HasDomain,
23
+ HasInstitutionalMemory,
24
24
  HasOwnership,
25
25
  HasPlatformInstance,
26
26
  HasSubtype,
27
27
  HasTags,
28
28
  HasTerms,
29
+ LinksInputType,
29
30
  OwnersInputType,
31
+ ParentContainerInputType,
30
32
  TagsInputType,
31
33
  TermsInputType,
32
34
  make_time_stamp,
33
35
  parse_time_stamp,
34
36
  )
37
+ from datahub.sdk.entity import Entity, ExtraAspectsType
38
+ from datahub.utilities.sentinels import Auto, auto
35
39
 
36
40
 
37
41
  class Container(
@@ -39,6 +43,7 @@ class Container(
39
43
  HasSubtype,
40
44
  HasContainer,
41
45
  HasOwnership,
46
+ HasInstitutionalMemory,
42
47
  HasTags,
43
48
  HasTerms,
44
49
  HasDomain,
@@ -54,7 +59,7 @@ class Container(
54
59
  self,
55
60
  /,
56
61
  # Identity.
57
- container_key: ContainerKey | ContainerUrn,
62
+ container_key: ContainerKey,
58
63
  *,
59
64
  # Container attributes.
60
65
  display_name: str,
@@ -66,17 +71,23 @@ class Container(
66
71
  created: Optional[datetime] = None,
67
72
  last_modified: Optional[datetime] = None,
68
73
  # Standard aspects.
74
+ parent_container: Auto | ParentContainerInputType | None = auto,
69
75
  subtype: Optional[str] = None,
70
76
  owners: Optional[OwnersInputType] = None,
77
+ links: Optional[LinksInputType] = None,
71
78
  tags: Optional[TagsInputType] = None,
72
79
  terms: Optional[TermsInputType] = None,
73
80
  domain: Optional[DomainInputType] = None,
81
+ extra_aspects: ExtraAspectsType = None,
74
82
  ):
83
+ # Hack: while the type annotations say container_key is always a ContainerKey,
84
+ # we allow ContainerUrn to make the graph-based constructor work.
75
85
  if isinstance(container_key, ContainerUrn):
76
86
  urn = container_key
77
87
  else:
78
88
  urn = ContainerUrn.from_string(container_key.as_urn())
79
89
  super().__init__(urn)
90
+ self._set_extra_aspects(extra_aspects)
80
91
 
81
92
  # This needs to come first to ensure that the display name is registered.
82
93
  self._ensure_container_props(name=display_name)
@@ -85,8 +96,6 @@ class Container(
85
96
  if isinstance(container_key, ContainerKey):
86
97
  self._set_platform_instance(container_key.platform, container_key.instance)
87
98
 
88
- self._set_container(container_key.parent_key())
89
-
90
99
  self.set_custom_properties(
91
100
  {
92
101
  **container_key.property_dict(),
@@ -100,6 +109,18 @@ class Container(
100
109
  env = container_key.env if container_key.env in ALL_ENV_TYPES else None
101
110
  if _INCLUDE_ENV_IN_CONTAINER_PROPERTIES and env is not None:
102
111
  self._ensure_container_props().env = env
112
+ else:
113
+ self.set_custom_properties(extra_properties or {})
114
+
115
+ if parent_container is auto:
116
+ if not isinstance(container_key, ContainerKey):
117
+ raise SdkUsageError(
118
+ "Either a container_key or parent_container must be provided"
119
+ )
120
+
121
+ self._set_container(container_key.parent_key())
122
+ else:
123
+ self._set_container(parent_container)
103
124
 
104
125
  if description is not None:
105
126
  self.set_description(description)
@@ -116,6 +137,8 @@ class Container(
116
137
  self.set_subtype(subtype)
117
138
  if owners is not None:
118
139
  self.set_owners(owners)
140
+ if links is not None:
141
+ self.set_links(links)
119
142
  if tags is not None:
120
143
  self.set_tags(tags)
121
144
  if terms is not None:
@@ -126,7 +149,8 @@ class Container(
126
149
  @classmethod
127
150
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
128
151
  assert isinstance(urn, ContainerUrn)
129
- entity = cls(urn, display_name="__dummy_value__")
152
+
153
+ entity = cls(urn, display_name="__dummy_value__", parent_container=None) # type: ignore[arg-type]
130
154
  return entity._init_from_graph(current_aspects)
131
155
 
132
156
  def _ensure_container_props(
@@ -147,7 +171,7 @@ class Container(
147
171
  return self._ensure_container_props().name
148
172
 
149
173
  def set_display_name(self, value: str) -> None:
150
- self._ensure_container_props().name = value
174
+ self._ensure_container_props(name=value).name = value
151
175
 
152
176
  @property
153
177
  def description(self) -> Optional[str]: