acryl-datahub 1.0.0rc3__py3-none-any.whl → 1.0.0rc5__py3-none-any.whl

This diff shows the changes between publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the package's advisory details for more information.

Files changed (30)
  1. {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/METADATA +2377 -2377
  2. {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/RECORD +30 -27
  3. datahub/_version.py +1 -1
  4. datahub/cli/ingest_cli.py +27 -92
  5. datahub/emitter/mcp_builder.py +4 -1
  6. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -0
  7. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
  8. datahub/ingestion/source/openapi_parser.py +46 -14
  9. datahub/ingestion/source/unity/source.py +11 -1
  10. datahub/metadata/_schema_classes.py +17 -0
  11. datahub/metadata/schema.avsc +21 -3
  12. datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
  13. datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
  14. datahub/metadata/schemas/MetadataChangeEvent.avsc +13 -0
  15. datahub/sdk/_attribution.py +4 -0
  16. datahub/sdk/_entity.py +2 -0
  17. datahub/sdk/_shared.py +163 -13
  18. datahub/sdk/_utils.py +35 -0
  19. datahub/sdk/container.py +20 -4
  20. datahub/sdk/dataset.py +104 -14
  21. datahub/sdk/main_client.py +17 -0
  22. datahub/specific/dataset.py +3 -4
  23. datahub/sql_parsing/split_statements.py +20 -13
  24. datahub/utilities/file_backed_collections.py +3 -14
  25. datahub/utilities/ingest_utils.py +106 -0
  26. datahub/utilities/sentinels.py +22 -0
  27. {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/LICENSE +0 -0
  28. {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/WHEEL +0 -0
  29. {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/entry_points.txt +0 -0
  30. {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/top_level.txt +0 -0
@@ -16,7 +16,8 @@
16
16
  },
17
17
  {
18
18
  "Searchable": {
19
- "fieldType": "TEXT_PARTIAL"
19
+ "fieldType": "KEYWORD",
20
+ "queryByDefault": false
20
21
  },
21
22
  "type": "string",
22
23
  "name": "type",
@@ -83,7 +84,9 @@
83
84
  },
84
85
  {
85
86
  "Searchable": {
86
- "fieldName": "sourceExecutorId"
87
+ "fieldName": "sourceExecutorId",
88
+ "fieldType": "KEYWORD",
89
+ "queryByDefault": false
87
90
  },
88
91
  "type": [
89
92
  "null",
@@ -129,7 +132,9 @@
129
132
  "fields": [
130
133
  {
131
134
  "Searchable": {
132
- "fieldName": "sourceType"
135
+ "fieldName": "sourceType",
136
+ "fieldType": "KEYWORD",
137
+ "queryByDefault": false
133
138
  },
134
139
  "type": {
135
140
  "type": "enum",
@@ -1619,6 +1619,19 @@
1619
1619
  "name": "countryCode",
1620
1620
  "default": null,
1621
1621
  "doc": "two uppercase letters country code. e.g. US"
1622
+ },
1623
+ {
1624
+ "Searchable": {
1625
+ "fieldType": "BOOLEAN",
1626
+ "queryByDefault": false
1627
+ },
1628
+ "type": [
1629
+ "boolean",
1630
+ "null"
1631
+ ],
1632
+ "name": "system",
1633
+ "default": false,
1634
+ "doc": "Whether the corpUser is a system user."
1622
1635
  }
1623
1636
  ],
1624
1637
  "doc": "Linkedin corp user information"
@@ -5,6 +5,10 @@ from typing import Iterator
5
5
 
6
6
  from datahub.utilities.str_enum import StrEnum
7
7
 
8
+ # TODO: This attribution setup is not the final form. I expect that once we have better
9
+ # backend support for attribution and attribution-oriented patch, this will become a bit
10
+ # more sophisticated.
11
+
8
12
 
9
13
  class KnownAttribution(StrEnum):
10
14
  INGESTION = "INGESTION"
datahub/sdk/_entity.py CHANGED
@@ -36,6 +36,8 @@ class Entity:
36
36
 
37
37
  def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
38
38
  self._prev_aspects = current_aspects
39
+
40
+ self._aspects = {}
39
41
  aspect: models._Aspect
40
42
  for aspect_name, aspect in (current_aspects or {}).items(): # type: ignore
41
43
  aspect_copy = type(aspect).from_obj(aspect.to_obj())
datahub/sdk/_shared.py CHANGED
@@ -1,14 +1,17 @@
1
+ from __future__ import annotations
2
+
1
3
  import warnings
2
4
  from datetime import datetime
3
5
  from typing import (
4
6
  TYPE_CHECKING,
7
+ Callable,
5
8
  List,
6
9
  Optional,
7
10
  Tuple,
8
11
  Union,
9
12
  )
10
13
 
11
- from typing_extensions import TypeAlias
14
+ from typing_extensions import TypeAlias, assert_never
12
15
 
13
16
  import datahub.metadata.schema_classes as models
14
17
  from datahub.emitter.mce_builder import (
@@ -20,6 +23,7 @@ from datahub.emitter.mce_builder import (
20
23
  from datahub.emitter.mcp_builder import ContainerKey
21
24
  from datahub.errors import MultipleSubtypesWarning, SdkUsageError
22
25
  from datahub.metadata.urns import (
26
+ ContainerUrn,
23
27
  CorpGroupUrn,
24
28
  CorpUserUrn,
25
29
  DataJobUrn,
@@ -33,6 +37,7 @@ from datahub.metadata.urns import (
33
37
  Urn,
34
38
  )
35
39
  from datahub.sdk._entity import Entity
40
+ from datahub.sdk._utils import add_list_unique, remove_list_unique
36
41
  from datahub.utilities.urns.error import InvalidUrnError
37
42
 
38
43
  if TYPE_CHECKING:
@@ -83,6 +88,13 @@ class HasPlatformInstance(Entity):
83
88
  )
84
89
  )
85
90
 
91
+ @property
92
+ def platform(self) -> Optional[DataPlatformUrn]:
93
+ dataPlatform = self._get_aspect(models.DataPlatformInstanceClass)
94
+ if dataPlatform and dataPlatform.platform:
95
+ return DataPlatformUrn.from_string(dataPlatform.platform)
96
+ return None
97
+
86
98
  @property
87
99
  def platform_instance(self) -> Optional[DataPlatformInstanceUrn]:
88
100
  dataPlatformInstance = self._get_aspect(models.DataPlatformInstanceClass)
@@ -112,11 +124,11 @@ class HasSubtype(Entity):
112
124
  self._set_aspect(models.SubTypesClass(typeNames=[subtype]))
113
125
 
114
126
 
127
+ # TODO: Reference OwnershipTypeClass as the valid ownership type enum.
115
128
  OwnershipTypeType: TypeAlias = Union[str, OwnershipTypeUrn]
116
129
  OwnerInputType: TypeAlias = Union[
117
- str,
118
130
  ActorUrn,
119
- Tuple[Union[str, ActorUrn], OwnershipTypeType],
131
+ Tuple[ActorUrn, OwnershipTypeType],
120
132
  models.OwnerClass,
121
133
  ]
122
134
  OwnersInputType: TypeAlias = List[OwnerInputType]
@@ -126,15 +138,17 @@ class HasOwnership(Entity):
126
138
  __slots__ = ()
127
139
 
128
140
  @staticmethod
129
- def _parse_owner_class(owner: OwnerInputType) -> models.OwnerClass:
141
+ def _parse_owner_class(owner: OwnerInputType) -> Tuple[models.OwnerClass, bool]:
130
142
  if isinstance(owner, models.OwnerClass):
131
- return owner
143
+ return owner, False
132
144
 
145
+ was_type_specified = False
133
146
  owner_type = models.OwnershipTypeClass.TECHNICAL_OWNER
134
147
  owner_type_urn = None
135
148
 
136
149
  if isinstance(owner, tuple):
137
150
  raw_owner, raw_owner_type = owner
151
+ was_type_specified = True
138
152
 
139
153
  if isinstance(raw_owner_type, OwnershipTypeUrn):
140
154
  owner_type = models.OwnershipTypeClass.CUSTOM
@@ -151,17 +165,15 @@ class HasOwnership(Entity):
151
165
  owner=make_user_urn(raw_owner),
152
166
  type=owner_type,
153
167
  typeUrn=owner_type_urn,
154
- )
168
+ ), was_type_specified
155
169
  elif isinstance(raw_owner, Urn):
156
170
  return models.OwnerClass(
157
171
  owner=str(raw_owner),
158
172
  type=owner_type,
159
173
  typeUrn=owner_type_urn,
160
- )
174
+ ), was_type_specified
161
175
  else:
162
- raise SdkUsageError(
163
- f"Invalid owner {owner}: {type(owner)} is not a valid owner type"
164
- )
176
+ assert_never(raw_owner)
165
177
 
166
178
  # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
167
179
  # Ideally we'd also use first-class ownership type urns here, not strings.
@@ -173,21 +185,74 @@ class HasOwnership(Entity):
173
185
 
174
186
  def set_owners(self, owners: OwnersInputType) -> None:
175
187
  # TODO: add docs on the default parsing + default ownership type
176
- parsed_owners = [self._parse_owner_class(owner) for owner in owners]
188
+ parsed_owners = [self._parse_owner_class(owner)[0] for owner in owners]
177
189
  self._set_aspect(models.OwnershipClass(owners=parsed_owners))
178
190
 
191
+ @classmethod
192
+ def _owner_key_method(
193
+ cls, consider_owner_type: bool
194
+ ) -> Callable[[models.OwnerClass], Tuple[str, ...]]:
195
+ if consider_owner_type:
196
+ return cls._typed_owner_key
197
+ else:
198
+ return cls._simple_owner_key
179
199
 
180
- ContainerInputType: TypeAlias = Union["Container", ContainerKey]
200
+ @classmethod
201
+ def _typed_owner_key(cls, owner: models.OwnerClass) -> Tuple[str, str]:
202
+ return (owner.owner, owner.typeUrn or str(owner.type))
203
+
204
+ @classmethod
205
+ def _simple_owner_key(cls, owner: models.OwnerClass) -> Tuple[str,]:
206
+ return (owner.owner,)
207
+
208
+ def _ensure_owners(self) -> List[models.OwnerClass]:
209
+ owners = self._setdefault_aspect(models.OwnershipClass(owners=[])).owners
210
+ return owners
211
+
212
+ def add_owner(self, owner: OwnerInputType) -> None:
213
+ # Tricky: when adding an owner, we always use the ownership type.
214
+ # For removals, we only use it if it was explicitly specified.
215
+ parsed_owner, _ = self._parse_owner_class(owner)
216
+ add_list_unique(
217
+ self._ensure_owners(),
218
+ key=self._typed_owner_key,
219
+ item=parsed_owner,
220
+ )
221
+
222
+ def remove_owner(self, owner: OwnerInputType) -> None:
223
+ parsed_owner, was_type_specified = self._parse_owner_class(owner)
224
+ remove_list_unique(
225
+ self._ensure_owners(),
226
+ key=self._owner_key_method(was_type_specified),
227
+ item=parsed_owner,
228
+ )
229
+
230
+
231
+ # If you pass in a container object, we can build on top of its browse path.
232
+ # If you pass in a ContainerKey, we can use parent_key() to build the browse path.
233
+ # If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
234
+ # will be treated as raw ids.
235
+ ParentContainerInputType: TypeAlias = Union["Container", ContainerKey, List[UrnOrStr]]
181
236
 
182
237
 
183
238
  class HasContainer(Entity):
184
239
  __slots__ = ()
185
240
 
186
- def _set_container(self, container: Optional[ContainerInputType]) -> None:
241
+ @staticmethod
242
+ def _maybe_parse_as_urn(urn: UrnOrStr) -> UrnOrStr:
243
+ if isinstance(urn, Urn):
244
+ return urn
245
+ elif urn.startswith("urn:li:"):
246
+ return Urn.from_string(urn)
247
+ else:
248
+ return urn
249
+
250
+ def _set_container(self, container: Optional[ParentContainerInputType]) -> None:
187
251
  # We need to allow container to be None. It won't happen for datasets much, but
188
252
  # will be required for root containers.
189
253
  from datahub.sdk.container import Container
190
254
 
255
+ container_urn: Optional[str]
191
256
  browse_path: List[Union[str, models.BrowsePathEntryClass]] = []
192
257
  if isinstance(container, Container):
193
258
  container_urn = container.urn.urn()
@@ -204,6 +269,29 @@ class HasContainer(Entity):
204
269
  urn=container_urn,
205
270
  ),
206
271
  ]
272
+ elif isinstance(container, list):
273
+ parsed_path = [self._maybe_parse_as_urn(entry) for entry in container]
274
+
275
+ # Use the last container in the path as the container urn.
276
+ container_urns = [
277
+ urn.urn() for urn in parsed_path if isinstance(urn, ContainerUrn)
278
+ ]
279
+ container_urn = container_urns[-1] if container_urns else None
280
+
281
+ browse_path = [
282
+ (
283
+ models.BrowsePathEntryClass(
284
+ id=str(entry),
285
+ urn=str(entry),
286
+ )
287
+ if isinstance(entry, Urn)
288
+ else models.BrowsePathEntryClass(
289
+ id=entry,
290
+ urn=None,
291
+ )
292
+ )
293
+ for entry in parsed_path
294
+ ]
207
295
  elif container is not None:
208
296
  container_urn = container.as_urn()
209
297
 
@@ -243,6 +331,24 @@ class HasContainer(Entity):
243
331
  )
244
332
  )
245
333
 
334
+ @property
335
+ def parent_container(self) -> Optional[ContainerUrn]:
336
+ if container := self._get_aspect(models.ContainerClass):
337
+ return ContainerUrn.from_string(container.container)
338
+ return None
339
+
340
+ @property
341
+ def browse_path(self) -> Optional[List[UrnOrStr]]:
342
+ if browse_path := self._get_aspect(models.BrowsePathsV2Class):
343
+ path: List[UrnOrStr] = []
344
+ for entry in browse_path.path:
345
+ if entry.urn:
346
+ path.append(Urn.from_string(entry.urn))
347
+ else:
348
+ path.append(entry.id)
349
+ return path
350
+ return None
351
+
246
352
 
247
353
  TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
248
354
  TagsInputType: TypeAlias = List[TagInputType]
@@ -251,6 +357,9 @@ TagsInputType: TypeAlias = List[TagInputType]
251
357
  class HasTags(Entity):
252
358
  __slots__ = ()
253
359
 
360
+ def _ensure_tags(self) -> List[models.TagAssociationClass]:
361
+ return self._setdefault_aspect(models.GlobalTagsClass(tags=[])).tags
362
+
254
363
  # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
255
364
  @property
256
365
  def tags(self) -> Optional[List[models.TagAssociationClass]]:
@@ -275,6 +384,24 @@ class HasTags(Entity):
275
384
  )
276
385
  )
277
386
 
387
+ @classmethod
388
+ def _tag_key(cls, tag: models.TagAssociationClass) -> str:
389
+ return tag.tag
390
+
391
+ def add_tag(self, tag: TagInputType) -> None:
392
+ add_list_unique(
393
+ self._ensure_tags(),
394
+ self._tag_key,
395
+ self._parse_tag_association_class(tag),
396
+ )
397
+
398
+ def remove_tag(self, tag: TagInputType) -> None:
399
+ remove_list_unique(
400
+ self._ensure_tags(),
401
+ self._tag_key,
402
+ self._parse_tag_association_class(tag),
403
+ )
404
+
278
405
 
279
406
  TermInputType: TypeAlias = Union[
280
407
  str, GlossaryTermUrn, models.GlossaryTermAssociationClass
@@ -285,6 +412,11 @@ TermsInputType: TypeAlias = List[TermInputType]
285
412
  class HasTerms(Entity):
286
413
  __slots__ = ()
287
414
 
415
+ def _ensure_terms(self) -> List[models.GlossaryTermAssociationClass]:
416
+ return self._setdefault_aspect(
417
+ models.GlossaryTermsClass(terms=[], auditStamp=self._terms_audit_stamp())
418
+ ).terms
419
+
288
420
  # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
289
421
  @property
290
422
  def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
@@ -320,6 +452,24 @@ class HasTerms(Entity):
320
452
  )
321
453
  )
322
454
 
455
+ @classmethod
456
+ def _terms_key(self, term: models.GlossaryTermAssociationClass) -> str:
457
+ return term.urn
458
+
459
+ def add_term(self, term: TermInputType) -> None:
460
+ add_list_unique(
461
+ self._ensure_terms(),
462
+ self._terms_key,
463
+ self._parse_glossary_term_association_class(term),
464
+ )
465
+
466
+ def remove_term(self, term: TermInputType) -> None:
467
+ remove_list_unique(
468
+ self._ensure_terms(),
469
+ self._terms_key,
470
+ self._parse_glossary_term_association_class(term),
471
+ )
472
+
323
473
 
324
474
  DomainInputType: TypeAlias = Union[str, DomainUrn]
325
475
 
datahub/sdk/_utils.py ADDED
@@ -0,0 +1,35 @@
1
+ from typing import Any, Callable, List, Protocol, TypeVar
2
+
3
+ from datahub.errors import ItemNotFoundError
4
+
5
+
6
+ class _SupportsEq(Protocol):
7
+ def __eq__(self, other: Any) -> bool: ...
8
+
9
+
10
+ T = TypeVar("T")
11
+ K = TypeVar("K", bound=_SupportsEq)
12
+
13
+
14
+ def add_list_unique(lst: List[T], key: Callable[[T], K], item: T) -> None:
15
+ item_key = key(item)
16
+ for i, existing in enumerate(lst):
17
+ if key(existing) == item_key:
18
+ lst[i] = item
19
+ return
20
+ lst.append(item)
21
+
22
+
23
+ def remove_list_unique(
24
+ lst: List[T], key: Callable[[T], K], item: T, *, missing_ok: bool = True
25
+ ) -> None:
26
+ # Poor man's patch implementation.
27
+ item_key = key(item)
28
+ removed = False
29
+ for i, existing in enumerate(lst):
30
+ if key(existing) == item_key:
31
+ lst.pop(i)
32
+ removed = True
33
+ # Tricky: no break. In case there's already duplicates, we want to remove all of them.
34
+ if not removed and not missing_ok:
35
+ raise ItemNotFoundError(f"Cannot remove item {item} from list: not found")
datahub/sdk/container.py CHANGED
@@ -27,11 +27,13 @@ from datahub.sdk._shared import (
27
27
  HasTags,
28
28
  HasTerms,
29
29
  OwnersInputType,
30
+ ParentContainerInputType,
30
31
  TagsInputType,
31
32
  TermsInputType,
32
33
  make_time_stamp,
33
34
  parse_time_stamp,
34
35
  )
36
+ from datahub.utilities.sentinels import Auto, auto
35
37
 
36
38
 
37
39
  class Container(
@@ -54,7 +56,7 @@ class Container(
54
56
  self,
55
57
  /,
56
58
  # Identity.
57
- container_key: ContainerKey | ContainerUrn,
59
+ container_key: ContainerKey,
58
60
  *,
59
61
  # Container attributes.
60
62
  display_name: str,
@@ -66,12 +68,15 @@ class Container(
66
68
  created: Optional[datetime] = None,
67
69
  last_modified: Optional[datetime] = None,
68
70
  # Standard aspects.
71
+ parent_container: Auto | ParentContainerInputType | None = auto,
69
72
  subtype: Optional[str] = None,
70
73
  owners: Optional[OwnersInputType] = None,
71
74
  tags: Optional[TagsInputType] = None,
72
75
  terms: Optional[TermsInputType] = None,
73
76
  domain: Optional[DomainInputType] = None,
74
77
  ):
78
+ # Hack: while the type annotations say container_key is always a ContainerKey,
79
+ # we allow ContainerUrn to make the graph-based constructor work.
75
80
  if isinstance(container_key, ContainerUrn):
76
81
  urn = container_key
77
82
  else:
@@ -85,8 +90,6 @@ class Container(
85
90
  if isinstance(container_key, ContainerKey):
86
91
  self._set_platform_instance(container_key.platform, container_key.instance)
87
92
 
88
- self._set_container(container_key.parent_key())
89
-
90
93
  self.set_custom_properties(
91
94
  {
92
95
  **container_key.property_dict(),
@@ -100,6 +103,18 @@ class Container(
100
103
  env = container_key.env if container_key.env in ALL_ENV_TYPES else None
101
104
  if _INCLUDE_ENV_IN_CONTAINER_PROPERTIES and env is not None:
102
105
  self._ensure_container_props().env = env
106
+ else:
107
+ self.set_custom_properties(extra_properties or {})
108
+
109
+ if parent_container is auto:
110
+ if not isinstance(container_key, ContainerKey):
111
+ raise SdkUsageError(
112
+ "Either a container_key or parent_container must be provided"
113
+ )
114
+
115
+ self._set_container(container_key.parent_key())
116
+ else:
117
+ self._set_container(parent_container)
103
118
 
104
119
  if description is not None:
105
120
  self.set_description(description)
@@ -126,7 +141,8 @@ class Container(
126
141
  @classmethod
127
142
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
128
143
  assert isinstance(urn, ContainerUrn)
129
- entity = cls(urn, display_name="__dummy_value__")
144
+
145
+ entity = cls(urn, display_name="__dummy_value__", parent_container=None) # type: ignore[arg-type]
130
146
  return entity._init_from_graph(current_aspects)
131
147
 
132
148
  def _ensure_container_props(
datahub/sdk/dataset.py CHANGED
@@ -13,13 +13,13 @@ from datahub.errors import (
13
13
  IngestionAttributionWarning,
14
14
  ItemNotFoundError,
15
15
  SchemaFieldKeyError,
16
+ SdkUsageError,
16
17
  )
17
18
  from datahub.ingestion.source.sql.sql_types import resolve_sql_type
18
19
  from datahub.metadata.urns import DatasetUrn, SchemaFieldUrn, Urn
19
20
  from datahub.sdk._attribution import is_ingestion_attribution
20
21
  from datahub.sdk._entity import Entity
21
22
  from datahub.sdk._shared import (
22
- ContainerInputType,
23
23
  DatasetUrnOrStr,
24
24
  DomainInputType,
25
25
  HasContainer,
@@ -30,14 +30,18 @@ from datahub.sdk._shared import (
30
30
  HasTags,
31
31
  HasTerms,
32
32
  OwnersInputType,
33
+ ParentContainerInputType,
34
+ TagInputType,
33
35
  TagsInputType,
36
+ TermInputType,
34
37
  TermsInputType,
35
38
  make_time_stamp,
36
39
  parse_time_stamp,
37
40
  )
41
+ from datahub.sdk._utils import add_list_unique, remove_list_unique
42
+ from datahub.utilities.sentinels import Unset, unset
38
43
 
39
44
  SchemaFieldInputType: TypeAlias = Union[
40
- str,
41
45
  Tuple[str, str], # (name, type)
42
46
  Tuple[str, str, str], # (name, type, description)
43
47
  models.SchemaFieldClass,
@@ -271,6 +275,51 @@ class SchemaField:
271
275
  tags=parsed_tags
272
276
  )
273
277
 
278
+ def add_tag(self, tag: TagInputType) -> None:
279
+ parsed_tag = self._parent._parse_tag_association_class(tag)
280
+
281
+ if is_ingestion_attribution():
282
+ raise SdkUsageError(
283
+ "Adding field tags in ingestion mode is not yet supported. "
284
+ "Use set_tags instead."
285
+ )
286
+ else:
287
+ editable_field = self._ensure_editable_schema_field()
288
+ if editable_field.globalTags is None:
289
+ editable_field.globalTags = models.GlobalTagsClass(tags=[])
290
+
291
+ add_list_unique(
292
+ editable_field.globalTags.tags,
293
+ key=self._parent._tag_key,
294
+ item=parsed_tag,
295
+ )
296
+
297
+ def remove_tag(self, tag: TagInputType) -> None:
298
+ parsed_tag = self._parent._parse_tag_association_class(tag)
299
+
300
+ if is_ingestion_attribution():
301
+ raise SdkUsageError(
302
+ "Adding field tags in ingestion mode is not yet supported. "
303
+ "Use set_tags instead."
304
+ )
305
+ else:
306
+ base_field = self._base_schema_field()
307
+ if base_field.globalTags is not None:
308
+ remove_list_unique(
309
+ base_field.globalTags.tags,
310
+ key=self._parent._tag_key,
311
+ item=parsed_tag,
312
+ missing_ok=True,
313
+ )
314
+
315
+ editable_field = self._ensure_editable_schema_field()
316
+ if editable_field.globalTags is not None:
317
+ remove_list_unique(
318
+ editable_field.globalTags.tags,
319
+ key=self._parent._tag_key,
320
+ item=parsed_tag,
321
+ )
322
+
274
323
  @property
275
324
  def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
276
325
  # TODO: Basically the same implementation as tags - can we share code?
@@ -287,7 +336,7 @@ class SchemaField:
287
336
 
288
337
  return terms
289
338
 
290
- def set_terms(self, terms: List[models.GlossaryTermAssociationClass]) -> None:
339
+ def set_terms(self, terms: TermsInputType) -> None:
291
340
  parsed_terms = [
292
341
  self._parent._parse_glossary_term_association_class(term) for term in terms
293
342
  ]
@@ -318,6 +367,55 @@ class SchemaField:
318
367
  )
319
368
  )
320
369
 
370
+ def add_term(self, term: TermInputType) -> None:
371
+ parsed_term = self._parent._parse_glossary_term_association_class(term)
372
+
373
+ if is_ingestion_attribution():
374
+ raise SdkUsageError(
375
+ "Adding field terms in ingestion mode is not yet supported. "
376
+ "Use set_terms instead."
377
+ )
378
+ else:
379
+ editable_field = self._ensure_editable_schema_field()
380
+ if editable_field.glossaryTerms is None:
381
+ editable_field.glossaryTerms = models.GlossaryTermsClass(
382
+ terms=[],
383
+ auditStamp=self._parent._terms_audit_stamp(),
384
+ )
385
+
386
+ add_list_unique(
387
+ editable_field.glossaryTerms.terms,
388
+ key=self._parent._terms_key,
389
+ item=parsed_term,
390
+ )
391
+
392
+ def remove_term(self, term: TermInputType) -> None:
393
+ parsed_term = self._parent._parse_glossary_term_association_class(term)
394
+
395
+ if is_ingestion_attribution():
396
+ raise SdkUsageError(
397
+ "Removing field terms in ingestion mode is not yet supported. "
398
+ "Use set_terms instead."
399
+ )
400
+ else:
401
+ base_field = self._base_schema_field()
402
+ if base_field.glossaryTerms is not None:
403
+ remove_list_unique(
404
+ base_field.glossaryTerms.terms,
405
+ key=self._parent._terms_key,
406
+ item=parsed_term,
407
+ missing_ok=True,
408
+ )
409
+
410
+ editable_field = self._ensure_editable_schema_field()
411
+ if editable_field.glossaryTerms is not None:
412
+ remove_list_unique(
413
+ editable_field.glossaryTerms.terms,
414
+ key=self._parent._terms_key,
415
+ item=parsed_term,
416
+ missing_ok=True,
417
+ )
418
+
321
419
 
322
420
  class Dataset(
323
421
  HasPlatformInstance,
@@ -352,8 +450,8 @@ class Dataset(
352
450
  created: Optional[datetime] = None,
353
451
  last_modified: Optional[datetime] = None,
354
452
  # Standard aspects.
453
+ parent_container: ParentContainerInputType | Unset = unset,
355
454
  subtype: Optional[str] = None,
356
- container: Optional[ContainerInputType] = None,
357
455
  owners: Optional[OwnersInputType] = None,
358
456
  tags: Optional[TagsInputType] = None,
359
457
  terms: Optional[TermsInputType] = None,
@@ -393,10 +491,10 @@ class Dataset(
393
491
  if last_modified is not None:
394
492
  self.set_last_modified(last_modified)
395
493
 
494
+ if parent_container is not unset:
495
+ self._set_container(parent_container)
396
496
  if subtype is not None:
397
497
  self.set_subtype(subtype)
398
- if container is not None:
399
- self._set_container(container)
400
498
  if owners is not None:
401
499
  self.set_owners(owners)
402
500
  if tags is not None:
@@ -537,14 +635,6 @@ class Dataset(
537
635
  nativeDataType=field_type,
538
636
  description=description,
539
637
  )
540
- elif isinstance(schema_field_input, str):
541
- # TODO: Not sure this branch makes sense - we should probably just require types?
542
- return models.SchemaFieldClass(
543
- fieldPath=schema_field_input,
544
- type=models.SchemaFieldDataTypeClass(models.NullTypeClass()),
545
- nativeDataType="unknown",
546
- description=None,
547
- )
548
638
  else:
549
639
  assert_never(schema_field_input)
550
640