acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc2__py3-none-any.whl

This diff compares the contents of two package versions publicly released to the registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.

@@ -1,7 +1,8 @@
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=U9IGE-YR9bmigwAxXVjXLxWIGHYH0FW2G6D5UE_-ZIg,324
+ datahub/_version.py,sha256=BhSRdcjgmWwrve2zKjSYh9Z1fk_7CHpbmijLhhWMrJM,324
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
+ datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -203,7 +204,7 @@ datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDut
  datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
  datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
  datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
- datahub/ingestion/source/mongodb.py,sha256=0P3PHVvMSXFkFimGvQzOQZF7APjsFOyzQAVQjVlVbuk,21172
+ datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
  datahub/ingestion/source/nifi.py,sha256=FgIbZSCu-mcdnbIpqwvmADnUIxptogUq1sSEkrkwtrc,56089
  datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
  datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
@@ -410,7 +411,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
  datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
  datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
  datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
- datahub/ingestion/source/s3/source.py,sha256=IE_K_HE_S7w8fpGPT8OptU5-VmwapntsI5PePv_wUQA,47412
+ datahub/ingestion/source/s3/source.py,sha256=JwEmVWDEFtPt6iMo82n5DQava8QAKXk_xYl01KAfdHk,47614
  datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
  datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -444,8 +445,8 @@ datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34url
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
- datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fdDN7jO5aHsmTFYC8cahXRT9BSAoDY72heM_WrkSxXo,25648
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=5DUP_uTmbLU01NegFXClqPkpB8LTruIkyIaGUBOTCQw,54718
+ datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
+ datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=Yba6OIWYtukAFMoNtEtX2BXWwJee17Dl58DUyK0myho,54530
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
@@ -869,6 +870,16 @@ datahub/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh4HnUHNmBEU74
  datahub/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
  datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
  datahub/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
+ datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
+ datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
+ datahub/sdk/_attribution.py,sha256=05iNVT_IDO7aU3vU6dJKCF8EoDAI1mwh4mAg_EBf3RY,1121
+ datahub/sdk/_entity.py,sha256=A_AWqNjaV6Y1YvmIiPNm0UyC0aO52G_L-iwgp9XEtYs,3225
+ datahub/sdk/_shared.py,sha256=FIEcYGm3LgRKLqn_i1FDePaqvMljkXIA9f3RMqaF1kY,11137
+ datahub/sdk/container.py,sha256=6Y19pQLN5vXGM86TEKW7ItTBwE3S0U4BsAMilWhyXqw,6511
+ datahub/sdk/dataset.py,sha256=BydV2papBEYL2de72UMNjSkAbsDNpVlXqiEkMlXJiLY,21514
+ datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
+ datahub/sdk/main_client.py,sha256=IKKzBMiKtT5zIMEHrvaMyxJ7DYBVNpqua70J0Ydl74Y,2068
+ datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
  datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
  datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
@@ -1001,9 +1012,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-0.15.0.6rc1.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
- acryl_datahub-0.15.0.6rc1.dist-info/METADATA,sha256=YPtgGtRPaltE8LkNq8PlWsrlzhbE-FQV0wWYkkuPqDc,175375
- acryl_datahub-0.15.0.6rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- acryl_datahub-0.15.0.6rc1.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
- acryl_datahub-0.15.0.6rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-0.15.0.6rc1.dist-info/RECORD,,
+ acryl_datahub-0.15.0.6rc2.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-0.15.0.6rc2.dist-info/METADATA,sha256=Bv5h1BaxWarbwfDdiiaD1wwyonRclpbgYtingIwN4B4,175375
+ acryl_datahub-0.15.0.6rc2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ acryl_datahub-0.15.0.6rc2.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
+ acryl_datahub-0.15.0.6rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-0.15.0.6rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0.6rc1"
+ __version__ = "0.15.0.6rc2"
 
 
  def is_dev_mode() -> bool:
datahub/errors.py ADDED
@@ -0,0 +1,35 @@
+ from datahub.configuration.common import MetaError
+
+ # TODO: Move all other error types to this file.
+
+
+ class SdkUsageError(MetaError):
+     pass
+
+
+ class AlreadyExistsError(SdkUsageError):
+     pass
+
+
+ class ItemNotFoundError(SdkUsageError):
+     pass
+
+
+ class MultipleItemsFoundError(SdkUsageError):
+     pass
+
+
+ class SchemaFieldKeyError(SdkUsageError, KeyError):
+     pass
+
+
+ class IngestionAttributionWarning(Warning):
+     pass
+
+
+ class MultipleSubtypesWarning(Warning):
+     pass
+
+
+ class ExperimentalWarning(Warning):
+     pass
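
The new module gives SDK callers a single base class, `SdkUsageError`, under the specific errors. A minimal sketch of how calling code might branch on the hierarchy; the `lookup_dataset` helper is a hypothetical stand-in, only the exception types come from the module above:

```python
from datahub.errors import ItemNotFoundError, SdkUsageError


def lookup_dataset(name: str) -> str:
    # Hypothetical resolver; always reports a miss for this demo.
    raise ItemNotFoundError(f"no dataset named {name!r}")


try:
    dataset = lookup_dataset("fct_users_created")
except ItemNotFoundError:
    dataset = None  # a miss is an expected outcome here
except SdkUsageError as e:
    # AlreadyExistsError, MultipleItemsFoundError, etc. all land here too
    raise RuntimeError(f"unexpected SDK usage error: {e}") from e

print(dataset)  # -> None
```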
datahub/ingestion/source/mongodb.py CHANGED
@@ -219,26 +219,27 @@ def construct_schema_pymongo(
      """
 
      aggregations: List[Dict] = []
+
+     # The order of the aggregations impacts execution time. By setting the sample/limit aggregation first,
+     # the subsequent aggregations process a much smaller dataset, improving performance.
+     if sample_size:
+         if use_random_sampling:
+             aggregations.append({"$sample": {"size": sample_size}})
+         else:
+             aggregations.append({"$limit": sample_size})
+
      if should_add_document_size_filter:
          doc_size_field = "temporary_doc_size_field"
          # create a temporary field to store the size of the document. filter on it and then remove it.
-         aggregations = [
-             {"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
-             {"$match": {doc_size_field: {"$lt": max_document_size}}},
-             {"$project": {doc_size_field: 0}},
-         ]
-     if use_random_sampling:
-         # get sample documents in collection
-         if sample_size:
-             aggregations.append({"$sample": {"size": sample_size}})
-         documents = collection.aggregate(
-             aggregations,
-             allowDiskUse=True,
+         aggregations.extend(
+             [
+                 {"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
+                 {"$match": {doc_size_field: {"$lt": max_document_size}}},
+                 {"$project": {doc_size_field: 0}},
+             ]
          )
-     else:
-         if sample_size:
-             aggregations.append({"$limit": sample_size})
-         documents = collection.aggregate(aggregations, allowDiskUse=True)
+
+     documents = collection.aggregate(aggregations, allowDiskUse=True)
 
      return construct_schema(list(documents), delimiter)
 
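The reordering matters because `$sample`/`$limit` now run before the size filter, so the `$bsonSize` computation only touches the sampled documents. A sketch of the pipeline the new code assembles when both options are enabled; the concrete values are illustrative, the temporary field name is from the code above:

```python
sample_size = 1000
max_document_size = 16 * 1024 * 1024  # illustrative threshold

pipeline = [
    {"$sample": {"size": sample_size}},  # runs first: later stages see <= 1000 docs
    {"$addFields": {"temporary_doc_size_field": {"$bsonSize": "$$ROOT"}}},
    {"$match": {"temporary_doc_size_field": {"$lt": max_document_size}}},
    {"$project": {"temporary_doc_size_field": 0}},  # drop the helper field again
]
# documents = collection.aggregate(pipeline, allowDiskUse=True)
```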
datahub/ingestion/source/s3/source.py CHANGED
@@ -866,8 +866,21 @@ class S3Source(StatefulIngestionSourceBase):
          Returns:
              List[Folder]: A list of Folder objects representing the partitions found.
          """
+
+         def _is_allowed_path(path_spec_: PathSpec, s3_uri: str) -> bool:
+             allowed = path_spec_.allowed(s3_uri)
+             if not allowed:
+                 logger.debug(f"File {s3_uri} not allowed and skipping")
+                 self.report.report_file_dropped(s3_uri)
+             return allowed
+
+         s3_objects = (
+             obj
+             for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
+             if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
+         )
+
          partitions: List[Folder] = []
-         s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
          grouped_s3_objects_by_dirname = groupby_unsorted(
              s3_objects,
              key=lambda obj: obj.key.rsplit("/", 1)[0],
@@ -878,10 +891,6 @@ class S3Source(StatefulIngestionSourceBase):
             modification_time = None
 
             for item in group:
-                 file_path = self.create_s3_path(item.bucket_name, item.key)
-                 if not path_spec.allowed(file_path):
-                     logger.debug(f"File {file_path} not allowed and skipping")
-                     continue
                 file_size += item.size
                 if creation_time is None or item.last_modified < creation_time:
                     creation_time = item.last_modified
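
Two behavioral notes on this refactor: the allow-list check now happens once per object while the listing is streamed, instead of inside the per-group loop, and skipped files are now counted via `report_file_dropped`. The lazy filter-then-group pattern in isolation, with plain strings and `itertools.groupby` standing in for datahub's `groupby_unsorted` helper (the data is illustrative):

```python
from itertools import groupby  # stand-in for datahub's groupby_unsorted

keys = [
    "year=2024/a.parquet",
    "year=2024/skip.tmp",
    "year=2025/b.parquet",
]


def is_allowed(key: str) -> bool:
    # mirror of _is_allowed_path: filter and record the drop in one place
    ok = key.endswith(".parquet")
    if not ok:
        print(f"dropping {key}")
    return ok


# the generator keeps the listing lazy; nothing is buffered before grouping
allowed = (k for k in keys if is_allowed(k))
for dirname, group in groupby(allowed, key=lambda k: k.rsplit("/", 1)[0]):
    print(dirname, list(group))
```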
datahub/ingestion/source/snowflake/snowflake_schema.py CHANGED
@@ -6,6 +6,7 @@ from datetime import datetime
  from typing import Callable, Dict, Iterable, List, MutableMapping, Optional
 
  from datahub.ingestion.api.report import SupportsAsObj
+ from datahub.ingestion.source.common.subtypes import DatasetSubTypes
  from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
  from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
  from datahub.ingestion.source.snowflake.snowflake_query import (
@@ -100,6 +101,9 @@ class SnowflakeTable(BaseTable):
      def is_hybrid(self) -> bool:
          return self.type is not None and self.type == "HYBRID TABLE"
 
+     def get_subtype(self) -> DatasetSubTypes:
+         return DatasetSubTypes.TABLE
+
 
  @dataclass
  class SnowflakeView(BaseView):
@@ -109,6 +113,9 @@ class SnowflakeView(BaseView):
      column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
      is_secure: bool = False
 
+     def get_subtype(self) -> DatasetSubTypes:
+         return DatasetSubTypes.VIEW
+
 
  @dataclass
  class SnowflakeSchema:
@@ -154,6 +161,9 @@ class SnowflakeStream:
      column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
      last_altered: Optional[datetime] = None
 
+     def get_subtype(self) -> DatasetSubTypes:
+         return DatasetSubTypes.SNOWFLAKE_STREAM
+
 
  class _SnowflakeTagCache:
      def __init__(self) -> None:
datahub/ingestion/source/snowflake/snowflake_schema_gen.py CHANGED
@@ -21,7 +21,6 @@ from datahub.ingestion.glossary.classification_mixin import (
  from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
  from datahub.ingestion.source.common.subtypes import (
      DatasetContainerSubTypes,
-     DatasetSubTypes,
  )
  from datahub.ingestion.source.snowflake.constants import (
      GENERIC_PERMISSION_ERROR_KEY,
@@ -467,7 +466,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
              context=f"{db_name}.{schema_name}",
          )
 
-     def _process_tags(self, snowflake_schema, schema_name, db_name, domain):
+     def _process_tags(
+         self,
+         snowflake_schema: SnowflakeSchema,
+         schema_name: str,
+         db_name: str,
+         domain: str,
+     ) -> None:
          snowflake_schema.tags = self.tag_extractor.get_tags_on_object(
              schema_name=schema_name, db_name=db_name, domain=domain
          )
@@ -837,15 +842,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
          if dpi_aspect:
              yield dpi_aspect
 
-         subTypes = SubTypes(
-             typeNames=(
-                 [DatasetSubTypes.SNOWFLAKE_STREAM]
-                 if isinstance(table, SnowflakeStream)
-                 else [DatasetSubTypes.VIEW]
-                 if isinstance(table, SnowflakeView)
-                 else [DatasetSubTypes.TABLE]
-             )
-         )
+         subTypes = SubTypes(typeNames=[table.get_subtype()])
 
          yield MetadataChangeProposalWrapper(
              entityUrn=dataset_urn, aspect=subTypes
@@ -932,9 +929,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                  "OWNER_ROLE_TYPE": table.owner_role_type,
                  "TABLE_NAME": table.table_name,
                  "BASE_TABLES": table.base_tables,
-                 "STALE_AFTER": table.stale_after.isoformat()
-                 if table.stale_after
-                 else None,
+                 "STALE_AFTER": (
+                     table.stale_after.isoformat() if table.stale_after else None
+                 ),
              }.items()
              if v
          }
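
The net effect of the two Snowflake changes: each schema object (`SnowflakeTable`, `SnowflakeView`, `SnowflakeStream`) now reports its own subtype via `get_subtype()`, so the generator no longer needs an `isinstance` ladder, and a future object type can't silently fall through to `TABLE`. The pattern in miniature, with simplified stand-in classes rather than the package's:

```python
from enum import Enum


class Subtype(Enum):
    TABLE = "Table"
    VIEW = "View"
    STREAM = "Snowflake Stream"


class Table:
    def get_subtype(self) -> Subtype:
        return Subtype.TABLE


class View:
    def get_subtype(self) -> Subtype:
        return Subtype.VIEW


# dispatch is now a method call instead of an isinstance() chain:
for obj in (Table(), View()):
    type_names = [obj.get_subtype()]  # mirrors SubTypes(typeNames=[...])
    print(type_names)
```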
datahub/sdk/__init__.py ADDED
@@ -0,0 +1,33 @@
+ import warnings
+
+ import datahub.metadata.schema_classes as models
+ from datahub.errors import ExperimentalWarning, SdkUsageError
+ from datahub.ingestion.graph.config import DatahubClientConfig
+ from datahub.metadata.urns import (
+     ChartUrn,
+     ContainerUrn,
+     CorpGroupUrn,
+     CorpUserUrn,
+     DashboardUrn,
+     DataPlatformInstanceUrn,
+     DataPlatformUrn,
+     DatasetUrn,
+     DomainUrn,
+     GlossaryTermUrn,
+     SchemaFieldUrn,
+     TagUrn,
+ )
+ from datahub.sdk.container import Container
+ from datahub.sdk.dataset import Dataset
+ from datahub.sdk.main_client import DataHubClient
+
+ warnings.warn(
+     "The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
+     "Our typical backwards-compatibility and stability guarantees do not apply to this code. "
+     "When it's promoted to stable, the import path will change "
+     "from `from datahub.sdk import ...` to `from datahub import ...`.",
+     ExperimentalWarning,
+     stacklevel=2,
+ )
+ del warnings
+ del ExperimentalWarning
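
Because the warning fires when `datahub.sdk` is first imported, a caller who has accepted the experimental status can filter it before that import. A sketch using only the standard `warnings` machinery plus the `ExperimentalWarning` class from the new errors module:

```python
import warnings

from datahub.errors import ExperimentalWarning

# must be installed before the first `datahub.sdk` import triggers the warning
warnings.filterwarnings("ignore", category=ExperimentalWarning)

from datahub.sdk import DataHubClient, Dataset  # noqa: E402  (imports silently now)
```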
datahub/sdk/_all_entities.py ADDED
@@ -0,0 +1,15 @@
+ from typing import Dict, List, Type
+
+ from datahub.sdk._entity import Entity
+ from datahub.sdk.container import Container
+ from datahub.sdk.dataset import Dataset
+
+ # TODO: Is there a better way to declare this?
+ ENTITY_CLASSES_LIST: List[Type[Entity]] = [
+     Container,
+     Dataset,
+ ]
+
+ ENTITY_CLASSES: Dict[str, Type[Entity]] = {
+     cls.get_urn_type().ENTITY_TYPE: cls for cls in ENTITY_CLASSES_LIST
+ }
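
`ENTITY_CLASSES` keys each SDK class by the `ENTITY_TYPE` of its URN type, giving generic code a way to pick the wrapper class for an arbitrary URN. A sketch of that lookup, assuming `Urn.entity_type` returns the same string (the URN values are illustrative):

```python
from datahub.metadata.urns import DatasetUrn
from datahub.sdk._all_entities import ENTITY_CLASSES

urn = DatasetUrn(platform="snowflake", name="db.schema.table", env="PROD")
entity_cls = ENTITY_CLASSES[urn.entity_type]
print(entity_cls)  # -> <class 'datahub.sdk.dataset.Dataset'>
```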
datahub/sdk/_attribution.py ADDED
@@ -0,0 +1,48 @@
+ from __future__ import annotations
+
+ import contextlib
+ from typing import Iterator
+
+ from datahub.utilities.str_enum import StrEnum
+
+
+ class KnownAttribution(StrEnum):
+     INGESTION = "INGESTION"
+     INGESTION_ALTERNATE = "INGESTION_ALTERNATE"
+
+     UI = "UI"
+     SDK = "SDK"
+
+     PROPAGATION = "PROPAGATION"
+
+     def is_ingestion(self) -> bool:
+         return self in (
+             KnownAttribution.INGESTION,
+             KnownAttribution.INGESTION_ALTERNATE,
+         )
+
+
+ _default_attribution = KnownAttribution.SDK
+
+
+ def get_default_attribution() -> KnownAttribution:
+     return _default_attribution
+
+
+ def set_default_attribution(attribution: KnownAttribution) -> None:
+     global _default_attribution
+     _default_attribution = attribution
+
+
+ @contextlib.contextmanager
+ def change_default_attribution(attribution: KnownAttribution) -> Iterator[None]:
+     old_attribution = get_default_attribution()
+     try:
+         set_default_attribution(attribution)
+         yield
+     finally:
+         set_default_attribution(old_attribution)
+
+
+ def is_ingestion_attribution() -> bool:
+     return get_default_attribution().is_ingestion()
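
The context manager exists so attribution can be scoped rather than mutated globally: the `finally` clause restores the previous default even if the wrapped block raises. Intended usage might look like this (the emit step is elided; everything imported here is defined in the module above):

```python
from datahub.sdk._attribution import (
    KnownAttribution,
    change_default_attribution,
    is_ingestion_attribution,
)

print(is_ingestion_attribution())  # False: the module default is SDK

with change_default_attribution(KnownAttribution.INGESTION):
    # metadata written here would carry ingestion attribution
    print(is_ingestion_attribution())  # True

print(is_ingestion_attribution())  # False again: the old default is restored
```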
datahub/sdk/_entity.py ADDED
@@ -0,0 +1,89 @@
+ import abc
+ from typing import List, Optional, Type, Union
+
+ from typing_extensions import Self
+
+ import datahub.metadata.schema_classes as models
+ from datahub.emitter.mce_builder import Aspect as AspectTypeVar
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
+ from datahub.errors import SdkUsageError
+ from datahub.metadata.urns import Urn
+ from datahub.utilities.urns._urn_base import _SpecificUrn
+
+
+ class Entity:
+     __slots__ = ("_urn", "_prev_aspects", "_aspects")
+
+     def __init__(self, /, urn: Urn):
+         # This method is not meant for direct usage.
+         if type(self) is Entity:
+             raise SdkUsageError(f"{Entity.__name__} cannot be instantiated directly.")
+
+         assert isinstance(urn, self.get_urn_type())
+         self._urn: _SpecificUrn = urn
+
+         # prev_aspects is None means this was created from scratch
+         self._prev_aspects: Optional[models.AspectBag] = None
+         self._aspects: models.AspectBag = {}
+
+     @classmethod
+     def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
+         # If an init method from a subclass adds required fields, it also needs to override this method.
+         # An alternative approach would call cls.__new__() to bypass the init method, but it's a bit
+         # too hacky for my taste.
+         entity = cls(urn=urn)
+         return entity._init_from_graph(current_aspects)
+
+     def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
+         self._prev_aspects = current_aspects
+         aspect: models._Aspect
+         for aspect_name, aspect in (current_aspects or {}).items():  # type: ignore
+             aspect_copy = type(aspect).from_obj(aspect.to_obj())
+             self._aspects[aspect_name] = aspect_copy  # type: ignore
+         return self
+
+     @classmethod
+     @abc.abstractmethod
+     def get_urn_type(cls) -> Type[_SpecificUrn]: ...
+
+     @property
+     def urn(self) -> _SpecificUrn:
+         return self._urn
+
+     def _get_aspect(
+         self,
+         aspect_type: Type[AspectTypeVar],
+         /,
+     ) -> Optional[AspectTypeVar]:
+         return self._aspects.get(aspect_type.ASPECT_NAME)  # type: ignore
+
+     def _set_aspect(self, value: AspectTypeVar, /) -> None:
+         self._aspects[value.ASPECT_NAME] = value  # type: ignore
+
+     def _setdefault_aspect(self, default_aspect: AspectTypeVar, /) -> AspectTypeVar:
+         # Similar semantics to dict.setdefault.
+         if existing_aspect := self._get_aspect(type(default_aspect)):
+             return existing_aspect
+         self._set_aspect(default_aspect)
+         return default_aspect
+
+     def _as_mcps(
+         self,
+         change_type: Union[str, models.ChangeTypeClass] = models.ChangeTypeClass.UPSERT,
+     ) -> List[MetadataChangeProposalWrapper]:
+         urn_str = str(self.urn)
+
+         mcps = []
+         for aspect in self._aspects.values():
+             assert isinstance(aspect, models._Aspect)
+             mcps.append(
+                 MetadataChangeProposalWrapper(
+                     entityUrn=urn_str,
+                     aspect=aspect,
+                     changeType=change_type,
+                 )
+             )
+         return mcps
+
+     def __repr__(self) -> str:
+         return f"{self.__class__.__name__}('{self.urn}')"