acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/METADATA +2394 -2394
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/RECORD +22 -11
- datahub/_version.py +1 -1
- datahub/errors.py +35 -0
- datahub/ingestion/source/mongodb.py +17 -16
- datahub/ingestion/source/s3/source.py +14 -5
- datahub/ingestion/source/snowflake/snowflake_schema.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +11 -14
- datahub/sdk/__init__.py +33 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_attribution.py +48 -0
- datahub/sdk/_entity.py +89 -0
- datahub/sdk/_shared.py +338 -0
- datahub/sdk/container.py +193 -0
- datahub/sdk/dataset.py +584 -0
- datahub/sdk/entity_client.py +115 -0
- datahub/sdk/main_client.py +56 -0
- datahub/sdk/resolver_client.py +101 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=BhSRdcjgmWwrve2zKjSYh9Z1fk_7CHpbmijLhhWMrJM,324
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
|
+
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
5
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
7
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
|
|
@@ -203,7 +204,7 @@ datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDut
|
|
|
203
204
|
datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
|
|
204
205
|
datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
|
|
205
206
|
datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
|
|
206
|
-
datahub/ingestion/source/mongodb.py,sha256=
|
|
207
|
+
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
207
208
|
datahub/ingestion/source/nifi.py,sha256=FgIbZSCu-mcdnbIpqwvmADnUIxptogUq1sSEkrkwtrc,56089
|
|
208
209
|
datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
|
|
209
210
|
datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
|
|
@@ -410,7 +411,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
|
|
|
410
411
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
411
412
|
datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
412
413
|
datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
413
|
-
datahub/ingestion/source/s3/source.py,sha256=
|
|
414
|
+
datahub/ingestion/source/s3/source.py,sha256=JwEmVWDEFtPt6iMo82n5DQava8QAKXk_xYl01KAfdHk,47614
|
|
414
415
|
datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
415
416
|
datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
|
|
416
417
|
datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
|
|
@@ -444,8 +445,8 @@ datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34url
|
|
|
444
445
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
|
|
445
446
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
|
|
446
447
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
|
|
447
|
-
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=
|
|
448
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=
|
|
448
|
+
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
|
|
449
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=Yba6OIWYtukAFMoNtEtX2BXWwJee17Dl58DUyK0myho,54530
|
|
449
450
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
450
451
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
451
452
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
|
|
@@ -869,6 +870,16 @@ datahub/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh4HnUHNmBEU74
|
|
|
869
870
|
datahub/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
|
|
870
871
|
datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
|
|
871
872
|
datahub/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
|
|
873
|
+
datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
|
|
874
|
+
datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
|
|
875
|
+
datahub/sdk/_attribution.py,sha256=05iNVT_IDO7aU3vU6dJKCF8EoDAI1mwh4mAg_EBf3RY,1121
|
|
876
|
+
datahub/sdk/_entity.py,sha256=A_AWqNjaV6Y1YvmIiPNm0UyC0aO52G_L-iwgp9XEtYs,3225
|
|
877
|
+
datahub/sdk/_shared.py,sha256=FIEcYGm3LgRKLqn_i1FDePaqvMljkXIA9f3RMqaF1kY,11137
|
|
878
|
+
datahub/sdk/container.py,sha256=6Y19pQLN5vXGM86TEKW7ItTBwE3S0U4BsAMilWhyXqw,6511
|
|
879
|
+
datahub/sdk/dataset.py,sha256=BydV2papBEYL2de72UMNjSkAbsDNpVlXqiEkMlXJiLY,21514
|
|
880
|
+
datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
|
|
881
|
+
datahub/sdk/main_client.py,sha256=IKKzBMiKtT5zIMEHrvaMyxJ7DYBVNpqua70J0Ydl74Y,2068
|
|
882
|
+
datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
|
|
872
883
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
873
884
|
datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
|
|
874
885
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
@@ -1001,9 +1012,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1001
1012
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1002
1013
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1003
1014
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1004
|
-
acryl_datahub-0.15.0.
|
|
1005
|
-
acryl_datahub-0.15.0.
|
|
1006
|
-
acryl_datahub-0.15.0.
|
|
1007
|
-
acryl_datahub-0.15.0.
|
|
1008
|
-
acryl_datahub-0.15.0.
|
|
1009
|
-
acryl_datahub-0.15.0.
|
|
1015
|
+
acryl_datahub-0.15.0.6rc2.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1016
|
+
acryl_datahub-0.15.0.6rc2.dist-info/METADATA,sha256=Bv5h1BaxWarbwfDdiiaD1wwyonRclpbgYtingIwN4B4,175375
|
|
1017
|
+
acryl_datahub-0.15.0.6rc2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1018
|
+
acryl_datahub-0.15.0.6rc2.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1019
|
+
acryl_datahub-0.15.0.6rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1020
|
+
acryl_datahub-0.15.0.6rc2.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/errors.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from datahub.configuration.common import MetaError
|
|
2
|
+
|
|
3
|
+
# TODO: Move all other error types to this file.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SdkUsageError(MetaError):
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AlreadyExistsError(SdkUsageError):
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ItemNotFoundError(SdkUsageError):
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MultipleItemsFoundError(SdkUsageError):
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SchemaFieldKeyError(SdkUsageError, KeyError):
|
|
23
|
+
pass
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class IngestionAttributionWarning(Warning):
|
|
27
|
+
pass
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MultipleSubtypesWarning(Warning):
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ExperimentalWarning(Warning):
|
|
35
|
+
pass
|
|
@@ -219,26 +219,27 @@ def construct_schema_pymongo(
|
|
|
219
219
|
"""
|
|
220
220
|
|
|
221
221
|
aggregations: List[Dict] = []
|
|
222
|
+
|
|
223
|
+
# The order of the aggregations impacts execution time. By setting the sample/limit aggregation first,
|
|
224
|
+
# the subsequent aggregations process a much smaller dataset, improving performance.
|
|
225
|
+
if sample_size:
|
|
226
|
+
if use_random_sampling:
|
|
227
|
+
aggregations.append({"$sample": {"size": sample_size}})
|
|
228
|
+
else:
|
|
229
|
+
aggregations.append({"$limit": sample_size})
|
|
230
|
+
|
|
222
231
|
if should_add_document_size_filter:
|
|
223
232
|
doc_size_field = "temporary_doc_size_field"
|
|
224
233
|
# create a temporary field to store the size of the document. filter on it and then remove it.
|
|
225
|
-
aggregations
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
# get sample documents in collection
|
|
232
|
-
if sample_size:
|
|
233
|
-
aggregations.append({"$sample": {"size": sample_size}})
|
|
234
|
-
documents = collection.aggregate(
|
|
235
|
-
aggregations,
|
|
236
|
-
allowDiskUse=True,
|
|
234
|
+
aggregations.extend(
|
|
235
|
+
[
|
|
236
|
+
{"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
|
|
237
|
+
{"$match": {doc_size_field: {"$lt": max_document_size}}},
|
|
238
|
+
{"$project": {doc_size_field: 0}},
|
|
239
|
+
]
|
|
237
240
|
)
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
aggregations.append({"$limit": sample_size})
|
|
241
|
-
documents = collection.aggregate(aggregations, allowDiskUse=True)
|
|
241
|
+
|
|
242
|
+
documents = collection.aggregate(aggregations, allowDiskUse=True)
|
|
242
243
|
|
|
243
244
|
return construct_schema(list(documents), delimiter)
|
|
244
245
|
|
|
@@ -866,8 +866,21 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
866
866
|
Returns:
|
|
867
867
|
List[Folder]: A list of Folder objects representing the partitions found.
|
|
868
868
|
"""
|
|
869
|
+
|
|
870
|
+
def _is_allowed_path(path_spec_: PathSpec, s3_uri: str) -> bool:
|
|
871
|
+
allowed = path_spec_.allowed(s3_uri)
|
|
872
|
+
if not allowed:
|
|
873
|
+
logger.debug(f"File {s3_uri} not allowed and skipping")
|
|
874
|
+
self.report.report_file_dropped(s3_uri)
|
|
875
|
+
return allowed
|
|
876
|
+
|
|
877
|
+
s3_objects = (
|
|
878
|
+
obj
|
|
879
|
+
for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
|
|
880
|
+
if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
|
|
881
|
+
)
|
|
882
|
+
|
|
869
883
|
partitions: List[Folder] = []
|
|
870
|
-
s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
|
|
871
884
|
grouped_s3_objects_by_dirname = groupby_unsorted(
|
|
872
885
|
s3_objects,
|
|
873
886
|
key=lambda obj: obj.key.rsplit("/", 1)[0],
|
|
@@ -878,10 +891,6 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
878
891
|
modification_time = None
|
|
879
892
|
|
|
880
893
|
for item in group:
|
|
881
|
-
file_path = self.create_s3_path(item.bucket_name, item.key)
|
|
882
|
-
if not path_spec.allowed(file_path):
|
|
883
|
-
logger.debug(f"File {file_path} not allowed and skipping")
|
|
884
|
-
continue
|
|
885
894
|
file_size += item.size
|
|
886
895
|
if creation_time is None or item.last_modified < creation_time:
|
|
887
896
|
creation_time = item.last_modified
|
|
@@ -6,6 +6,7 @@ from datetime import datetime
|
|
|
6
6
|
from typing import Callable, Dict, Iterable, List, MutableMapping, Optional
|
|
7
7
|
|
|
8
8
|
from datahub.ingestion.api.report import SupportsAsObj
|
|
9
|
+
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
|
|
9
10
|
from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
|
|
10
11
|
from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
|
|
11
12
|
from datahub.ingestion.source.snowflake.snowflake_query import (
|
|
@@ -100,6 +101,9 @@ class SnowflakeTable(BaseTable):
|
|
|
100
101
|
def is_hybrid(self) -> bool:
|
|
101
102
|
return self.type is not None and self.type == "HYBRID TABLE"
|
|
102
103
|
|
|
104
|
+
def get_subtype(self) -> DatasetSubTypes:
|
|
105
|
+
return DatasetSubTypes.TABLE
|
|
106
|
+
|
|
103
107
|
|
|
104
108
|
@dataclass
|
|
105
109
|
class SnowflakeView(BaseView):
|
|
@@ -109,6 +113,9 @@ class SnowflakeView(BaseView):
|
|
|
109
113
|
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
|
|
110
114
|
is_secure: bool = False
|
|
111
115
|
|
|
116
|
+
def get_subtype(self) -> DatasetSubTypes:
|
|
117
|
+
return DatasetSubTypes.VIEW
|
|
118
|
+
|
|
112
119
|
|
|
113
120
|
@dataclass
|
|
114
121
|
class SnowflakeSchema:
|
|
@@ -154,6 +161,9 @@ class SnowflakeStream:
|
|
|
154
161
|
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
|
|
155
162
|
last_altered: Optional[datetime] = None
|
|
156
163
|
|
|
164
|
+
def get_subtype(self) -> DatasetSubTypes:
|
|
165
|
+
return DatasetSubTypes.SNOWFLAKE_STREAM
|
|
166
|
+
|
|
157
167
|
|
|
158
168
|
class _SnowflakeTagCache:
|
|
159
169
|
def __init__(self) -> None:
|
|
@@ -21,7 +21,6 @@ from datahub.ingestion.glossary.classification_mixin import (
|
|
|
21
21
|
from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
|
|
22
22
|
from datahub.ingestion.source.common.subtypes import (
|
|
23
23
|
DatasetContainerSubTypes,
|
|
24
|
-
DatasetSubTypes,
|
|
25
24
|
)
|
|
26
25
|
from datahub.ingestion.source.snowflake.constants import (
|
|
27
26
|
GENERIC_PERMISSION_ERROR_KEY,
|
|
@@ -467,7 +466,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
467
466
|
context=f"{db_name}.{schema_name}",
|
|
468
467
|
)
|
|
469
468
|
|
|
470
|
-
def _process_tags(
|
|
469
|
+
def _process_tags(
|
|
470
|
+
self,
|
|
471
|
+
snowflake_schema: SnowflakeSchema,
|
|
472
|
+
schema_name: str,
|
|
473
|
+
db_name: str,
|
|
474
|
+
domain: str,
|
|
475
|
+
) -> None:
|
|
471
476
|
snowflake_schema.tags = self.tag_extractor.get_tags_on_object(
|
|
472
477
|
schema_name=schema_name, db_name=db_name, domain=domain
|
|
473
478
|
)
|
|
@@ -837,15 +842,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
837
842
|
if dpi_aspect:
|
|
838
843
|
yield dpi_aspect
|
|
839
844
|
|
|
840
|
-
subTypes = SubTypes(
|
|
841
|
-
typeNames=(
|
|
842
|
-
[DatasetSubTypes.SNOWFLAKE_STREAM]
|
|
843
|
-
if isinstance(table, SnowflakeStream)
|
|
844
|
-
else [DatasetSubTypes.VIEW]
|
|
845
|
-
if isinstance(table, SnowflakeView)
|
|
846
|
-
else [DatasetSubTypes.TABLE]
|
|
847
|
-
)
|
|
848
|
-
)
|
|
845
|
+
subTypes = SubTypes(typeNames=[table.get_subtype()])
|
|
849
846
|
|
|
850
847
|
yield MetadataChangeProposalWrapper(
|
|
851
848
|
entityUrn=dataset_urn, aspect=subTypes
|
|
@@ -932,9 +929,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
932
929
|
"OWNER_ROLE_TYPE": table.owner_role_type,
|
|
933
930
|
"TABLE_NAME": table.table_name,
|
|
934
931
|
"BASE_TABLES": table.base_tables,
|
|
935
|
-
"STALE_AFTER":
|
|
936
|
-
|
|
937
|
-
|
|
932
|
+
"STALE_AFTER": (
|
|
933
|
+
table.stale_after.isoformat() if table.stale_after else None
|
|
934
|
+
),
|
|
938
935
|
}.items()
|
|
939
936
|
if v
|
|
940
937
|
}
|
datahub/sdk/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import datahub.metadata.schema_classes as models
|
|
4
|
+
from datahub.errors import ExperimentalWarning, SdkUsageError
|
|
5
|
+
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
6
|
+
from datahub.metadata.urns import (
|
|
7
|
+
ChartUrn,
|
|
8
|
+
ContainerUrn,
|
|
9
|
+
CorpGroupUrn,
|
|
10
|
+
CorpUserUrn,
|
|
11
|
+
DashboardUrn,
|
|
12
|
+
DataPlatformInstanceUrn,
|
|
13
|
+
DataPlatformUrn,
|
|
14
|
+
DatasetUrn,
|
|
15
|
+
DomainUrn,
|
|
16
|
+
GlossaryTermUrn,
|
|
17
|
+
SchemaFieldUrn,
|
|
18
|
+
TagUrn,
|
|
19
|
+
)
|
|
20
|
+
from datahub.sdk.container import Container
|
|
21
|
+
from datahub.sdk.dataset import Dataset
|
|
22
|
+
from datahub.sdk.main_client import DataHubClient
|
|
23
|
+
|
|
24
|
+
warnings.warn(
|
|
25
|
+
"The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
|
|
26
|
+
"Our typical backwards-compatibility and stability guarantees do not apply to this code. "
|
|
27
|
+
"When it's promoted to stable, the import path will change "
|
|
28
|
+
"from `from datahub.sdk import ...` to `from datahub import ...`.",
|
|
29
|
+
ExperimentalWarning,
|
|
30
|
+
stacklevel=2,
|
|
31
|
+
)
|
|
32
|
+
del warnings
|
|
33
|
+
del ExperimentalWarning
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Dict, List, Type
|
|
2
|
+
|
|
3
|
+
from datahub.sdk._entity import Entity
|
|
4
|
+
from datahub.sdk.container import Container
|
|
5
|
+
from datahub.sdk.dataset import Dataset
|
|
6
|
+
|
|
7
|
+
# TODO: Is there a better way to declare this?
|
|
8
|
+
ENTITY_CLASSES_LIST: List[Type[Entity]] = [
|
|
9
|
+
Container,
|
|
10
|
+
Dataset,
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
ENTITY_CLASSES: Dict[str, Type[Entity]] = {
|
|
14
|
+
cls.get_urn_type().ENTITY_TYPE: cls for cls in ENTITY_CLASSES_LIST
|
|
15
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from typing import Iterator
|
|
5
|
+
|
|
6
|
+
from datahub.utilities.str_enum import StrEnum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class KnownAttribution(StrEnum):
|
|
10
|
+
INGESTION = "INGESTION"
|
|
11
|
+
INGESTION_ALTERNATE = "INGESTION_ALTERNATE"
|
|
12
|
+
|
|
13
|
+
UI = "UI"
|
|
14
|
+
SDK = "SDK"
|
|
15
|
+
|
|
16
|
+
PROPAGATION = "PROPAGATION"
|
|
17
|
+
|
|
18
|
+
def is_ingestion(self) -> bool:
|
|
19
|
+
return self in (
|
|
20
|
+
KnownAttribution.INGESTION,
|
|
21
|
+
KnownAttribution.INGESTION_ALTERNATE,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_default_attribution = KnownAttribution.SDK
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_default_attribution() -> KnownAttribution:
|
|
29
|
+
return _default_attribution
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def set_default_attribution(attribution: KnownAttribution) -> None:
|
|
33
|
+
global _default_attribution
|
|
34
|
+
_default_attribution = attribution
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@contextlib.contextmanager
|
|
38
|
+
def change_default_attribution(attribution: KnownAttribution) -> Iterator[None]:
|
|
39
|
+
old_attribution = get_default_attribution()
|
|
40
|
+
try:
|
|
41
|
+
set_default_attribution(attribution)
|
|
42
|
+
yield
|
|
43
|
+
finally:
|
|
44
|
+
set_default_attribution(old_attribution)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def is_ingestion_attribution() -> bool:
|
|
48
|
+
return get_default_attribution().is_ingestion()
|
datahub/sdk/_entity.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import List, Optional, Type, Union
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self
|
|
5
|
+
|
|
6
|
+
import datahub.metadata.schema_classes as models
|
|
7
|
+
from datahub.emitter.mce_builder import Aspect as AspectTypeVar
|
|
8
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
9
|
+
from datahub.errors import SdkUsageError
|
|
10
|
+
from datahub.metadata.urns import Urn
|
|
11
|
+
from datahub.utilities.urns._urn_base import _SpecificUrn
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Entity:
|
|
15
|
+
__slots__ = ("_urn", "_prev_aspects", "_aspects")
|
|
16
|
+
|
|
17
|
+
def __init__(self, /, urn: Urn):
|
|
18
|
+
# This method is not meant for direct usage.
|
|
19
|
+
if type(self) is Entity:
|
|
20
|
+
raise SdkUsageError(f"{Entity.__name__} cannot be instantiated directly.")
|
|
21
|
+
|
|
22
|
+
assert isinstance(urn, self.get_urn_type())
|
|
23
|
+
self._urn: _SpecificUrn = urn
|
|
24
|
+
|
|
25
|
+
# prev_aspects is None means this was created from scratch
|
|
26
|
+
self._prev_aspects: Optional[models.AspectBag] = None
|
|
27
|
+
self._aspects: models.AspectBag = {}
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
31
|
+
# If an init method from a subclass adds required fields, it also needs to override this method.
|
|
32
|
+
# An alternative approach would call cls.__new__() to bypass the init method, but it's a bit
|
|
33
|
+
# too hacky for my taste.
|
|
34
|
+
entity = cls(urn=urn)
|
|
35
|
+
return entity._init_from_graph(current_aspects)
|
|
36
|
+
|
|
37
|
+
def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
|
|
38
|
+
self._prev_aspects = current_aspects
|
|
39
|
+
aspect: models._Aspect
|
|
40
|
+
for aspect_name, aspect in (current_aspects or {}).items(): # type: ignore
|
|
41
|
+
aspect_copy = type(aspect).from_obj(aspect.to_obj())
|
|
42
|
+
self._aspects[aspect_name] = aspect_copy # type: ignore
|
|
43
|
+
return self
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
@abc.abstractmethod
|
|
47
|
+
def get_urn_type(cls) -> Type[_SpecificUrn]: ...
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def urn(self) -> _SpecificUrn:
|
|
51
|
+
return self._urn
|
|
52
|
+
|
|
53
|
+
def _get_aspect(
|
|
54
|
+
self,
|
|
55
|
+
aspect_type: Type[AspectTypeVar],
|
|
56
|
+
/,
|
|
57
|
+
) -> Optional[AspectTypeVar]:
|
|
58
|
+
return self._aspects.get(aspect_type.ASPECT_NAME) # type: ignore
|
|
59
|
+
|
|
60
|
+
def _set_aspect(self, value: AspectTypeVar, /) -> None:
|
|
61
|
+
self._aspects[value.ASPECT_NAME] = value # type: ignore
|
|
62
|
+
|
|
63
|
+
def _setdefault_aspect(self, default_aspect: AspectTypeVar, /) -> AspectTypeVar:
|
|
64
|
+
# Similar semantics to dict.setdefault.
|
|
65
|
+
if existing_aspect := self._get_aspect(type(default_aspect)):
|
|
66
|
+
return existing_aspect
|
|
67
|
+
self._set_aspect(default_aspect)
|
|
68
|
+
return default_aspect
|
|
69
|
+
|
|
70
|
+
def _as_mcps(
|
|
71
|
+
self,
|
|
72
|
+
change_type: Union[str, models.ChangeTypeClass] = models.ChangeTypeClass.UPSERT,
|
|
73
|
+
) -> List[MetadataChangeProposalWrapper]:
|
|
74
|
+
urn_str = str(self.urn)
|
|
75
|
+
|
|
76
|
+
mcps = []
|
|
77
|
+
for aspect in self._aspects.values():
|
|
78
|
+
assert isinstance(aspect, models._Aspect)
|
|
79
|
+
mcps.append(
|
|
80
|
+
MetadataChangeProposalWrapper(
|
|
81
|
+
entityUrn=urn_str,
|
|
82
|
+
aspect=aspect,
|
|
83
|
+
changeType=change_type,
|
|
84
|
+
)
|
|
85
|
+
)
|
|
86
|
+
return mcps
|
|
87
|
+
|
|
88
|
+
def __repr__(self) -> str:
|
|
89
|
+
return f"{self.__class__.__name__}('{self.urn}')"
|