acryl-datahub 0.15.0.1rc13__py3-none-any.whl → 0.15.0.1rc14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/METADATA +2520 -2520
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/RECORD +27 -24
- datahub/__init__.py +1 -1
- datahub/emitter/mce_builder.py +3 -3
- datahub/emitter/mcp_patch_builder.py +36 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
- datahub/ingestion/source/tableau/tableau.py +3 -0
- datahub/ingestion/source/tableau/tableau_common.py +18 -0
- datahub/specific/aspect_helpers/__init__.py +0 -0
- datahub/specific/aspect_helpers/custom_properties.py +79 -0
- datahub/specific/aspect_helpers/ownership.py +67 -0
- datahub/specific/aspect_helpers/structured_properties.py +72 -0
- datahub/specific/aspect_helpers/tags.py +42 -0
- datahub/specific/aspect_helpers/terms.py +43 -0
- datahub/specific/chart.py +28 -184
- datahub/specific/dashboard.py +31 -196
- datahub/specific/datajob.py +34 -189
- datahub/specific/dataproduct.py +24 -86
- datahub/specific/dataset.py +48 -133
- datahub/specific/form.py +12 -32
- datahub/specific/structured_property.py +9 -9
- datahub/specific/custom_properties.py +0 -37
- datahub/specific/ownership.py +0 -48
- datahub/specific/structured_properties.py +0 -53
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=tC4XcRTMJqr-bc6T1QdoRI7MvbIkSZk8AscmW3iOtOo,577
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -114,10 +114,10 @@ datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
|
|
|
114
114
|
datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
|
|
115
115
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
116
116
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
117
|
-
datahub/emitter/mce_builder.py,sha256=
|
|
117
|
+
datahub/emitter/mce_builder.py,sha256=IqHOm0cpzdVC_mQOqk0yEVJUEj9xn8am2OFAwwQeX_8,16342
|
|
118
118
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
119
119
|
datahub/emitter/mcp_builder.py,sha256=eOcuz41c4a3oTkNk39yYl9bTxpksxqATPHLcqyhPGT0,9856
|
|
120
|
-
datahub/emitter/mcp_patch_builder.py,sha256=
|
|
120
|
+
datahub/emitter/mcp_patch_builder.py,sha256=oonC8iGOvDzqj890CxOjWlBdDEF1RnwvbSZy1sivlTY,4572
|
|
121
121
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
122
122
|
datahub/emitter/rest_emitter.py,sha256=oqyRuXG1o1dYjiEIH5TFMb1q0xhRbpxPIA5qkyz0iQ8,16407
|
|
123
123
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
@@ -233,20 +233,20 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
|
|
|
233
233
|
datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
|
|
234
234
|
datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
|
|
235
235
|
datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
236
|
-
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256
|
|
236
|
+
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=YMsyj6s7fggzisWfDdbT4w1MKJ3eRdNERsCShnu0Zqo,13681
|
|
237
237
|
datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
|
|
238
238
|
datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
|
|
239
|
-
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=
|
|
239
|
+
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=PqbYNqM4-KukCU1meuvsk0qbiWa7UFh5hqHrHsvOSWQ,25889
|
|
240
240
|
datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
|
|
241
241
|
datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
|
|
242
242
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
|
|
243
243
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
|
|
244
244
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
|
|
245
245
|
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
|
|
246
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
246
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=o2My5Q7ab39qHP3jjVFCQSErogGYb14s6397xHIZSqc,50568
|
|
247
247
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
248
248
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
249
|
-
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=
|
|
249
|
+
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=LJqdkCR8H55b3txCVBM-cs1T5QWxSTimJ3ebSgtXjgI,44874
|
|
250
250
|
datahub/ingestion/source/bigquery_v2/profiler.py,sha256=8-yAoq8sX0E6VIwr75YbM8wITRNhGfxgte9BCeGNkMM,10681
|
|
251
251
|
datahub/ingestion/source/bigquery_v2/queries.py,sha256=B2vJLZYfwM1J5JAckijKJTxLhDYA0yw3kfzj5oRQB5c,20151
|
|
252
252
|
datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=xLf-vCUAnNuDdTHghxJvPOyGeA_XLCW3r-xj-8cfn3Q,19528
|
|
@@ -491,8 +491,8 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
491
491
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
|
|
492
492
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
493
493
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
494
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
495
|
-
datahub/ingestion/source/tableau/tableau_common.py,sha256=
|
|
494
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=fY--jFtPtCuDBAruiMStAoT7HqaTDYtiVEKzEYuzCag,140121
|
|
495
|
+
datahub/ingestion/source/tableau/tableau_common.py,sha256=a3Nu0Upy6_pnrd7XpSMcYHdnYca1JBW7H0jMqkYr0ME,26871
|
|
496
496
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
497
497
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
498
498
|
datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
|
|
@@ -861,16 +861,19 @@ datahub/secret/datahub_secrets_client.py,sha256=WkoJDip7IAKSGDM5oHeZVL8878pd4Bix
|
|
|
861
861
|
datahub/secret/secret_common.py,sha256=PeRFNljPlGfNrmn3VtDVbazQE6J3Q1nA3L-z3cS8LEA,2522
|
|
862
862
|
datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
|
|
863
863
|
datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
|
|
864
|
-
datahub/specific/chart.py,sha256=
|
|
865
|
-
datahub/specific/
|
|
866
|
-
datahub/specific/
|
|
867
|
-
datahub/specific/
|
|
868
|
-
datahub/specific/
|
|
869
|
-
datahub/specific/
|
|
870
|
-
datahub/specific/
|
|
871
|
-
datahub/specific/
|
|
872
|
-
datahub/specific/
|
|
873
|
-
datahub/specific/
|
|
864
|
+
datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,6684
|
|
865
|
+
datahub/specific/dashboard.py,sha256=D8CnOSScQ0-UICFjQnQOtqL-SlNSxhSuub4vZ3BpcuI,10017
|
|
866
|
+
datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
|
|
867
|
+
datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
|
|
868
|
+
datahub/specific/dataset.py,sha256=je9j3rVzpSiXoOe0UmfD7mc5vCpLAAO74Z8q1SvwPX0,9725
|
|
869
|
+
datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
|
|
870
|
+
datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
|
|
871
|
+
datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
872
|
+
datahub/specific/aspect_helpers/custom_properties.py,sha256=s87_Aq7BgF_t_I0MCjNEJxYyrNxMTb1N0hCifT8Y6Cw,2255
|
|
873
|
+
datahub/specific/aspect_helpers/ownership.py,sha256=rNYiJSqb_FJQhFRSIQScg4mfxgYhPvjeaYyvutY6CN0,1861
|
|
874
|
+
datahub/specific/aspect_helpers/structured_properties.py,sha256=EVnFS025r-PG5PAC7VENVJO-JvDYif2VeYonsC3Z8m8,2255
|
|
875
|
+
datahub/specific/aspect_helpers/tags.py,sha256=YHcKfRaIvv12wcmfMc8-Dk6gf6xIvJedkn451uBuz-Y,1254
|
|
876
|
+
datahub/specific/aspect_helpers/terms.py,sha256=l8xoOLQ2RsIl3UnKhLisQNwrGTFIPrzfvP4zjH-AhwI,1352
|
|
874
877
|
datahub/sql_parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
875
878
|
datahub/sql_parsing/_models.py,sha256=il-xm1RcLdi1phJUV3xrTecdOGH31akqheuSC2N4YhQ,3141
|
|
876
879
|
datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPiTb6-rO3Y,7043
|
|
@@ -983,8 +986,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
983
986
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
984
987
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
985
988
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
986
|
-
acryl_datahub-0.15.0.
|
|
987
|
-
acryl_datahub-0.15.0.
|
|
988
|
-
acryl_datahub-0.15.0.
|
|
989
|
-
acryl_datahub-0.15.0.
|
|
990
|
-
acryl_datahub-0.15.0.
|
|
989
|
+
acryl_datahub-0.15.0.1rc14.dist-info/METADATA,sha256=na5JJwiilGTUFiwOBRULg2a8NxVvzNRgwodacg0LOSU,173444
|
|
990
|
+
acryl_datahub-0.15.0.1rc14.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
991
|
+
acryl_datahub-0.15.0.1rc14.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
992
|
+
acryl_datahub-0.15.0.1rc14.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
993
|
+
acryl_datahub-0.15.0.1rc14.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
datahub/emitter/mce_builder.py
CHANGED
|
@@ -24,6 +24,7 @@ from typing import (
|
|
|
24
24
|
|
|
25
25
|
import typing_inspect
|
|
26
26
|
from avrogen.dict_wrapper import DictWrapper
|
|
27
|
+
from typing_extensions import assert_never
|
|
27
28
|
|
|
28
29
|
from datahub.emitter.enum_helpers import get_enum_options
|
|
29
30
|
from datahub.metadata.schema_classes import (
|
|
@@ -269,9 +270,8 @@ def make_owner_urn(owner: str, owner_type: OwnerType) -> str:
|
|
|
269
270
|
return make_user_urn(owner)
|
|
270
271
|
elif owner_type == OwnerType.GROUP:
|
|
271
272
|
return make_group_urn(owner)
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
return f"urn:li:{owner_type.value}:{owner}"
|
|
273
|
+
else:
|
|
274
|
+
assert_never(owner_type)
|
|
275
275
|
|
|
276
276
|
|
|
277
277
|
def make_ownership_type_urn(type: str) -> str:
|
|
@@ -2,7 +2,19 @@ import json
|
|
|
2
2
|
import time
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import (
|
|
6
|
+
Any,
|
|
7
|
+
Dict,
|
|
8
|
+
List,
|
|
9
|
+
Literal,
|
|
10
|
+
Optional,
|
|
11
|
+
Protocol,
|
|
12
|
+
Tuple,
|
|
13
|
+
Union,
|
|
14
|
+
runtime_checkable,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from typing_extensions import LiteralString
|
|
6
18
|
|
|
7
19
|
from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
|
|
8
20
|
from datahub.emitter.serialization_helper import pre_json_transform
|
|
@@ -19,25 +31,36 @@ from datahub.metadata.urns import Urn
|
|
|
19
31
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
20
32
|
|
|
21
33
|
|
|
34
|
+
@runtime_checkable
|
|
35
|
+
class SupportsToObj(Protocol):
|
|
36
|
+
def to_obj(self) -> Any:
|
|
37
|
+
...
|
|
38
|
+
|
|
39
|
+
|
|
22
40
|
def _recursive_to_obj(obj: Any) -> Any:
|
|
23
41
|
if isinstance(obj, list):
|
|
24
42
|
return [_recursive_to_obj(v) for v in obj]
|
|
25
|
-
elif
|
|
43
|
+
elif isinstance(obj, SupportsToObj):
|
|
26
44
|
return obj.to_obj()
|
|
27
45
|
else:
|
|
28
46
|
return obj
|
|
29
47
|
|
|
30
48
|
|
|
49
|
+
PatchPath = Tuple[Union[LiteralString, Urn], ...]
|
|
50
|
+
PatchOp = Literal["add", "remove", "replace"]
|
|
51
|
+
|
|
52
|
+
|
|
31
53
|
@dataclass
|
|
32
|
-
class _Patch:
|
|
33
|
-
op:
|
|
34
|
-
path:
|
|
54
|
+
class _Patch(SupportsToObj):
|
|
55
|
+
op: PatchOp
|
|
56
|
+
path: PatchPath
|
|
35
57
|
value: Any
|
|
36
58
|
|
|
37
59
|
def to_obj(self) -> Dict:
|
|
60
|
+
quoted_path = "/" + "/".join(MetadataPatchProposal.quote(p) for p in self.path)
|
|
38
61
|
return {
|
|
39
62
|
"op": self.op,
|
|
40
|
-
"path":
|
|
63
|
+
"path": quoted_path,
|
|
41
64
|
"value": _recursive_to_obj(self.value),
|
|
42
65
|
}
|
|
43
66
|
|
|
@@ -63,15 +86,16 @@ class MetadataPatchProposal:
|
|
|
63
86
|
|
|
64
87
|
# Json Patch quoting based on https://jsonpatch.com/#json-pointer
|
|
65
88
|
@classmethod
|
|
66
|
-
def quote(cls, value: str) -> str:
|
|
67
|
-
return value.replace("~", "~0").replace("/", "~1")
|
|
89
|
+
def quote(cls, value: Union[str, Urn]) -> str:
|
|
90
|
+
return str(value).replace("~", "~0").replace("/", "~1")
|
|
68
91
|
|
|
69
92
|
def _add_patch(
|
|
70
|
-
self,
|
|
93
|
+
self,
|
|
94
|
+
aspect_name: str,
|
|
95
|
+
op: PatchOp,
|
|
96
|
+
path: PatchPath,
|
|
97
|
+
value: Any,
|
|
71
98
|
) -> None:
|
|
72
|
-
if not isinstance(path, str):
|
|
73
|
-
path = "/" + "/".join(self.quote(p) for p in path)
|
|
74
|
-
|
|
75
99
|
# TODO: Validate that aspectName is a valid aspect for this entityType
|
|
76
100
|
self.patches[aspect_name].append(_Patch(op, path, value))
|
|
77
101
|
|
|
@@ -206,9 +206,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
206
206
|
|
|
207
207
|
def _init_schema_resolver(self) -> SchemaResolver:
|
|
208
208
|
schema_resolution_required = (
|
|
209
|
-
self.config.use_queries_v2
|
|
210
|
-
or self.config.lineage_parse_view_ddl
|
|
211
|
-
or self.config.lineage_use_sql_parser
|
|
209
|
+
self.config.use_queries_v2 or self.config.lineage_use_sql_parser
|
|
212
210
|
)
|
|
213
211
|
schema_ingestion_enabled = (
|
|
214
212
|
self.config.include_schema_metadata
|
|
@@ -255,18 +253,16 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
255
253
|
for project in projects:
|
|
256
254
|
yield from self.bq_schema_extractor.get_project_workunits(project)
|
|
257
255
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
self.bq_schema_extractor.snapshot_refs_by_project,
|
|
267
|
-
self.bq_schema_extractor.snapshots_by_ref,
|
|
268
|
-
)
|
|
256
|
+
self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
|
|
257
|
+
yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
|
|
258
|
+
[p.id for p in projects],
|
|
259
|
+
self.bq_schema_extractor.view_refs_by_project,
|
|
260
|
+
self.bq_schema_extractor.view_definitions,
|
|
261
|
+
self.bq_schema_extractor.snapshot_refs_by_project,
|
|
262
|
+
self.bq_schema_extractor.snapshots_by_ref,
|
|
263
|
+
)
|
|
269
264
|
|
|
265
|
+
if self.config.use_queries_v2:
|
|
270
266
|
# if both usage and lineage are disabled then skip queries extractor piece
|
|
271
267
|
if (
|
|
272
268
|
not self.config.include_usage_statistics
|
|
@@ -306,10 +302,6 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
306
302
|
if self.config.include_table_lineage:
|
|
307
303
|
yield from self.lineage_extractor.get_lineage_workunits(
|
|
308
304
|
[p.id for p in projects],
|
|
309
|
-
self.bq_schema_extractor.view_refs_by_project,
|
|
310
|
-
self.bq_schema_extractor.view_definitions,
|
|
311
|
-
self.bq_schema_extractor.snapshot_refs_by_project,
|
|
312
|
-
self.bq_schema_extractor.snapshots_by_ref,
|
|
313
305
|
self.bq_schema_extractor.table_refs,
|
|
314
306
|
)
|
|
315
307
|
|
|
@@ -463,10 +463,6 @@ class BigQueryV2Config(
|
|
|
463
463
|
default=True,
|
|
464
464
|
description="Use sql parser to resolve view/table lineage.",
|
|
465
465
|
)
|
|
466
|
-
lineage_parse_view_ddl: bool = Field(
|
|
467
|
-
default=True,
|
|
468
|
-
description="Sql parse view ddl to get lineage.",
|
|
469
|
-
)
|
|
470
466
|
|
|
471
467
|
lineage_sql_parser_use_raw_names: bool = Field(
|
|
472
468
|
default=False,
|
|
@@ -572,11 +568,9 @@ class BigQueryV2Config(
|
|
|
572
568
|
"See [this](https://cloud.google.com/bigquery/docs/information-schema-jobs#scope_and_syntax) for details.",
|
|
573
569
|
)
|
|
574
570
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
include_view_column_lineage: bool = Field(default=True, hidden_from_docs=True)
|
|
571
|
+
_include_view_lineage = pydantic_removed_field("include_view_lineage")
|
|
572
|
+
_include_view_column_lineage = pydantic_removed_field("include_view_column_lineage")
|
|
573
|
+
_lineage_parse_view_ddl = pydantic_removed_field("lineage_parse_view_ddl")
|
|
580
574
|
|
|
581
575
|
@root_validator(pre=True)
|
|
582
576
|
def set_include_schema_metadata(cls, values: Dict) -> Dict:
|
|
@@ -653,14 +653,11 @@ class BigQuerySchemaGenerator:
|
|
|
653
653
|
self.report.report_dropped(table_identifier.raw_table_name())
|
|
654
654
|
return
|
|
655
655
|
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
)
|
|
660
|
-
self.
|
|
661
|
-
if self.config.lineage_parse_view_ddl and view.view_definition:
|
|
662
|
-
self.view_refs_by_project[project_id].add(table_ref)
|
|
663
|
-
self.view_definitions[table_ref] = view.view_definition
|
|
656
|
+
table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
|
|
657
|
+
self.table_refs.add(table_ref)
|
|
658
|
+
if view.view_definition:
|
|
659
|
+
self.view_refs_by_project[project_id].add(table_ref)
|
|
660
|
+
self.view_definitions[table_ref] = view.view_definition
|
|
664
661
|
|
|
665
662
|
view.column_count = len(columns)
|
|
666
663
|
if not view.column_count:
|
|
@@ -701,14 +698,11 @@ class BigQuerySchemaGenerator:
|
|
|
701
698
|
f"Snapshot doesn't have any column or unable to get columns for snapshot: {table_identifier}"
|
|
702
699
|
)
|
|
703
700
|
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
)
|
|
708
|
-
self.
|
|
709
|
-
if snapshot.base_table_identifier:
|
|
710
|
-
self.snapshot_refs_by_project[project_id].add(table_ref)
|
|
711
|
-
self.snapshots_by_ref[table_ref] = snapshot
|
|
701
|
+
table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
|
|
702
|
+
self.table_refs.add(table_ref)
|
|
703
|
+
if snapshot.base_table_identifier:
|
|
704
|
+
self.snapshot_refs_by_project[project_id].add(table_ref)
|
|
705
|
+
self.snapshots_by_ref[table_ref] = snapshot
|
|
712
706
|
|
|
713
707
|
yield from self.gen_snapshot_dataset_workunits(
|
|
714
708
|
table=snapshot,
|
|
@@ -1148,7 +1142,7 @@ class BigQuerySchemaGenerator:
|
|
|
1148
1142
|
foreignKeys=foreign_keys if foreign_keys else None,
|
|
1149
1143
|
)
|
|
1150
1144
|
|
|
1151
|
-
if self.config.
|
|
1145
|
+
if self.config.lineage_use_sql_parser:
|
|
1152
1146
|
self.sql_parser_schema_resolver.add_schema_metadata(
|
|
1153
1147
|
dataset_urn, schema_metadata
|
|
1154
1148
|
)
|
|
@@ -291,16 +291,15 @@ class BigqueryLineageExtractor:
|
|
|
291
291
|
snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
|
|
292
292
|
) -> Iterable[MetadataWorkUnit]:
|
|
293
293
|
for project in projects:
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
self.
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
)
|
|
294
|
+
for view in view_refs_by_project[project]:
|
|
295
|
+
self.datasets_skip_audit_log_lineage.add(view)
|
|
296
|
+
self.aggregator.add_view_definition(
|
|
297
|
+
view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
|
|
298
|
+
BigQueryTableRef.from_string_name(view)
|
|
299
|
+
),
|
|
300
|
+
view_definition=view_definitions[view],
|
|
301
|
+
default_db=project,
|
|
302
|
+
)
|
|
304
303
|
|
|
305
304
|
for snapshot_ref in snapshot_refs_by_project[project]:
|
|
306
305
|
snapshot = snapshots_by_ref[snapshot_ref]
|
|
@@ -322,23 +321,11 @@ class BigqueryLineageExtractor:
|
|
|
322
321
|
def get_lineage_workunits(
|
|
323
322
|
self,
|
|
324
323
|
projects: List[str],
|
|
325
|
-
view_refs_by_project: Dict[str, Set[str]],
|
|
326
|
-
view_definitions: FileBackedDict[str],
|
|
327
|
-
snapshot_refs_by_project: Dict[str, Set[str]],
|
|
328
|
-
snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
|
|
329
324
|
table_refs: Set[str],
|
|
330
325
|
) -> Iterable[MetadataWorkUnit]:
|
|
331
326
|
if not self._should_ingest_lineage():
|
|
332
327
|
return
|
|
333
328
|
|
|
334
|
-
yield from self.get_lineage_workunits_for_views_and_snapshots(
|
|
335
|
-
projects,
|
|
336
|
-
view_refs_by_project,
|
|
337
|
-
view_definitions,
|
|
338
|
-
snapshot_refs_by_project,
|
|
339
|
-
snapshots_by_ref,
|
|
340
|
-
)
|
|
341
|
-
|
|
342
329
|
if self.config.use_exported_bigquery_audit_metadata:
|
|
343
330
|
projects = ["*"] # project_id not used when using exported metadata
|
|
344
331
|
|
|
@@ -109,6 +109,7 @@ from datahub.ingestion.source.tableau.tableau_common import (
|
|
|
109
109
|
make_filter,
|
|
110
110
|
make_fine_grained_lineage_class,
|
|
111
111
|
make_upstream_class,
|
|
112
|
+
optimize_query_filter,
|
|
112
113
|
published_datasource_graphql_query,
|
|
113
114
|
query_metadata_cursor_based_pagination,
|
|
114
115
|
sheet_graphql_query,
|
|
@@ -1363,6 +1364,8 @@ class TableauSiteSource:
|
|
|
1363
1364
|
query_filter: dict = {},
|
|
1364
1365
|
page_size_override: Optional[int] = None,
|
|
1365
1366
|
) -> Iterable[dict]:
|
|
1367
|
+
query_filter = optimize_query_filter(query_filter)
|
|
1368
|
+
|
|
1366
1369
|
# Calls the get_connection_object_page function to get the objects,
|
|
1367
1370
|
# and automatically handles pagination.
|
|
1368
1371
|
page_size = page_size_override or self.config.page_size
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
import html
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
@@ -35,6 +36,7 @@ from datahub.metadata.schema_classes import (
|
|
|
35
36
|
UpstreamClass,
|
|
36
37
|
)
|
|
37
38
|
from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult
|
|
39
|
+
from datahub.utilities.ordered_set import OrderedSet
|
|
38
40
|
|
|
39
41
|
logger = logging.getLogger(__name__)
|
|
40
42
|
|
|
@@ -1000,3 +1002,19 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
|
|
|
1000
1002
|
]
|
|
1001
1003
|
|
|
1002
1004
|
return filter_pages
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
def optimize_query_filter(query_filter: dict) -> dict:
|
|
1008
|
+
"""
|
|
1009
|
+
Duplicates in the filter cause duplicates in the result,
|
|
1010
|
+
leading to entities/aspects being emitted multiple times unnecessarily
|
|
1011
|
+
"""
|
|
1012
|
+
optimized_query = copy.deepcopy(query_filter)
|
|
1013
|
+
|
|
1014
|
+
if query_filter.get(c.ID_WITH_IN):
|
|
1015
|
+
optimized_query[c.ID_WITH_IN] = list(OrderedSet(query_filter[c.ID_WITH_IN]))
|
|
1016
|
+
if query_filter.get(c.PROJECT_NAME_WITH_IN):
|
|
1017
|
+
optimized_query[c.PROJECT_NAME_WITH_IN] = list(
|
|
1018
|
+
OrderedSet(query_filter[c.PROJECT_NAME_WITH_IN])
|
|
1019
|
+
)
|
|
1020
|
+
return optimized_query
|
|
File without changes
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import Dict, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self
|
|
5
|
+
|
|
6
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HasCustomPropertiesPatch(MetadataPatchProposal):
|
|
10
|
+
@classmethod
|
|
11
|
+
@abstractmethod
|
|
12
|
+
def _custom_properties_location(self) -> Tuple[str, PatchPath]:
|
|
13
|
+
...
|
|
14
|
+
|
|
15
|
+
def add_custom_property(self, key: str, value: str) -> Self:
|
|
16
|
+
"""Add a custom property to the entity.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
key: The key of the custom property.
|
|
20
|
+
value: The value of the custom property.
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
The patch builder instance.
|
|
24
|
+
"""
|
|
25
|
+
aspect_name, path = self._custom_properties_location()
|
|
26
|
+
self._add_patch(
|
|
27
|
+
aspect_name,
|
|
28
|
+
"add",
|
|
29
|
+
path=(*path, key),
|
|
30
|
+
value=value,
|
|
31
|
+
)
|
|
32
|
+
return self
|
|
33
|
+
|
|
34
|
+
def add_custom_properties(
|
|
35
|
+
self, custom_properties: Optional[Dict[str, str]] = None
|
|
36
|
+
) -> Self:
|
|
37
|
+
if custom_properties is not None:
|
|
38
|
+
for key, value in custom_properties.items():
|
|
39
|
+
self.add_custom_property(key, value)
|
|
40
|
+
return self
|
|
41
|
+
|
|
42
|
+
def remove_custom_property(self, key: str) -> Self:
|
|
43
|
+
"""Remove a custom property from the entity.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
key: The key of the custom property to remove.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
The patch builder instance.
|
|
50
|
+
"""
|
|
51
|
+
aspect_name, path = self._custom_properties_location()
|
|
52
|
+
self._add_patch(
|
|
53
|
+
aspect_name,
|
|
54
|
+
"remove",
|
|
55
|
+
path=(*path, key),
|
|
56
|
+
value={},
|
|
57
|
+
)
|
|
58
|
+
return self
|
|
59
|
+
|
|
60
|
+
def set_custom_properties(self, custom_properties: Dict[str, str]) -> Self:
|
|
61
|
+
"""Sets the custom properties of the entity.
|
|
62
|
+
|
|
63
|
+
This method replaces all existing custom properties with the given dictionary.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
custom_properties: A dictionary containing the custom properties to be set.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
The patch builder instance.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
aspect_name, path = self._custom_properties_location()
|
|
73
|
+
self._add_patch(
|
|
74
|
+
aspect_name,
|
|
75
|
+
"add",
|
|
76
|
+
path=path,
|
|
77
|
+
value=custom_properties,
|
|
78
|
+
)
|
|
79
|
+
return self
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.schema_classes import (
|
|
7
|
+
OwnerClass,
|
|
8
|
+
OwnershipClass,
|
|
9
|
+
OwnershipTypeClass,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HasOwnershipPatch(MetadataPatchProposal):
|
|
14
|
+
def add_owner(self, owner: OwnerClass) -> Self:
|
|
15
|
+
"""Add an owner to the entity.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
owner: The Owner object to add.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
The patch builder instance.
|
|
22
|
+
"""
|
|
23
|
+
self._add_patch(
|
|
24
|
+
OwnershipClass.ASPECT_NAME,
|
|
25
|
+
"add",
|
|
26
|
+
path=("owners", owner.owner, str(owner.type)),
|
|
27
|
+
value=owner,
|
|
28
|
+
)
|
|
29
|
+
return self
|
|
30
|
+
|
|
31
|
+
def remove_owner(
|
|
32
|
+
self, owner: str, owner_type: Optional[OwnershipTypeClass] = None
|
|
33
|
+
) -> Self:
|
|
34
|
+
"""Remove an owner from the entity.
|
|
35
|
+
|
|
36
|
+
If owner_type is not provided, the owner will be removed regardless of ownership type.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
owner: The owner to remove.
|
|
40
|
+
owner_type: The ownership type of the owner (optional).
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
The patch builder instance.
|
|
44
|
+
"""
|
|
45
|
+
self._add_patch(
|
|
46
|
+
OwnershipClass.ASPECT_NAME,
|
|
47
|
+
"remove",
|
|
48
|
+
path=("owners", owner) + ((str(owner_type),) if owner_type else ()),
|
|
49
|
+
value=owner,
|
|
50
|
+
)
|
|
51
|
+
return self
|
|
52
|
+
|
|
53
|
+
def set_owners(self, owners: List[OwnerClass]) -> Self:
|
|
54
|
+
"""Set the owners of the entity.
|
|
55
|
+
|
|
56
|
+
This will effectively replace all existing owners with the new list - it doesn't really patch things.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
owners: The list of owners to set.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
The patch builder instance.
|
|
63
|
+
"""
|
|
64
|
+
self._add_patch(
|
|
65
|
+
OwnershipClass.ASPECT_NAME, "add", path=("owners",), value=owners
|
|
66
|
+
)
|
|
67
|
+
return self
|