acryl-datahub 0.15.0.1rc13__py3-none-any.whl → 0.15.0.1rc14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (30)
  1. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/METADATA +2520 -2520
  2. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/RECORD +27 -24
  3. datahub/__init__.py +1 -1
  4. datahub/emitter/mce_builder.py +3 -3
  5. datahub/emitter/mcp_patch_builder.py +36 -12
  6. datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
  7. datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
  8. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
  9. datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
  10. datahub/ingestion/source/tableau/tableau.py +3 -0
  11. datahub/ingestion/source/tableau/tableau_common.py +18 -0
  12. datahub/specific/aspect_helpers/__init__.py +0 -0
  13. datahub/specific/aspect_helpers/custom_properties.py +79 -0
  14. datahub/specific/aspect_helpers/ownership.py +67 -0
  15. datahub/specific/aspect_helpers/structured_properties.py +72 -0
  16. datahub/specific/aspect_helpers/tags.py +42 -0
  17. datahub/specific/aspect_helpers/terms.py +43 -0
  18. datahub/specific/chart.py +28 -184
  19. datahub/specific/dashboard.py +31 -196
  20. datahub/specific/datajob.py +34 -189
  21. datahub/specific/dataproduct.py +24 -86
  22. datahub/specific/dataset.py +48 -133
  23. datahub/specific/form.py +12 -32
  24. datahub/specific/structured_property.py +9 -9
  25. datahub/specific/custom_properties.py +0 -37
  26. datahub/specific/ownership.py +0 -48
  27. datahub/specific/structured_properties.py +0 -53
  28. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/WHEEL +0 -0
  29. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/entry_points.txt +0 -0
  30. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=kKM5imQ7UziWDSMvn1Ic5ZENvcshwalM2y2qGjZxUHY,577
1
+ datahub/__init__.py,sha256=tC4XcRTMJqr-bc6T1QdoRI7MvbIkSZk8AscmW3iOtOo,577
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -114,10 +114,10 @@ datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
114
114
  datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
115
115
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
116
116
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
117
- datahub/emitter/mce_builder.py,sha256=5oZHXs85GGwfL8tY72IPnicyYrRXraN4LgtVQQcZyq8,16417
117
+ datahub/emitter/mce_builder.py,sha256=IqHOm0cpzdVC_mQOqk0yEVJUEj9xn8am2OFAwwQeX_8,16342
118
118
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
119
119
  datahub/emitter/mcp_builder.py,sha256=eOcuz41c4a3oTkNk39yYl9bTxpksxqATPHLcqyhPGT0,9856
120
- datahub/emitter/mcp_patch_builder.py,sha256=ykQFJshFrVF6DjkjcHQ8ZhDEws3ki0gmNjkHNfQtHwQ,4277
120
+ datahub/emitter/mcp_patch_builder.py,sha256=oonC8iGOvDzqj890CxOjWlBdDEF1RnwvbSZy1sivlTY,4572
121
121
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
122
122
  datahub/emitter/rest_emitter.py,sha256=oqyRuXG1o1dYjiEIH5TFMb1q0xhRbpxPIA5qkyz0iQ8,16407
123
123
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
@@ -233,20 +233,20 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
233
233
  datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
234
234
  datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
235
235
  datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
236
- datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=-12CZWeSIAkI6Kb4AY8NAF3wsC_2lxhPErm5o0oUUes,14116
236
+ datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=YMsyj6s7fggzisWfDdbT4w1MKJ3eRdNERsCShnu0Zqo,13681
237
237
  datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
238
238
  datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
239
- datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=xnYWxbhvv-rJRHLGkOWIAn4Ir__hwinEZF1F7TWWirE,26086
239
+ datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=PqbYNqM4-KukCU1meuvsk0qbiWa7UFh5hqHrHsvOSWQ,25889
240
240
  datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
241
241
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
242
242
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
243
243
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
244
244
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
245
245
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
246
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Sv6BrK62nu3xpgjYGE-x1xdSTouvvnKDJtazPobhiKQ,50813
246
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=o2My5Q7ab39qHP3jjVFCQSErogGYb14s6397xHIZSqc,50568
247
247
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
248
248
  datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
249
- datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Jg_pwnaj7l_KEcgq0enJXwrKh5jyUfBl4YB05YpkIVg,45415
249
+ datahub/ingestion/source/bigquery_v2/lineage.py,sha256=LJqdkCR8H55b3txCVBM-cs1T5QWxSTimJ3ebSgtXjgI,44874
250
250
  datahub/ingestion/source/bigquery_v2/profiler.py,sha256=8-yAoq8sX0E6VIwr75YbM8wITRNhGfxgte9BCeGNkMM,10681
251
251
  datahub/ingestion/source/bigquery_v2/queries.py,sha256=B2vJLZYfwM1J5JAckijKJTxLhDYA0yw3kfzj5oRQB5c,20151
252
252
  datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=xLf-vCUAnNuDdTHghxJvPOyGeA_XLCW3r-xj-8cfn3Q,19528
@@ -491,8 +491,8 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
491
491
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
492
492
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
493
493
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
494
- datahub/ingestion/source/tableau/tableau.py,sha256=SWEJi0LoIhb8rVVmmhVxngENo53QtXFvJE02aOIzG6Q,140034
495
- datahub/ingestion/source/tableau/tableau_common.py,sha256=9gQLq_3BlAsKll83uVlnWJRWaIDtFtREUyuimXF13Z0,26219
494
+ datahub/ingestion/source/tableau/tableau.py,sha256=fY--jFtPtCuDBAruiMStAoT7HqaTDYtiVEKzEYuzCag,140121
495
+ datahub/ingestion/source/tableau/tableau_common.py,sha256=a3Nu0Upy6_pnrd7XpSMcYHdnYca1JBW7H0jMqkYr0ME,26871
496
496
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
497
497
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
498
498
  datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
@@ -861,16 +861,19 @@ datahub/secret/datahub_secrets_client.py,sha256=WkoJDip7IAKSGDM5oHeZVL8878pd4Bix
861
861
  datahub/secret/secret_common.py,sha256=PeRFNljPlGfNrmn3VtDVbazQE6J3Q1nA3L-z3cS8LEA,2522
862
862
  datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
863
863
  datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
864
- datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,11324
865
- datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
866
- datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
867
- datahub/specific/datajob.py,sha256=5pEBrN6llpgS7jWYEfrvqpbT2vMVVpepH71jIUJUo4U,18480
868
- datahub/specific/dataproduct.py,sha256=lVv3TGkZyZ0t9CUXLnkwMhr8GK1HB-fiyRyjxTdvb7s,5259
869
- datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
870
- datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
871
- datahub/specific/ownership.py,sha256=KlYnk7o0Tq2EVugW7qRWR9D3v0C8PuqIdwgUzYwlkDM,1446
872
- datahub/specific/structured_properties.py,sha256=unc0VllBdbOm7KIWf_5tFkP4TQusN7JUKoYyOFvFQhs,1767
873
- datahub/specific/structured_property.py,sha256=IYeFyafPidNrDbn1sU65rEPwIZDS-wLY1SYXSNUUbHQ,4038
864
+ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,6684
865
+ datahub/specific/dashboard.py,sha256=D8CnOSScQ0-UICFjQnQOtqL-SlNSxhSuub4vZ3BpcuI,10017
866
+ datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
867
+ datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
868
+ datahub/specific/dataset.py,sha256=je9j3rVzpSiXoOe0UmfD7mc5vCpLAAO74Z8q1SvwPX0,9725
869
+ datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
870
+ datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
871
+ datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
872
+ datahub/specific/aspect_helpers/custom_properties.py,sha256=s87_Aq7BgF_t_I0MCjNEJxYyrNxMTb1N0hCifT8Y6Cw,2255
873
+ datahub/specific/aspect_helpers/ownership.py,sha256=rNYiJSqb_FJQhFRSIQScg4mfxgYhPvjeaYyvutY6CN0,1861
874
+ datahub/specific/aspect_helpers/structured_properties.py,sha256=EVnFS025r-PG5PAC7VENVJO-JvDYif2VeYonsC3Z8m8,2255
875
+ datahub/specific/aspect_helpers/tags.py,sha256=YHcKfRaIvv12wcmfMc8-Dk6gf6xIvJedkn451uBuz-Y,1254
876
+ datahub/specific/aspect_helpers/terms.py,sha256=l8xoOLQ2RsIl3UnKhLisQNwrGTFIPrzfvP4zjH-AhwI,1352
874
877
  datahub/sql_parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
875
878
  datahub/sql_parsing/_models.py,sha256=il-xm1RcLdi1phJUV3xrTecdOGH31akqheuSC2N4YhQ,3141
876
879
  datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPiTb6-rO3Y,7043
@@ -983,8 +986,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
983
986
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
984
987
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
985
988
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
986
- acryl_datahub-0.15.0.1rc13.dist-info/METADATA,sha256=KnCOYV5Kg855hgL3B3zmYHzPnXVeMoZYf_3ScEj1cyA,173444
987
- acryl_datahub-0.15.0.1rc13.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
988
- acryl_datahub-0.15.0.1rc13.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
989
- acryl_datahub-0.15.0.1rc13.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
990
- acryl_datahub-0.15.0.1rc13.dist-info/RECORD,,
989
+ acryl_datahub-0.15.0.1rc14.dist-info/METADATA,sha256=na5JJwiilGTUFiwOBRULg2a8NxVvzNRgwodacg0LOSU,173444
990
+ acryl_datahub-0.15.0.1rc14.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
991
+ acryl_datahub-0.15.0.1rc14.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
992
+ acryl_datahub-0.15.0.1rc14.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
993
+ acryl_datahub-0.15.0.1rc14.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.1rc13"
6
+ __version__ = "0.15.0.1rc14"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -24,6 +24,7 @@ from typing import (
24
24
 
25
25
  import typing_inspect
26
26
  from avrogen.dict_wrapper import DictWrapper
27
+ from typing_extensions import assert_never
27
28
 
28
29
  from datahub.emitter.enum_helpers import get_enum_options
29
30
  from datahub.metadata.schema_classes import (
@@ -269,9 +270,8 @@ def make_owner_urn(owner: str, owner_type: OwnerType) -> str:
269
270
  return make_user_urn(owner)
270
271
  elif owner_type == OwnerType.GROUP:
271
272
  return make_group_urn(owner)
272
- # This should pretty much never happen.
273
- # TODO: With Python 3.11, we can use typing.assert_never() here.
274
- return f"urn:li:{owner_type.value}:{owner}"
273
+ else:
274
+ assert_never(owner_type)
275
275
 
276
276
 
277
277
  def make_ownership_type_urn(type: str) -> str:
@@ -2,7 +2,19 @@ import json
2
2
  import time
3
3
  from collections import defaultdict
4
4
  from dataclasses import dataclass
5
- from typing import Any, Dict, List, Optional, Sequence, Union
5
+ from typing import (
6
+ Any,
7
+ Dict,
8
+ List,
9
+ Literal,
10
+ Optional,
11
+ Protocol,
12
+ Tuple,
13
+ Union,
14
+ runtime_checkable,
15
+ )
16
+
17
+ from typing_extensions import LiteralString
6
18
 
7
19
  from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
8
20
  from datahub.emitter.serialization_helper import pre_json_transform
@@ -19,25 +31,36 @@ from datahub.metadata.urns import Urn
19
31
  from datahub.utilities.urns.urn import guess_entity_type
20
32
 
21
33
 
34
+ @runtime_checkable
35
+ class SupportsToObj(Protocol):
36
+ def to_obj(self) -> Any:
37
+ ...
38
+
39
+
22
40
  def _recursive_to_obj(obj: Any) -> Any:
23
41
  if isinstance(obj, list):
24
42
  return [_recursive_to_obj(v) for v in obj]
25
- elif hasattr(obj, "to_obj"):
43
+ elif isinstance(obj, SupportsToObj):
26
44
  return obj.to_obj()
27
45
  else:
28
46
  return obj
29
47
 
30
48
 
49
+ PatchPath = Tuple[Union[LiteralString, Urn], ...]
50
+ PatchOp = Literal["add", "remove", "replace"]
51
+
52
+
31
53
  @dataclass
32
- class _Patch:
33
- op: str # one of ['add', 'remove', 'replace']; we don't support move, copy or test
34
- path: str
54
+ class _Patch(SupportsToObj):
55
+ op: PatchOp
56
+ path: PatchPath
35
57
  value: Any
36
58
 
37
59
  def to_obj(self) -> Dict:
60
+ quoted_path = "/" + "/".join(MetadataPatchProposal.quote(p) for p in self.path)
38
61
  return {
39
62
  "op": self.op,
40
- "path": self.path,
63
+ "path": quoted_path,
41
64
  "value": _recursive_to_obj(self.value),
42
65
  }
43
66
 
@@ -63,15 +86,16 @@ class MetadataPatchProposal:
63
86
 
64
87
  # Json Patch quoting based on https://jsonpatch.com/#json-pointer
65
88
  @classmethod
66
- def quote(cls, value: str) -> str:
67
- return value.replace("~", "~0").replace("/", "~1")
89
+ def quote(cls, value: Union[str, Urn]) -> str:
90
+ return str(value).replace("~", "~0").replace("/", "~1")
68
91
 
69
92
  def _add_patch(
70
- self, aspect_name: str, op: str, path: Union[str, Sequence[str]], value: Any
93
+ self,
94
+ aspect_name: str,
95
+ op: PatchOp,
96
+ path: PatchPath,
97
+ value: Any,
71
98
  ) -> None:
72
- if not isinstance(path, str):
73
- path = "/" + "/".join(self.quote(p) for p in path)
74
-
75
99
  # TODO: Validate that aspectName is a valid aspect for this entityType
76
100
  self.patches[aspect_name].append(_Patch(op, path, value))
77
101
 
@@ -206,9 +206,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
206
206
 
207
207
  def _init_schema_resolver(self) -> SchemaResolver:
208
208
  schema_resolution_required = (
209
- self.config.use_queries_v2
210
- or self.config.lineage_parse_view_ddl
211
- or self.config.lineage_use_sql_parser
209
+ self.config.use_queries_v2 or self.config.lineage_use_sql_parser
212
210
  )
213
211
  schema_ingestion_enabled = (
214
212
  self.config.include_schema_metadata
@@ -255,18 +253,16 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
255
253
  for project in projects:
256
254
  yield from self.bq_schema_extractor.get_project_workunits(project)
257
255
 
258
- if self.config.use_queries_v2:
259
- # Always ingest View and Snapshot lineage with schema ingestion
260
- self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
261
-
262
- yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
263
- [p.id for p in projects],
264
- self.bq_schema_extractor.view_refs_by_project,
265
- self.bq_schema_extractor.view_definitions,
266
- self.bq_schema_extractor.snapshot_refs_by_project,
267
- self.bq_schema_extractor.snapshots_by_ref,
268
- )
256
+ self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
257
+ yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
258
+ [p.id for p in projects],
259
+ self.bq_schema_extractor.view_refs_by_project,
260
+ self.bq_schema_extractor.view_definitions,
261
+ self.bq_schema_extractor.snapshot_refs_by_project,
262
+ self.bq_schema_extractor.snapshots_by_ref,
263
+ )
269
264
 
265
+ if self.config.use_queries_v2:
270
266
  # if both usage and lineage are disabled then skip queries extractor piece
271
267
  if (
272
268
  not self.config.include_usage_statistics
@@ -306,10 +302,6 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
306
302
  if self.config.include_table_lineage:
307
303
  yield from self.lineage_extractor.get_lineage_workunits(
308
304
  [p.id for p in projects],
309
- self.bq_schema_extractor.view_refs_by_project,
310
- self.bq_schema_extractor.view_definitions,
311
- self.bq_schema_extractor.snapshot_refs_by_project,
312
- self.bq_schema_extractor.snapshots_by_ref,
313
305
  self.bq_schema_extractor.table_refs,
314
306
  )
315
307
 
@@ -463,10 +463,6 @@ class BigQueryV2Config(
463
463
  default=True,
464
464
  description="Use sql parser to resolve view/table lineage.",
465
465
  )
466
- lineage_parse_view_ddl: bool = Field(
467
- default=True,
468
- description="Sql parse view ddl to get lineage.",
469
- )
470
466
 
471
467
  lineage_sql_parser_use_raw_names: bool = Field(
472
468
  default=False,
@@ -572,11 +568,9 @@ class BigQueryV2Config(
572
568
  "See [this](https://cloud.google.com/bigquery/docs/information-schema-jobs#scope_and_syntax) for details.",
573
569
  )
574
570
 
575
- # include_view_lineage and include_view_column_lineage are inherited from SQLCommonConfig
576
- # but not used in bigquery so we hide them from docs.
577
- include_view_lineage: bool = Field(default=True, hidden_from_docs=True)
578
-
579
- include_view_column_lineage: bool = Field(default=True, hidden_from_docs=True)
571
+ _include_view_lineage = pydantic_removed_field("include_view_lineage")
572
+ _include_view_column_lineage = pydantic_removed_field("include_view_column_lineage")
573
+ _lineage_parse_view_ddl = pydantic_removed_field("lineage_parse_view_ddl")
580
574
 
581
575
  @root_validator(pre=True)
582
576
  def set_include_schema_metadata(cls, values: Dict) -> Dict:
@@ -653,14 +653,11 @@ class BigQuerySchemaGenerator:
653
653
  self.report.report_dropped(table_identifier.raw_table_name())
654
654
  return
655
655
 
656
- if self.store_table_refs:
657
- table_ref = str(
658
- BigQueryTableRef(table_identifier).get_sanitized_table_ref()
659
- )
660
- self.table_refs.add(table_ref)
661
- if self.config.lineage_parse_view_ddl and view.view_definition:
662
- self.view_refs_by_project[project_id].add(table_ref)
663
- self.view_definitions[table_ref] = view.view_definition
656
+ table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
657
+ self.table_refs.add(table_ref)
658
+ if view.view_definition:
659
+ self.view_refs_by_project[project_id].add(table_ref)
660
+ self.view_definitions[table_ref] = view.view_definition
664
661
 
665
662
  view.column_count = len(columns)
666
663
  if not view.column_count:
@@ -701,14 +698,11 @@ class BigQuerySchemaGenerator:
701
698
  f"Snapshot doesn't have any column or unable to get columns for snapshot: {table_identifier}"
702
699
  )
703
700
 
704
- if self.store_table_refs:
705
- table_ref = str(
706
- BigQueryTableRef(table_identifier).get_sanitized_table_ref()
707
- )
708
- self.table_refs.add(table_ref)
709
- if snapshot.base_table_identifier:
710
- self.snapshot_refs_by_project[project_id].add(table_ref)
711
- self.snapshots_by_ref[table_ref] = snapshot
701
+ table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
702
+ self.table_refs.add(table_ref)
703
+ if snapshot.base_table_identifier:
704
+ self.snapshot_refs_by_project[project_id].add(table_ref)
705
+ self.snapshots_by_ref[table_ref] = snapshot
712
706
 
713
707
  yield from self.gen_snapshot_dataset_workunits(
714
708
  table=snapshot,
@@ -1148,7 +1142,7 @@ class BigQuerySchemaGenerator:
1148
1142
  foreignKeys=foreign_keys if foreign_keys else None,
1149
1143
  )
1150
1144
 
1151
- if self.config.lineage_parse_view_ddl or self.config.lineage_use_sql_parser:
1145
+ if self.config.lineage_use_sql_parser:
1152
1146
  self.sql_parser_schema_resolver.add_schema_metadata(
1153
1147
  dataset_urn, schema_metadata
1154
1148
  )
@@ -291,16 +291,15 @@ class BigqueryLineageExtractor:
291
291
  snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
292
292
  ) -> Iterable[MetadataWorkUnit]:
293
293
  for project in projects:
294
- if self.config.lineage_parse_view_ddl:
295
- for view in view_refs_by_project[project]:
296
- self.datasets_skip_audit_log_lineage.add(view)
297
- self.aggregator.add_view_definition(
298
- view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
299
- BigQueryTableRef.from_string_name(view)
300
- ),
301
- view_definition=view_definitions[view],
302
- default_db=project,
303
- )
294
+ for view in view_refs_by_project[project]:
295
+ self.datasets_skip_audit_log_lineage.add(view)
296
+ self.aggregator.add_view_definition(
297
+ view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
298
+ BigQueryTableRef.from_string_name(view)
299
+ ),
300
+ view_definition=view_definitions[view],
301
+ default_db=project,
302
+ )
304
303
 
305
304
  for snapshot_ref in snapshot_refs_by_project[project]:
306
305
  snapshot = snapshots_by_ref[snapshot_ref]
@@ -322,23 +321,11 @@ class BigqueryLineageExtractor:
322
321
  def get_lineage_workunits(
323
322
  self,
324
323
  projects: List[str],
325
- view_refs_by_project: Dict[str, Set[str]],
326
- view_definitions: FileBackedDict[str],
327
- snapshot_refs_by_project: Dict[str, Set[str]],
328
- snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
329
324
  table_refs: Set[str],
330
325
  ) -> Iterable[MetadataWorkUnit]:
331
326
  if not self._should_ingest_lineage():
332
327
  return
333
328
 
334
- yield from self.get_lineage_workunits_for_views_and_snapshots(
335
- projects,
336
- view_refs_by_project,
337
- view_definitions,
338
- snapshot_refs_by_project,
339
- snapshots_by_ref,
340
- )
341
-
342
329
  if self.config.use_exported_bigquery_audit_metadata:
343
330
  projects = ["*"] # project_id not used when using exported metadata
344
331
 
@@ -109,6 +109,7 @@ from datahub.ingestion.source.tableau.tableau_common import (
109
109
  make_filter,
110
110
  make_fine_grained_lineage_class,
111
111
  make_upstream_class,
112
+ optimize_query_filter,
112
113
  published_datasource_graphql_query,
113
114
  query_metadata_cursor_based_pagination,
114
115
  sheet_graphql_query,
@@ -1363,6 +1364,8 @@ class TableauSiteSource:
1363
1364
  query_filter: dict = {},
1364
1365
  page_size_override: Optional[int] = None,
1365
1366
  ) -> Iterable[dict]:
1367
+ query_filter = optimize_query_filter(query_filter)
1368
+
1366
1369
  # Calls the get_connection_object_page function to get the objects,
1367
1370
  # and automatically handles pagination.
1368
1371
  page_size = page_size_override or self.config.page_size
@@ -1,3 +1,4 @@
1
+ import copy
1
2
  import html
2
3
  import json
3
4
  import logging
@@ -35,6 +36,7 @@ from datahub.metadata.schema_classes import (
35
36
  UpstreamClass,
36
37
  )
37
38
  from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult
39
+ from datahub.utilities.ordered_set import OrderedSet
38
40
 
39
41
  logger = logging.getLogger(__name__)
40
42
 
@@ -1000,3 +1002,19 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
1000
1002
  ]
1001
1003
 
1002
1004
  return filter_pages
1005
+
1006
+
1007
+ def optimize_query_filter(query_filter: dict) -> dict:
1008
+ """
1009
+ Duplicates in the filter cause duplicates in the result,
1010
+ leading to entities/aspects being emitted multiple times unnecessarily
1011
+ """
1012
+ optimized_query = copy.deepcopy(query_filter)
1013
+
1014
+ if query_filter.get(c.ID_WITH_IN):
1015
+ optimized_query[c.ID_WITH_IN] = list(OrderedSet(query_filter[c.ID_WITH_IN]))
1016
+ if query_filter.get(c.PROJECT_NAME_WITH_IN):
1017
+ optimized_query[c.PROJECT_NAME_WITH_IN] = list(
1018
+ OrderedSet(query_filter[c.PROJECT_NAME_WITH_IN])
1019
+ )
1020
+ return optimized_query
datahub/specific/aspect_helpers/__init__.py: File without changes
@@ -0,0 +1,79 @@
1
+ from abc import abstractmethod
2
+ from typing import Dict, Optional, Tuple
3
+
4
+ from typing_extensions import Self
5
+
6
+ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
7
+
8
+
9
+ class HasCustomPropertiesPatch(MetadataPatchProposal):
10
+ @classmethod
11
+ @abstractmethod
12
+ def _custom_properties_location(self) -> Tuple[str, PatchPath]:
13
+ ...
14
+
15
+ def add_custom_property(self, key: str, value: str) -> Self:
16
+ """Add a custom property to the entity.
17
+
18
+ Args:
19
+ key: The key of the custom property.
20
+ value: The value of the custom property.
21
+
22
+ Returns:
23
+ The patch builder instance.
24
+ """
25
+ aspect_name, path = self._custom_properties_location()
26
+ self._add_patch(
27
+ aspect_name,
28
+ "add",
29
+ path=(*path, key),
30
+ value=value,
31
+ )
32
+ return self
33
+
34
+ def add_custom_properties(
35
+ self, custom_properties: Optional[Dict[str, str]] = None
36
+ ) -> Self:
37
+ if custom_properties is not None:
38
+ for key, value in custom_properties.items():
39
+ self.add_custom_property(key, value)
40
+ return self
41
+
42
+ def remove_custom_property(self, key: str) -> Self:
43
+ """Remove a custom property from the entity.
44
+
45
+ Args:
46
+ key: The key of the custom property to remove.
47
+
48
+ Returns:
49
+ The patch builder instance.
50
+ """
51
+ aspect_name, path = self._custom_properties_location()
52
+ self._add_patch(
53
+ aspect_name,
54
+ "remove",
55
+ path=(*path, key),
56
+ value={},
57
+ )
58
+ return self
59
+
60
+ def set_custom_properties(self, custom_properties: Dict[str, str]) -> Self:
61
+ """Sets the custom properties of the entity.
62
+
63
+ This method replaces all existing custom properties with the given dictionary.
64
+
65
+ Args:
66
+ custom_properties: A dictionary containing the custom properties to be set.
67
+
68
+ Returns:
69
+ The patch builder instance.
70
+ """
71
+
72
+ aspect_name, path = self._custom_properties_location()
73
+ self._add_patch(
74
+ aspect_name,
75
+ "add",
76
+ path=path,
77
+ value=custom_properties,
78
+ )
79
+ return self
@@ -0,0 +1,67 @@
1
+ from typing import List, Optional
2
+
3
+ from typing_extensions import Self
4
+
5
+ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
6
+ from datahub.metadata.schema_classes import (
7
+ OwnerClass,
8
+ OwnershipClass,
9
+ OwnershipTypeClass,
10
+ )
11
+
12
+
13
+ class HasOwnershipPatch(MetadataPatchProposal):
14
+ def add_owner(self, owner: OwnerClass) -> Self:
15
+ """Add an owner to the entity.
16
+
17
+ Args:
18
+ owner: The Owner object to add.
19
+
20
+ Returns:
21
+ The patch builder instance.
22
+ """
23
+ self._add_patch(
24
+ OwnershipClass.ASPECT_NAME,
25
+ "add",
26
+ path=("owners", owner.owner, str(owner.type)),
27
+ value=owner,
28
+ )
29
+ return self
30
+
31
+ def remove_owner(
32
+ self, owner: str, owner_type: Optional[OwnershipTypeClass] = None
33
+ ) -> Self:
34
+ """Remove an owner from the entity.
35
+
36
+ If owner_type is not provided, the owner will be removed regardless of ownership type.
37
+
38
+ Args:
39
+ owner: The owner to remove.
40
+ owner_type: The ownership type of the owner (optional).
41
+
42
+ Returns:
43
+ The patch builder instance.
44
+ """
45
+ self._add_patch(
46
+ OwnershipClass.ASPECT_NAME,
47
+ "remove",
48
+ path=("owners", owner) + ((str(owner_type),) if owner_type else ()),
49
+ value=owner,
50
+ )
51
+ return self
52
+
53
+ def set_owners(self, owners: List[OwnerClass]) -> Self:
54
+ """Set the owners of the entity.
55
+
56
+ This will effectively replace all existing owners with the new list - it doesn't really patch things.
57
+
58
+ Args:
59
+ owners: The list of owners to set.
60
+
61
+ Returns:
62
+ The patch builder instance.
63
+ """
64
+ self._add_patch(
65
+ OwnershipClass.ASPECT_NAME, "add", path=("owners",), value=owners
66
+ )
67
+ return self