acryl-datahub 1.0.0.3rc6__py3-none-any.whl → 1.0.0.3rc8__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

acryl_datahub-1.0.0.3rc8.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
- acryl_datahub-1.0.0.3rc6.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.0.0.3rc8.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=VrjEJq0-eU07ihy36V-x_6VzO_ae3ZtXi7aMCbdTSZw,323
+ datahub/_version.py,sha256=Zb8TjJYjx7zz4w3IY_sYhrV3x0o_kqbFVKFrYP8Ld48,323
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
- datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
+ datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -121,7 +121,7 @@ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1K
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
  datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
- datahub/emitter/mcp.py,sha256=v7tKlIFX4s7f77KQYeFww8QbOQu6-qU609VeQiUkcsY,9796
+ datahub/emitter/mcp.py,sha256=u6LphyhpbdFqboTAL_9MzXhGjc45o_BePoDFBkEEYWo,10484
  datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
  datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
@@ -312,8 +312,8 @@ datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
  datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
  datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
  datahub/ingestion/source/fivetran/fivetran.py,sha256=avP54ePLFVpkKVv8tr6mzC7dniTmZbKn13LP0-Ohj9k,13821
- datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=DBS5qDRsXvJzR-x6euaR5Ss3yrV76lswcweI1XwmABw,12769
- datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
+ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=5bmG705tCOaKAIjnomJfGCyCwKc7NahtW-rMP-4YifU,12902
+ datahub/ingestion/source/fivetran/fivetran_query.py,sha256=c1begMnLtWoLBmaKBiarpMZ6HTVAI6hDPKn26DYuMYI,5343
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/gc/datahub_gc.py,sha256=EXO-Stj6gGMLTSTbSBC-C3_zpjpQtFN9pAMWR95ma0I,12830
  datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=mUWcMt-_FL1SYGIgI4lGZDZGXspUUTv__5GN1W2oJ3s,17118
@@ -334,7 +334,7 @@ datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H
  datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
  datahub/ingestion/source/hex/query_fetcher.py,sha256=ZaRrja05mK0VlIvpsFi-8-EBoJr0GSLbUxBUjycibIU,12505
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/iceberg/iceberg.py,sha256=s69XzCGD5oV_hqTyvzCt5eLKZVEzVIJo_DiAEDk3p6A,34759
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=-8uaBerljvonaT7Gn9Evokq6-SSDiMRf8kKo7Hg1qY4,35414
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -457,7 +457,7 @@ datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FU
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=PY4Wy6i89nqRl92ARwXNqWwm-ifagkKbKKtxYWeswkk,29209
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=JtTrfzGqM9mk2Fr-F1X0KXzc_8ot7rD3dD2vPEuzd0E,40411
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=OJDF4x5OCa0PTTdkVdO5_Wzox4039Vhf7zwDoHHW-W4,38410
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP7i6cgK6Q1jXJPjDA1j9C8klus,6762
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
@@ -473,7 +473,7 @@ datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwy
  datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2qkzjH6CpTOgkx4,1443
  datahub/ingestion/source/sql/druid.py,sha256=IjGZdntb5hubkIzzT9qDRDpyfbckEg2GwRncvC5mDSs,2722
  datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
- datahub/ingestion/source/sql/hive.py,sha256=n0XCGkNkVAe-TEyXbxlefvohbmtALbWaC1a0_B9rlG8,30670
+ datahub/ingestion/source/sql/hive.py,sha256=voQl6QjHUXPdx7LPONuHiFavW9nRKMjHZx7o3vJQG7A,31034
  datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
@@ -901,13 +901,14 @@ datahub/pydantic/compat.py,sha256=TUEo4kSEeOWVAhV6LQtst1phrpVgGtK4uif4OI5vQ2M,19
  datahub/sdk/__init__.py,sha256=QeutS6Th8K4E4ZxXuoGrmvahN6zA9Oh9asKk5mw9AIk,1670
  datahub/sdk/_all_entities.py,sha256=inbLFv2T7dhZpGfBY5FPhCWbyE0P0G8umOt0Bc7V4XA,520
  datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
- datahub/sdk/_shared.py,sha256=5L1IkihLc7Pd2x0ypDs96kZ8ecm6o0-UZEn0J1Sffqw,24808
- datahub/sdk/_utils.py,sha256=aGE665Su8SGtj2CRDiTaXNYrJ8ADBsS0m4DmaXw79b8,1027
+ datahub/sdk/_shared.py,sha256=64m4Q7D5QdS-Z9Te0uOFwsZMBZEfwZ__2sZ8uCTHa7w,24770
+ datahub/sdk/_utils.py,sha256=oXE2BzsXE5zmSkCP3R1tObD4RHnPeH_ps83D_Dw9JaQ,1169
  datahub/sdk/container.py,sha256=yw_vw9Jl1wOYNwMHxQHLz5ZvVQVDWWHi9CWBR3hOCd8,7547
- datahub/sdk/dataset.py,sha256=5LG4c_8bHeSPYrW88KNXRgiPD8frBjR0OBVrrwdquU4,29152
+ datahub/sdk/dataset.py,sha256=7PlmqKDROJvsh1CtlhG8owOhLdelHVbSSg5hA5Kbwp0,29150
  datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
  datahub/sdk/entity_client.py,sha256=1AC9J7-jv3rD-MFEPz2PnFrT8nFkj_WO0M-4nyVOtQk,5319
- datahub/sdk/main_client.py,sha256=h2MKRhR-BO0zGCMhF7z2bTncX4hagKrAYwR3wTNTtzA,3666
+ datahub/sdk/lineage_client.py,sha256=1JPxQx5pZYQwnzL_VX3KjAbX2QWdvjSKm8S8Bya2IAc,8617
+ datahub/sdk/main_client.py,sha256=EeFZw4ot6L8DP4xHMiy07n7yICTjBn080YtW8ub5mBk,3781
  datahub/sdk/mlmodel.py,sha256=amS-hHg5tT7zAqEHG17kSA60Q7td2DFtO-W2rEfb2rY,10206
  datahub/sdk/mlmodelgroup.py,sha256=_7IkqkLVeyqYVEUHTVePSDLQyESsnwht5ca1lcMODAg,7842
  datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
@@ -936,14 +937,15 @@ datahub/sql_parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
  datahub/sql_parsing/_models.py,sha256=il-xm1RcLdi1phJUV3xrTecdOGH31akqheuSC2N4YhQ,3141
  datahub/sql_parsing/_sqlglot_patch.py,sha256=dpTiGj4Jxzvbi6HkYiOTbZ81CTFwEEo6JgYOd-dnCMA,6788
  datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn0,1751
+ datahub/sql_parsing/fingerprint_utils.py,sha256=3hGiexaQXnE7eZLxo-t7hlTyVQz7wombAcQELnN-yDY,185
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
  datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
  datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41cE8Jkm9cBY,9542
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=A3_0wSxBJSRowEaslptDpBoKO42XXx5UyTEK9PkekIs,70317
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=zKxIffMWK8YKIN0A7wzOfVkMGC7Cd_9nB6gWc-6BHuw,70671
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
  datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
- datahub/sql_parsing/sqlglot_utils.py,sha256=5cUiEWLWfVTI7uIxolAfOfNVo50qnklzhj86gxSFWqg,14943
+ datahub/sql_parsing/sqlglot_utils.py,sha256=TI11oBu1wrGeUuUGBg7hGTr6lTvztahdqiqXNJYRfbQ,14823
  datahub/sql_parsing/tool_meta_extractor.py,sha256=EV_g7sOchTSUm2p6wluNJqND7-rDYokVTqqFCM7hQ6c,7599
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
@@ -1048,8 +1050,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.0.0.3rc6.dist-info/METADATA,sha256=ZOld7fZ4VTL1L7DczapOhGc9llaIYX4yscDrNUACf-g,176989
- acryl_datahub-1.0.0.3rc6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- acryl_datahub-1.0.0.3rc6.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
- acryl_datahub-1.0.0.3rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.0.0.3rc6.dist-info/RECORD,,
+ acryl_datahub-1.0.0.3rc8.dist-info/METADATA,sha256=d-sEix3GRzPU8h4yA867OWhRXlXMRTsHYGGBtfBuk-c,176989
+ acryl_datahub-1.0.0.3rc8.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+ acryl_datahub-1.0.0.3rc8.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+ acryl_datahub-1.0.0.3rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.0.0.3rc8.dist-info/RECORD,,
acryl_datahub-1.0.0.3rc8.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (78.1.0)
+ Generator: setuptools (79.0.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.0.0.3rc6"
+ __version__ = "1.0.0.3rc8"
 
 
  def is_dev_mode() -> bool:
datahub/emitter/mcp.py CHANGED
@@ -1,9 +1,11 @@
  import dataclasses
  import json
+ import warnings
  from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
 
  from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
  from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
+ from datahub.errors import DataHubDeprecationWarning
  from datahub.metadata.schema_classes import (
      ChangeTypeClass,
      DictWrapper,
@@ -75,13 +77,22 @@ class MetadataChangeProposalWrapper:
      if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
          self.entityType = guess_entity_type(self.entityUrn)
      elif self.entityUrn and self.entityType:
-         guessed_entity_type = guess_entity_type(self.entityUrn).lower()
-         # Entity type checking is actually case insensitive.
-         # Note that urns are case sensitive, but entity types are not.
-         if self.entityType.lower() != guessed_entity_type:
+         guessed_entity_type = guess_entity_type(self.entityUrn)
+         if self.entityType.lower() != guessed_entity_type.lower():
+             # If they aren't a case-ignored match, raise an error.
              raise ValueError(
                  f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
              )
+         elif self.entityType != guessed_entity_type:
+             # If they only differ in case, normalize and print a warning.
+             self.entityType = guessed_entity_type
+             warnings.warn(
+                 f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
+                 "This will be automatically corrected for now, but will become an error in a future release. "
+                 "Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
+                 DataHubDeprecationWarning,
+                 stacklevel=3,
+             )
      elif self.entityType == _ENTITY_TYPE_UNSET:
          raise ValueError("entityType must be set if entityUrn is not set")
 
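For illustration, a minimal sketch of what the new validation means for callers (the urn and aspect below are arbitrary examples): an entityType that differs from the urn-inferred type only in case is now normalized and reported via DataHubDeprecationWarning instead of being accepted silently.

    import warnings

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import StatusClass

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # "DATASET" differs only in case from the inferred type "dataset".
        mcp = MetadataChangeProposalWrapper(
            entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)",
            entityType="DATASET",
            aspect=StatusClass(removed=False),
        )

    assert mcp.entityType == "dataset"  # normalized to the inferred type
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)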
datahub/errors.py CHANGED
@@ -41,3 +41,7 @@ class ExperimentalWarning(Warning):
 
  class APITracingWarning(Warning):
      pass
+
+
+ class DataHubDeprecationWarning(DeprecationWarning):
+     pass
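Because DataHubDeprecationWarning subclasses the standard DeprecationWarning, callers can opt into the planned strict behavior today with an ordinary warnings filter; a minimal sketch:

    import warnings

    from datahub.errors import DataHubDeprecationWarning

    # Turn the deprecation into a hard error, matching the future default.
    warnings.filterwarnings("error", category=DataHubDeprecationWarning)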
datahub/ingestion/source/fivetran/fivetran_log_api.py CHANGED
@@ -54,7 +54,7 @@ class FivetranLogAPI:
              snowflake_destination_config.database,
          )
      )
-     fivetran_log_query.set_db(
+     fivetran_log_query.set_schema(
          snowflake_destination_config.log_schema,
      )
      fivetran_log_database = snowflake_destination_config.database
@@ -66,8 +66,12 @@ class FivetranLogAPI:
      engine = create_engine(
          bigquery_destination_config.get_sql_alchemy_url(),
      )
-     fivetran_log_query.set_db(bigquery_destination_config.dataset)
-     fivetran_log_database = bigquery_destination_config.dataset
+     fivetran_log_query.set_schema(bigquery_destination_config.dataset)
+
+     # The "database" should be the BigQuery project name.
+     fivetran_log_database = engine.execute(
+         "SELECT @@project_id"
+     ).fetchone()[0]
  else:
      raise ConfigurationError(
          f"Destination platform '{destination_platform}' is not yet supported."
datahub/ingestion/source/fivetran/fivetran_query.py CHANGED
@@ -12,14 +12,14 @@ class FivetranLogQuery:
 
      def __init__(self) -> None:
          # Select query db clause
-         self.db_clause: str = ""
-
-     def set_db(self, db_name: str) -> None:
-         self.db_clause = f"{db_name}."
+         self.schema_clause: str = ""
 
      def use_database(self, db_name: str) -> str:
          return f"use database {db_name}"
 
+     def set_schema(self, schema_name: str) -> None:
+         self.schema_clause = f"{schema_name}."
+
      def get_connectors_query(self) -> str:
          return f"""\
  SELECT
@@ -30,7 +30,7 @@ SELECT
      paused,
      sync_frequency,
      destination_id
- FROM {self.db_clause}connector
+ FROM {self.schema_clause}connector
  WHERE
      _fivetran_deleted = FALSE
  QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY _fivetran_synced DESC) = 1
@@ -42,7 +42,7 @@ SELECT id as user_id,
      given_name,
      family_name,
      email
- FROM {self.db_clause}user
+ FROM {self.schema_clause}user
  """
 
      def get_sync_logs_query(
@@ -62,7 +62,7 @@ WITH ranked_syncs AS (
      MAX(CASE WHEN message_event = 'sync_end' THEN time_stamp END) as end_time,
      MAX(CASE WHEN message_event = 'sync_end' THEN message_data END) as end_message_data,
      ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY MAX(time_stamp) DESC) as rn
- FROM {self.db_clause}log
+ FROM {self.schema_clause}log
  WHERE message_event in ('sync_start', 'sync_end')
  AND time_stamp > CURRENT_TIMESTAMP - INTERVAL '{syncs_interval} days'
  AND connector_id IN ({formatted_connector_ids})
@@ -99,11 +99,11 @@ FROM (
      dsm.name as destination_schema_name,
      tl.created_at as created_at,
      ROW_NUMBER() OVER (PARTITION BY stm.connector_id, stm.id, dtm.id ORDER BY tl.created_at DESC) as table_combo_rn
- FROM {self.db_clause}table_lineage as tl
- JOIN {self.db_clause}source_table_metadata as stm on tl.source_table_id = stm.id
- JOIN {self.db_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
- JOIN {self.db_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
- JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
+ FROM {self.schema_clause}table_lineage as tl
+ JOIN {self.schema_clause}source_table_metadata as stm on tl.source_table_id = stm.id
+ JOIN {self.schema_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
+ JOIN {self.schema_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
+ JOIN {self.schema_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
  WHERE stm.connector_id IN ({formatted_connector_ids})
  )
  -- Ensure that we only get back one entry per source and destination pair.
@@ -131,13 +131,13 @@ FROM (
      dcm.name as destination_column_name,
      cl.created_at as created_at,
      ROW_NUMBER() OVER (PARTITION BY stm.connector_id, cl.source_column_id, cl.destination_column_id ORDER BY cl.created_at DESC) as column_combo_rn
- FROM {self.db_clause}column_lineage as cl
- JOIN {self.db_clause}source_column_metadata as scm
+ FROM {self.schema_clause}column_lineage as cl
+ JOIN {self.schema_clause}source_column_metadata as scm
      ON cl.source_column_id = scm.id
- JOIN {self.db_clause}destination_column_metadata as dcm
+ JOIN {self.schema_clause}destination_column_metadata as dcm
      ON cl.destination_column_id = dcm.id
  -- Only joining source_table_metadata to get the connector_id.
- JOIN {self.db_clause}source_table_metadata as stm
+ JOIN {self.schema_clause}source_table_metadata as stm
      ON scm.table_id = stm.id
  WHERE stm.connector_id IN ({formatted_connector_ids})
  )
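A minimal usage sketch of the renamed API (the schema name is an example): generated queries are now qualified with a schema clause rather than a database clause.

    from datahub.ingestion.source.fivetran.fivetran_query import FivetranLogQuery

    q = FivetranLogQuery()
    q.set_schema("fivetran_log")  # formerly set_db()
    sql = q.get_connectors_query()
    assert "FROM fivetran_log.connector" in sql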
datahub/ingestion/source/iceberg/iceberg.py CHANGED
@@ -16,7 +16,7 @@ from pyiceberg.exceptions import (
  )
  from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
  from pyiceberg.table import Table
- from pyiceberg.typedef import Identifier
+ from pyiceberg.typedef import Identifier, Properties
  from pyiceberg.types import (
      BinaryType,
      BooleanType,
@@ -387,8 +387,13 @@ class IcebergSource(StatefulIngestionSourceBase):
              env=self.config.env,
          )
      )
+     namespace_properties: Properties = catalog.load_namespace_properties(
+         namespace
+     )
      namespaces.append((namespace, namespace_urn))
-     for aspect in self._create_iceberg_namespace_aspects(namespace):
+     for aspect in self._create_iceberg_namespace_aspects(
+         namespace, namespace_properties
+     ):
          yield stamping_processor.stamp_wu(
              MetadataChangeProposalWrapper(
                  entityUrn=namespace_urn, aspect=aspect
@@ -608,12 +613,23 @@ class IcebergSource(StatefulIngestionSourceBase):
          return self.report
 
      def _create_iceberg_namespace_aspects(
-         self, namespace: Identifier
+         self, namespace: Identifier, properties: Properties
      ) -> Iterable[_Aspect]:
          namespace_repr = ".".join(namespace)
+         custom_properties: Dict[str, str] = {}
+         for k, v in properties.items():
+             try:
+                 custom_properties[str(k)] = str(v)
+             except Exception as e:
+                 LOGGER.warning(
+                     f"Exception when trying to parse namespace properties for {namespace_repr}. Exception: {e}"
+                 )
          yield Status(removed=False)
          yield ContainerProperties(
-             name=namespace_repr, qualifiedName=namespace_repr, env=self.config.env
+             name=namespace_repr,
+             qualifiedName=namespace_repr,
+             env=self.config.env,
+             customProperties=custom_properties,
          )
          yield SubTypes(typeNames=[DatasetContainerSubTypes.NAMESPACE])
          dpi = self._get_dataplatform_instance_aspect()
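Illustratively, the effect of the namespace change above (the property names and values are hypothetical): pyiceberg namespace properties are coerced to strings and attached to the namespace container.

    # Hypothetical properties as returned by catalog.load_namespace_properties().
    properties = {"owner": "data-eng", "write.format.default": "parquet"}
    custom_properties = {str(k): str(v) for k, v in properties.items()}
    # The source now emits ContainerProperties(..., customProperties=custom_properties)
    # for each namespace container.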
datahub/ingestion/source/snowflake/snowflake_query.py CHANGED
@@ -71,14 +71,6 @@ class SnowflakeQuery:
      def current_warehouse() -> str:
          return "select CURRENT_WAREHOUSE()"
 
-     @staticmethod
-     def current_database() -> str:
-         return "select CURRENT_DATABASE()"
-
-     @staticmethod
-     def current_schema() -> str:
-         return "select CURRENT_SCHEMA()"
-
      @staticmethod
      def show_databases() -> str:
          return "show databases"
@@ -107,8 +99,8 @@ class SnowflakeQuery:
  order by database_name"""
 
      @staticmethod
-     def schemas_for_database(db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def schemas_for_database(db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT schema_name AS "SCHEMA_NAME",
      created AS "CREATED",
@@ -119,8 +111,8 @@ class SnowflakeQuery:
  order by schema_name"""
 
      @staticmethod
-     def tables_for_database(db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def tables_for_database(db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT table_catalog AS "TABLE_CATALOG",
      table_schema AS "TABLE_SCHEMA",
@@ -142,8 +134,8 @@ class SnowflakeQuery:
  order by table_schema, table_name"""
 
      @staticmethod
-     def tables_for_schema(schema_name: str, db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def tables_for_schema(schema_name: str, db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT table_catalog AS "TABLE_CATALOG",
      table_schema AS "TABLE_SCHEMA",
@@ -165,8 +157,8 @@ class SnowflakeQuery:
  order by table_schema, table_name"""
 
      @staticmethod
-     def procedures_for_database(db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def procedures_for_database(db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT procedure_catalog AS "PROCEDURE_CATALOG",
      procedure_schema AS "PROCEDURE_SCHEMA",
@@ -382,26 +374,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
  ORDER BY query_start_time DESC
  ;"""
 
-     @staticmethod
-     def view_dependencies() -> str:
-         return """
- SELECT
-     concat(
-         referenced_database, '.', referenced_schema,
-         '.', referenced_object_name
-     ) AS "VIEW_UPSTREAM",
-     referenced_object_domain as "REFERENCED_OBJECT_DOMAIN",
-     concat(
-         referencing_database, '.', referencing_schema,
-         '.', referencing_object_name
-     ) AS "DOWNSTREAM_VIEW",
-     referencing_object_domain AS "REFERENCING_OBJECT_DOMAIN"
- FROM
-     snowflake.account_usage.object_dependencies
- WHERE
-     referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
- """
-
      # Note on use of `upstreams_deny_pattern` to ignore temporary tables:
      # Snowflake access history may include temporary tables in DIRECT_OBJECTS_ACCESSED and
      # OBJECTS_MODIFIED->columns->directSources. We do not need these temporary tables and filter these in the query.
@@ -425,32 +397,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
          upstreams_deny_pattern,
      )
 
-     @staticmethod
-     def view_dependencies_v2() -> str:
-         return """
- SELECT
-     ARRAY_UNIQUE_AGG(
-         OBJECT_CONSTRUCT(
-             'upstream_object_name', concat(
-                 referenced_database, '.', referenced_schema,
-                 '.', referenced_object_name
-             ),
-             'upstream_object_domain', referenced_object_domain
-         )
-     ) as "UPSTREAM_TABLES",
-     concat(
-         referencing_database, '.', referencing_schema,
-         '.', referencing_object_name
-     ) AS "DOWNSTREAM_TABLE_NAME",
-     ANY_VALUE(referencing_object_domain) AS "DOWNSTREAM_TABLE_DOMAIN"
- FROM
-     snowflake.account_usage.object_dependencies
- WHERE
-     referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
- GROUP BY
-     DOWNSTREAM_TABLE_NAME
- """
-
      @staticmethod
      def show_external_tables() -> str:
          return "show external tables in account"
datahub/ingestion/source/sql/hive.py CHANGED
@@ -637,8 +637,13 @@ def get_view_definition_patched(self, connection, view_name, schema=None, **kw):
          self.identifier_preparer.quote_identifier(schema),
          self.identifier_preparer.quote_identifier(view_name),
      )
-     row = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchone()
-     return row[0]
+     # Hive responds to the SHOW CREATE TABLE with the full view DDL,
+     # including the view definition. However, for multiline view definitions,
+     # it returns multiple rows (of one column each), each with a part of the definition.
+     # Any whitespace at the beginning/end of each view definition line is lost.
+     rows = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchall()
+     parts = [row[0] for row in rows]
+     return "\n".join(parts)
 
 
  HiveDialect.get_view_names = get_view_names_patched
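To make the reassembly concrete, a toy illustration with fabricated rows: each row carries one line of the view DDL, and joining them restores the multiline definition that fetchone() used to truncate.

    # Hypothetical result rows from SHOW CREATE TABLE on a multiline view.
    rows = [("CREATE VIEW `db`.`v` AS",), ("SELECT a, b",), ("FROM t",)]
    definition = "\n".join(row[0] for row in rows)
    # definition == "CREATE VIEW `db`.`v` AS\nSELECT a, b\nFROM t"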
datahub/sdk/_shared.py CHANGED
@@ -41,7 +41,7 @@ from datahub.metadata.urns import (
      Urn,
      VersionSetUrn,
  )
- from datahub.sdk._utils import add_list_unique, remove_list_unique
+ from datahub.sdk._utils import DEFAULT_ACTOR_URN, add_list_unique, remove_list_unique
  from datahub.sdk.entity import Entity
  from datahub.utilities.urns.error import InvalidUrnError
 
@@ -54,8 +54,6 @@ DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
 
  ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
 
- _DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
-
  TrainingMetricsInputType: TypeAlias = Union[
      List[models.MLMetricClass], Dict[str, Optional[str]]
  ]
@@ -475,7 +473,7 @@ class HasTerms(Entity):
      def _terms_audit_stamp(self) -> models.AuditStampClass:
          return models.AuditStampClass(
              time=0,
-             actor=_DEFAULT_ACTOR_URN,
+             actor=DEFAULT_ACTOR_URN,
          )
 
      def set_terms(self, terms: TermsInputType) -> None:
@@ -563,7 +561,7 @@ class HasInstitutionalMemory(Entity):
      def _institutional_memory_audit_stamp(self) -> models.AuditStampClass:
          return models.AuditStampClass(
              time=0,
-             actor=_DEFAULT_ACTOR_URN,
+             actor=DEFAULT_ACTOR_URN,
          )
 
      @classmethod
datahub/sdk/_utils.py CHANGED
@@ -1,6 +1,10 @@
  from typing import Any, Callable, List, Protocol, TypeVar
 
  from datahub.errors import ItemNotFoundError
+ from datahub.metadata.urns import CorpUserUrn
+
+ # TODO: Change __ingestion to _ingestion.
+ DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
 
 
  class _SupportsEq(Protocol):
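The constant was previously the private _DEFAULT_ACTOR_URN in datahub/sdk/_shared.py; a quick sketch of what the shared value resolves to:

    from datahub.sdk._utils import DEFAULT_ACTOR_URN

    # Default audit-stamp actor used by the SDK tag/term/link helpers.
    assert DEFAULT_ACTOR_URN == "urn:li:corpuser:__ingestion"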
datahub/sdk/dataset.py CHANGED
@@ -87,7 +87,7 @@ def _parse_upstream_input(
      assert_never(upstream_input)
 
 
- def _parse_cll_mapping(
+ def parse_cll_mapping(
      *,
      upstream: DatasetUrnOrStr,
      downstream: DatasetUrnOrStr,
@@ -142,7 +142,7 @@ def _parse_upstream_lineage_input(
              )
          )
          cll.extend(
-             _parse_cll_mapping(
+             parse_cll_mapping(
                  upstream=dataset_urn,
                  downstream=downstream_urn,
                  cll_mapping=column_lineage,
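Finally, a usage sketch of the newly public helper (the urns are examples; the sketch assumes cll_mapping maps each downstream column to the list of upstream columns it derives from, matching how _parse_upstream_lineage_input calls it):

    from datahub.sdk.dataset import parse_cll_mapping

    cll = parse_cll_mapping(
        upstream="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.src,PROD)",
        downstream="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.dst,PROD)",
        cll_mapping={"id": ["id"], "full_name": ["first_name", "last_name"]},
    )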