acryl-datahub 1.0.0.3rc7__py3-none-any.whl → 1.0.0.3rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic.

acryl_datahub-1.0.0.3rc9.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
- acryl_datahub-1.0.0.3rc7.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.0.0.3rc9.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=n49MhTfoZh6lQvQMYp45DAwrexBWePxkeHMtc0_gSGk,323
+ datahub/_version.py,sha256=6XeiyYGjXD3cLrKhOmtOz90fhxaal2Ir3lq-m_cgOes,323
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
- datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
+ datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -48,7 +48,7 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
  datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
  datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/api/entities/dataset/dataset.py,sha256=UX9UW5z9realL7lPxAhaKKigpVAqtxAfhqS6vY2eDnU,49638
+ datahub/api/entities/dataset/dataset.py,sha256=se2tv6jsvS5BmH5b53mKs8lEPF4LNzVRrd9PDovPIQk,49627
  datahub/api/entities/forms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/api/entities/forms/forms.py,sha256=17GLVVrunUj6hWS7CADhNPrT4gV6il905Ny_Y_5S5Qc,15889
  datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp63V4LX4THGTAMq3ep8THrSGP4,537
@@ -94,7 +94,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=Rb06XJKxqda5RSUo188B90Wn
  datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
  datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
  datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
- datahub/configuration/common.py,sha256=2AUcZD8c4W8gdopt07UcQYPLl0DR-O7Spb-LUqguJXc,10619
+ datahub/configuration/common.py,sha256=0OXk_yhyVJroP47Nant5-5fRWOahkVB8YU4G70TR9BI,10692
  datahub/configuration/config_loader.py,sha256=hRzPFxkz-w9IqkpSa5vwCzSra1p49DyfeJNeyqGa8-4,6827
  datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
  datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
@@ -121,7 +121,7 @@ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1K
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
  datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
- datahub/emitter/mcp.py,sha256=v7tKlIFX4s7f77KQYeFww8QbOQu6-qU609VeQiUkcsY,9796
+ datahub/emitter/mcp.py,sha256=u6LphyhpbdFqboTAL_9MzXhGjc45o_BePoDFBkEEYWo,10484
  datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
  datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
@@ -146,7 +146,7 @@ datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwB
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
  datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
  datahub/ingestion/api/source.py,sha256=2h7Tx2As0gu5-6d7PiRuJ8myr_y3MRx2YYgH735Jj18,19494
- datahub/ingestion/api/source_helpers.py,sha256=tzFl2sP-sFOZWX0O0eEQK-0tJcJ5vMs2bccpKRKBIFY,21081
+ datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -312,8 +312,8 @@ datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
  datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
  datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
  datahub/ingestion/source/fivetran/fivetran.py,sha256=avP54ePLFVpkKVv8tr6mzC7dniTmZbKn13LP0-Ohj9k,13821
- datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=DBS5qDRsXvJzR-x6euaR5Ss3yrV76lswcweI1XwmABw,12769
- datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
+ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=5bmG705tCOaKAIjnomJfGCyCwKc7NahtW-rMP-4YifU,12902
+ datahub/ingestion/source/fivetran/fivetran_query.py,sha256=c1begMnLtWoLBmaKBiarpMZ6HTVAI6hDPKn26DYuMYI,5343
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/gc/datahub_gc.py,sha256=EXO-Stj6gGMLTSTbSBC-C3_zpjpQtFN9pAMWR95ma0I,12830
  datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=mUWcMt-_FL1SYGIgI4lGZDZGXspUUTv__5GN1W2oJ3s,17118
@@ -445,19 +445,19 @@ datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiR
  datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
  datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
+ datahub/ingestion/source/slack/slack.py,sha256=B_HyRlhY2VWjMHDspQZqqejvVhnTREFx5B2GNa0jqzE,25903
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMPOkX5qAQLFX2KqagojQ,2678
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
  datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
- datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=pEw2O9xoTSIWDiROlkF8k4oj5zBjkqTnynLvut08yhc,17796
+ datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=50fHPCzOny-3Mh_DxJU83mPTww5m5T48pzqb8m6QYuo,18491
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=PY4Wy6i89nqRl92ARwXNqWwm-ifagkKbKKtxYWeswkk,29209
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=JtTrfzGqM9mk2Fr-F1X0KXzc_8ot7rD3dD2vPEuzd0E,40411
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=OJDF4x5OCa0PTTdkVdO5_Wzox4039Vhf7zwDoHHW-W4,38410
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP7i6cgK6Q1jXJPjDA1j9C8klus,6762
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
@@ -473,7 +473,7 @@ datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwy
  datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2qkzjH6CpTOgkx4,1443
  datahub/ingestion/source/sql/druid.py,sha256=IjGZdntb5hubkIzzT9qDRDpyfbckEg2GwRncvC5mDSs,2722
  datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
- datahub/ingestion/source/sql/hive.py,sha256=n0XCGkNkVAe-TEyXbxlefvohbmtALbWaC1a0_B9rlG8,30670
+ datahub/ingestion/source/sql/hive.py,sha256=voQl6QjHUXPdx7LPONuHiFavW9nRKMjHZx7o3vJQG7A,31034
  datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
@@ -924,7 +924,7 @@ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,668
  datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
  datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
  datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
- datahub/specific/dataset.py,sha256=EhSjarFfvxF-JbVuNBNIcV1pEebqAdcYHDZIhnG_8sk,9714
+ datahub/specific/dataset.py,sha256=E4XTS4T1c5mjdDn0sTAZsXJ4eK-REV3wwhSxTQkjWJs,10067
  datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
  datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
  datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1050,8 +1050,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.0.0.3rc7.dist-info/METADATA,sha256=cS9ynN7khxxthJ9wKCRUKd8-_EHBy-cjhxBPx-KqCu0,176989
- acryl_datahub-1.0.0.3rc7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- acryl_datahub-1.0.0.3rc7.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
- acryl_datahub-1.0.0.3rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.0.0.3rc7.dist-info/RECORD,,
+ acryl_datahub-1.0.0.3rc9.dist-info/METADATA,sha256=c2nBayXTv9LuBzQY7HuANB9MbIfnnA0H6z_DUTq4GKE,176989
+ acryl_datahub-1.0.0.3rc9.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+ acryl_datahub-1.0.0.3rc9.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+ acryl_datahub-1.0.0.3rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.0.0.3rc9.dist-info/RECORD,,
acryl_datahub-1.0.0.3rc9.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (78.1.0)
+ Generator: setuptools (79.0.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.0.0.3rc7"
+ __version__ = "1.0.0.3rc9"
  
  
  def is_dev_mode() -> bool:
datahub/api/entities/dataset/dataset.py CHANGED
@@ -509,16 +509,14 @@ class Dataset(StrictModel):
      def generate_mcp(
          self,
      ) -> Iterable[Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper]]:
-         mcp = MetadataChangeProposalWrapper(
-             entityUrn=self.urn,
-             aspect=DatasetPropertiesClass(
-                 description=self.description,
-                 name=self.name,
-                 customProperties=self.properties,
-                 externalUrl=self.external_url,
-             ),
-         )
-         yield mcp
+         patch_builder = self.patch_builder()
+ 
+         patch_builder.set_custom_properties(self.properties or {})
+         patch_builder.set_description(self.description)
+         patch_builder.set_display_name(self.name)
+         patch_builder.set_external_url(self.external_url)
+ 
+         yield from patch_builder.build()
  
          if self.schema_metadata:
              schema_fields = set()
@@ -981,7 +979,7 @@ class Dataset(StrictModel):
  
      def model_dump(self, **kwargs):
          """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
-         exclude = kwargs.pop("exclude", set())
+         exclude = kwargs.pop("exclude", None) or set()
  
          # If id and name are identical, exclude name from the output
          if self.id == self.name and self.id is not None:
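Two behavioral notes on the dataset.py changes above: generate_mcp now emits aspect patches through the dataset's patch builder instead of upserting a full DatasetProperties aspect, and model_dump now tolerates an explicit exclude=None. A minimal sketch of the exclude fix, in plain Python with the surrounding Dataset logic stripped away:

    def exclude_old(**kwargs):
        return kwargs.pop("exclude", set())  # exclude=None slips through as None

    def exclude_new(**kwargs):
        return kwargs.pop("exclude", None) or set()  # None is coerced to an empty set

    assert exclude_old(exclude=None) is None   # old: later set operations would raise
    assert exclude_new(exclude=None) == set()  # new: always a set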
datahub/configuration/common.py CHANGED
@@ -33,10 +33,15 @@ REDACT_KEYS = {
  }
  REDACT_SUFFIXES = {
      "_password",
+     "-password",
      "_secret",
+     "-secret",
      "_token",
+     "-token",
      "_key",
+     "-key",
      "_key_id",
+     "-key-id",
  }
  
  
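The new kebab-case suffixes mean keys like client-secret are now redacted alongside client_secret. A hypothetical sketch of suffix-based redaction (redact_value is an illustration, not the actual helper in configuration/common.py):

    REDACT_SUFFIXES = {
        "_password", "-password", "_secret", "-secret", "_token", "-token",
        "_key", "-key", "_key_id", "-key-id",
    }

    def redact_value(key: str, value: str) -> str:
        # Hypothetical illustration: mask any value whose key ends with a
        # sensitive suffix, in either snake_case or kebab-case form.
        if any(key.lower().endswith(suffix) for suffix in REDACT_SUFFIXES):
            return "********"
        return value

    assert redact_value("client-secret", "hunter2") == "********"
    assert redact_value("username", "alice") == "alice"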
datahub/emitter/mcp.py CHANGED
@@ -1,9 +1,11 @@
  import dataclasses
  import json
+ import warnings
  from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
  
  from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
  from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
+ from datahub.errors import DataHubDeprecationWarning
  from datahub.metadata.schema_classes import (
      ChangeTypeClass,
      DictWrapper,
@@ -75,13 +77,22 @@ class MetadataChangeProposalWrapper:
          if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
              self.entityType = guess_entity_type(self.entityUrn)
          elif self.entityUrn and self.entityType:
-             guessed_entity_type = guess_entity_type(self.entityUrn).lower()
-             # Entity type checking is actually case insensitive.
-             # Note that urns are case sensitive, but entity types are not.
-             if self.entityType.lower() != guessed_entity_type:
+             guessed_entity_type = guess_entity_type(self.entityUrn)
+             if self.entityType.lower() != guessed_entity_type.lower():
+                 # If they aren't a case-ignored match, raise an error.
                  raise ValueError(
                      f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
                  )
+             elif self.entityType != guessed_entity_type:
+                 # If they only differ in case, normalize and print a warning.
+                 self.entityType = guessed_entity_type
+                 warnings.warn(
+                     f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
+                     "This will be automatically corrected for now, but will become an error in a future release. "
+                     "Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
+                     DataHubDeprecationWarning,
+                     stacklevel=3,
+                 )
          elif self.entityType == _ENTITY_TYPE_UNSET:
              raise ValueError("entityType must be set if entityUrn is not set")
  
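With this change, an entityType that differs from the URN-derived type only in case is normalized instead of rejected, with a DataHubDeprecationWarning. A sketch of exercising the new behavior, assuming the 1.0.0.3rc9 package:

    import warnings

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.errors import DataHubDeprecationWarning
    from datahub.metadata.schema_classes import StatusClass

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # "DATASET" differs only in case from the "dataset" type implied by the
        # urn, so it is corrected with a warning rather than a ValueError.
        mcp = MetadataChangeProposalWrapper(
            entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)",
            entityType="DATASET",
            aspect=StatusClass(removed=False),
        )

    assert mcp.entityType == "dataset"
    assert any(issubclass(w.category, DataHubDeprecationWarning) for w in caught)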
datahub/errors.py CHANGED
@@ -41,3 +41,7 @@ class ExperimentalWarning(Warning):
  
  class APITracingWarning(Warning):
      pass
+ 
+ 
+ class DataHubDeprecationWarning(DeprecationWarning):
+     pass
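Because DataHubDeprecationWarning subclasses DeprecationWarning, which Python hides by default outside __main__, callers can opt in or escalate it with the standard warnings machinery:

    import warnings

    from datahub.errors import DataHubDeprecationWarning

    # Escalate to an error in test suites to catch mismatched entityType values early.
    warnings.filterwarnings("error", category=DataHubDeprecationWarning)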
datahub/ingestion/api/source_helpers.py CHANGED
@@ -92,6 +92,7 @@ def create_dataset_props_patch_builder(
      patch_builder.set_last_modified(dataset_properties.lastModified)
      patch_builder.set_qualified_name(dataset_properties.qualifiedName)
      patch_builder.add_custom_properties(dataset_properties.customProperties)
+     patch_builder.set_external_url(dataset_properties.externalUrl)
  
      return patch_builder
  
datahub/ingestion/source/fivetran/fivetran_log_api.py CHANGED
@@ -54,7 +54,7 @@ class FivetranLogAPI:
                      snowflake_destination_config.database,
                  )
              )
-             fivetran_log_query.set_db(
+             fivetran_log_query.set_schema(
                  snowflake_destination_config.log_schema,
              )
              fivetran_log_database = snowflake_destination_config.database
@@ -66,8 +66,12 @@ class FivetranLogAPI:
              engine = create_engine(
                  bigquery_destination_config.get_sql_alchemy_url(),
              )
-             fivetran_log_query.set_db(bigquery_destination_config.dataset)
-             fivetran_log_database = bigquery_destination_config.dataset
+             fivetran_log_query.set_schema(bigquery_destination_config.dataset)
+ 
+             # The "database" should be the BigQuery project name.
+             fivetran_log_database = engine.execute(
+                 "SELECT @@project_id"
+             ).fetchone()[0]
          else:
              raise ConfigurationError(
                  f"Destination platform '{destination_platform}' is not yet supported."
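The BigQuery destination now resolves the "database" to the GCP project via the @@project_id system variable instead of reusing the dataset name. A sketch of the same lookup, assuming the sqlalchemy-bigquery dialect is installed and application-default credentials are configured:

    from sqlalchemy import create_engine

    engine = create_engine("bigquery://")  # project taken from default credentials
    project_id = engine.execute("SELECT @@project_id").fetchone()[0]
    print(project_id)  # e.g. "my-gcp-project"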
datahub/ingestion/source/fivetran/fivetran_query.py CHANGED
@@ -12,14 +12,14 @@ class FivetranLogQuery:
  
      def __init__(self) -> None:
          # Select query db clause
-         self.db_clause: str = ""
- 
-     def set_db(self, db_name: str) -> None:
-         self.db_clause = f"{db_name}."
+         self.schema_clause: str = ""
  
      def use_database(self, db_name: str) -> str:
          return f"use database {db_name}"
  
+     def set_schema(self, schema_name: str) -> None:
+         self.schema_clause = f"{schema_name}."
+ 
      def get_connectors_query(self) -> str:
          return f"""\
  SELECT
@@ -30,7 +30,7 @@ SELECT
    paused,
    sync_frequency,
    destination_id
- FROM {self.db_clause}connector
+ FROM {self.schema_clause}connector
  WHERE
    _fivetran_deleted = FALSE
  QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY _fivetran_synced DESC) = 1
@@ -42,7 +42,7 @@ SELECT id as user_id,
  given_name,
  family_name,
  email
- FROM {self.db_clause}user
+ FROM {self.schema_clause}user
  """
  
      def get_sync_logs_query(
@@ -62,7 +62,7 @@ WITH ranked_syncs AS (
          MAX(CASE WHEN message_event = 'sync_end' THEN time_stamp END) as end_time,
          MAX(CASE WHEN message_event = 'sync_end' THEN message_data END) as end_message_data,
          ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY MAX(time_stamp) DESC) as rn
-     FROM {self.db_clause}log
+     FROM {self.schema_clause}log
      WHERE message_event in ('sync_start', 'sync_end')
          AND time_stamp > CURRENT_TIMESTAMP - INTERVAL '{syncs_interval} days'
          AND connector_id IN ({formatted_connector_ids})
@@ -99,11 +99,11 @@ FROM (
          dsm.name as destination_schema_name,
          tl.created_at as created_at,
          ROW_NUMBER() OVER (PARTITION BY stm.connector_id, stm.id, dtm.id ORDER BY tl.created_at DESC) as table_combo_rn
-     FROM {self.db_clause}table_lineage as tl
-     JOIN {self.db_clause}source_table_metadata as stm on tl.source_table_id = stm.id
-     JOIN {self.db_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
-     JOIN {self.db_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
-     JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
+     FROM {self.schema_clause}table_lineage as tl
+     JOIN {self.schema_clause}source_table_metadata as stm on tl.source_table_id = stm.id
+     JOIN {self.schema_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
+     JOIN {self.schema_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
+     JOIN {self.schema_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
      WHERE stm.connector_id IN ({formatted_connector_ids})
  )
  -- Ensure that we only get back one entry per source and destination pair.
@@ -131,13 +131,13 @@ FROM (
          dcm.name as destination_column_name,
          cl.created_at as created_at,
          ROW_NUMBER() OVER (PARTITION BY stm.connector_id, cl.source_column_id, cl.destination_column_id ORDER BY cl.created_at DESC) as column_combo_rn
-     FROM {self.db_clause}column_lineage as cl
-     JOIN {self.db_clause}source_column_metadata as scm
+     FROM {self.schema_clause}column_lineage as cl
+     JOIN {self.schema_clause}source_column_metadata as scm
        ON cl.source_column_id = scm.id
-     JOIN {self.db_clause}destination_column_metadata as dcm
+     JOIN {self.schema_clause}destination_column_metadata as dcm
        ON cl.destination_column_id = dcm.id
      -- Only joining source_table_metadata to get the connector_id.
-     JOIN {self.db_clause}source_table_metadata as stm
+     JOIN {self.schema_clause}source_table_metadata as stm
        ON scm.table_id = stm.id
      WHERE stm.connector_id IN ({formatted_connector_ids})
  )
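The set_db/db_clause pair is renamed to set_schema/schema_clause, which matches what the prefix actually is: on Snowflake the queries run after a use database statement, and on BigQuery the prefix is a dataset. Usage stays analogous:

    from datahub.ingestion.source.fivetran.fivetran_query import FivetranLogQuery

    fq = FivetranLogQuery()
    fq.set_schema("fivetran_log")  # replaces the old fq.set_db(...)
    # Generated SQL is now schema-qualified, e.g. "FROM fivetran_log.connector".
    print(fq.get_connectors_query())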
datahub/ingestion/source/slack/slack.py CHANGED
@@ -1,6 +1,5 @@
  import json
  import logging
- import textwrap
  from dataclasses import dataclass
  from typing import Any, Dict, Iterable, List, Optional, Tuple
  
@@ -613,6 +612,10 @@ class SlackSource(StatefulIngestionSourceBase):
              ),
          )
  
+     @retry(
+         wait=wait_exponential(multiplier=2, min=4, max=60),
+         before_sleep=before_sleep_log(logger, logging.ERROR, True),
+     )
      def get_user_to_be_updated(
          self,
      ) -> Iterable[Tuple[CorpUser, Optional[CorpUserEditableInfoClass]]]:
@@ -634,56 +637,5 @@ class SlackSource(StatefulIngestionSourceBase):
                  if user_obj.email is not None:
                      yield (user_obj, editable_properties)
  
-     @retry(
-         wait=wait_exponential(multiplier=2, min=4, max=60),
-         before_sleep=before_sleep_log(logger, logging.ERROR, True),
-     )
-     def get_user_to_be_updated_oss(self) -> Iterable[CorpUser]:
-         graphql_query = textwrap.dedent(
-             """
-             query listUsers($input: ListUsersInput!) {
-                 listUsers(input: $input) {
-                     total
-                     users {
-                         urn
-                         editableProperties {
-                             email
-                             slack
-                         }
-                     }
-                 }
-             }
-             """
-         )
-         start = 0
-         count = 10
-         total = count
- 
-         assert self.ctx.graph is not None
- 
-         while start < total:
-             variables = {"input": {"start": start, "count": count}}
-             response = self.ctx.graph.execute_graphql(
-                 query=graphql_query, variables=variables
-             )
-             list_users = response.get("listUsers", {})
-             total = list_users.get("total", 0)
-             users = list_users.get("users", [])
-             for user in users:
-                 user_obj = CorpUser()
-                 editable_properties = user.get("editableProperties", {})
-                 user_obj.urn = user.get("urn")
-                 if user_obj.urn is None:
-                     continue
-                 if editable_properties is not None:
-                     user_obj.email = editable_properties.get("email")
-                 if user_obj.email is None:
-                     urn_id = Urn.from_string(user_obj.urn).get_entity_id_as_string()
-                     if "@" in urn_id:
-                         user_obj.email = urn_id
-                 if user_obj.email is not None:
-                     yield user_obj
-             start += count
- 
      def get_report(self) -> SourceReport:
          return self.report
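The GraphQL fallback get_user_to_be_updated_oss is removed, and its retry policy now guards get_user_to_be_updated directly. For reference, a self-contained sketch of the same tenacity pattern on a hypothetical flaky call:

    import logging

    from tenacity import before_sleep_log, retry, wait_exponential

    logger = logging.getLogger(__name__)
    attempts = {"n": 0}

    @retry(
        wait=wait_exponential(multiplier=2, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.ERROR, True),
    )
    def flaky_fetch() -> str:
        # Hypothetical transient failure: each retry waits exponentially
        # longer (4s up to 60s), logging the exception before sleeping.
        attempts["n"] += 1
        if attempts["n"] < 3:
            raise ConnectionError("transient Slack API error")
        return "ok"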
datahub/ingestion/source/snowflake/snowflake_connection.py CHANGED
@@ -1,4 +1,5 @@
  import logging
+ import threading
  from typing import Any, Dict, Optional
  
  import pydantic
@@ -402,13 +403,30 @@ class SnowflakeConnection(Closeable):
      def __init__(self, connection: NativeSnowflakeConnection):
          self._connection = connection
  
+         self._query_num_lock = threading.Lock()
+         self._query_num = 1
+ 
      def native_connection(self) -> NativeSnowflakeConnection:
          return self._connection
  
+     def get_query_no(self) -> int:
+         with self._query_num_lock:
+             no = self._query_num
+             self._query_num += 1
+             return no
+ 
      def query(self, query: str) -> Any:
          try:
-             logger.info(f"Query: {query}", stacklevel=2)
+             # We often run multiple queries in parallel across multiple threads,
+             # so we need to number them to help with log readability.
+             query_num = self.get_query_no()
+             logger.info(f"Query #{query_num}: {query}", stacklevel=2)
              resp = self._connection.cursor(DictCursor).execute(query)
+             if resp is not None and resp.rowcount is not None:
+                 logger.info(
+                     f"Query #{query_num} got {resp.rowcount} row(s) back from Snowflake",
+                     stacklevel=2,
+                 )
              return resp
  
          except Exception as e:
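Queries issued from parallel threads now share a lock-guarded counter so log lines can be correlated by query number. The pattern in isolation, as a minimal sketch:

    import threading

    class QueryCounter:
        # Without the lock, two threads could read the same value and emit
        # duplicate query numbers.
        def __init__(self) -> None:
            self._lock = threading.Lock()
            self._next = 1

        def get_query_no(self) -> int:
            with self._lock:
                no = self._next
                self._next += 1
                return no

    counter = QueryCounter()
    assert [counter.get_query_no() for _ in range(3)] == [1, 2, 3]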
datahub/ingestion/source/snowflake/snowflake_query.py CHANGED
@@ -71,14 +71,6 @@ class SnowflakeQuery:
      def current_warehouse() -> str:
          return "select CURRENT_WAREHOUSE()"
  
-     @staticmethod
-     def current_database() -> str:
-         return "select CURRENT_DATABASE()"
- 
-     @staticmethod
-     def current_schema() -> str:
-         return "select CURRENT_SCHEMA()"
- 
      @staticmethod
      def show_databases() -> str:
          return "show databases"
@@ -107,8 +99,8 @@ class SnowflakeQuery:
  order by database_name"""
  
      @staticmethod
-     def schemas_for_database(db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def schemas_for_database(db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT schema_name AS "SCHEMA_NAME",
      created AS "CREATED",
@@ -119,8 +111,8 @@ class SnowflakeQuery:
  order by schema_name"""
  
      @staticmethod
-     def tables_for_database(db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def tables_for_database(db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT table_catalog AS "TABLE_CATALOG",
      table_schema AS "TABLE_SCHEMA",
@@ -142,8 +134,8 @@ class SnowflakeQuery:
  order by table_schema, table_name"""
  
      @staticmethod
-     def tables_for_schema(schema_name: str, db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def tables_for_schema(schema_name: str, db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT table_catalog AS "TABLE_CATALOG",
      table_schema AS "TABLE_SCHEMA",
@@ -165,8 +157,8 @@ class SnowflakeQuery:
  order by table_schema, table_name"""
  
      @staticmethod
-     def procedures_for_database(db_name: Optional[str]) -> str:
-         db_clause = f'"{db_name}".' if db_name is not None else ""
+     def procedures_for_database(db_name: str) -> str:
+         db_clause = f'"{db_name}".'
          return f"""
  SELECT procedure_catalog AS "PROCEDURE_CATALOG",
      procedure_schema AS "PROCEDURE_SCHEMA",
@@ -382,26 +374,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
  ORDER BY query_start_time DESC
  ;"""
  
-     @staticmethod
-     def view_dependencies() -> str:
-         return """
-         SELECT
-           concat(
-             referenced_database, '.', referenced_schema,
-             '.', referenced_object_name
-           ) AS "VIEW_UPSTREAM",
-           referenced_object_domain as "REFERENCED_OBJECT_DOMAIN",
-           concat(
-             referencing_database, '.', referencing_schema,
-             '.', referencing_object_name
-           ) AS "DOWNSTREAM_VIEW",
-           referencing_object_domain AS "REFERENCING_OBJECT_DOMAIN"
-         FROM
-           snowflake.account_usage.object_dependencies
-         WHERE
-           referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
-         """
- 
      # Note on use of `upstreams_deny_pattern` to ignore temporary tables:
      # Snowflake access history may include temporary tables in DIRECT_OBJECTS_ACCESSED and
      # OBJECTS_MODIFIED->columns->directSources. We do not need these temporary tables and filter these in the query.
@@ -425,32 +397,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
              upstreams_deny_pattern,
          )
  
-     @staticmethod
-     def view_dependencies_v2() -> str:
-         return """
-         SELECT
-           ARRAY_UNIQUE_AGG(
-             OBJECT_CONSTRUCT(
-               'upstream_object_name', concat(
-                 referenced_database, '.', referenced_schema,
-                 '.', referenced_object_name
-               ),
-               'upstream_object_domain', referenced_object_domain
-             )
-           ) as "UPSTREAM_TABLES",
-           concat(
-             referencing_database, '.', referencing_schema,
-             '.', referencing_object_name
-           ) AS "DOWNSTREAM_TABLE_NAME",
-           ANY_VALUE(referencing_object_domain) AS "DOWNSTREAM_TABLE_DOMAIN"
-         FROM
-           snowflake.account_usage.object_dependencies
-         WHERE
-           referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
-         GROUP BY
-           DOWNSTREAM_TABLE_NAME
-         """
- 
      @staticmethod
      def show_external_tables() -> str:
          return "show external tables in account"
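Besides dropping the unused current_database/current_schema and view-dependency queries, the db_name parameters lose their Optional[...]: a database is now always required, and the generated SQL is always database-qualified. For example (import path per this wheel; output shape per the diff above):

    from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery

    sql = SnowflakeQuery.tables_for_schema("PUBLIC", "ANALYTICS")
    assert '"ANALYTICS".' in sql  # the db_clause prefix is unconditional now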
datahub/ingestion/source/sql/hive.py CHANGED
@@ -637,8 +637,13 @@ def get_view_definition_patched(self, connection, view_name, schema=None, **kw):
              self.identifier_preparer.quote_identifier(schema),
              self.identifier_preparer.quote_identifier(view_name),
          )
-     row = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchone()
-     return row[0]
+     # Hive responds to the SHOW CREATE TABLE with the full view DDL,
+     # including the view definition. However, for multiline view definitions,
+     # it returns multiple rows (of one column each), each with a part of the definition.
+     # Any whitespace at the beginning/end of each view definition line is lost.
+     rows = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchall()
+     parts = [row[0] for row in rows]
+     return "\n".join(parts)
  
  
  HiveDialect.get_view_names = get_view_names_patched
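The patched reader collects every row of SHOW CREATE TABLE output before joining, since Hive splits multiline view definitions across rows. The reassembly step in isolation, with stubbed rows in place of a live connection:

    # Stub of what connection.execute(...).fetchall() returns for a
    # multiline view definition: one single-column row per line.
    rows = [
        ("CREATE VIEW `db`.`v` AS",),
        ("SELECT id, name",),
        ("FROM db.users",),
    ]
    view_definition = "\n".join(row[0] for row in rows)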
datahub/specific/dataset.py CHANGED
@@ -292,3 +292,15 @@ class DatasetPatchBuilder(
              value=timestamp,
          )
          return self
+ 
+     def set_external_url(
+         self, external_url: Optional[str] = None
+     ) -> "DatasetPatchBuilder":
+         if external_url is not None:
+             self._add_patch(
+                 DatasetProperties.ASPECT_NAME,
+                 "add",
+                 path=("externalUrl",),
+                 value=external_url,
+             )
+         return self
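With set_external_url on DatasetPatchBuilder, an external URL can be patched onto datasetProperties without overwriting the rest of the aspect. A usage sketch, assuming a DataHub instance at localhost:8080:

    from datahub.emitter.rest_emitter import DataHubRestEmitter
    from datahub.specific.dataset import DatasetPatchBuilder

    patch = DatasetPatchBuilder(
        "urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)"
    ).set_external_url("https://example.com/db/tbl")

    emitter = DataHubRestEmitter("http://localhost:8080")
    for mcp in patch.build():
        emitter.emit(mcp)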