acryl-datahub 1.0.0.3rc7__py3-none-any.whl → 1.0.0.3rc9__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Potentially problematic release: this version of acryl-datahub has been flagged as potentially problematic.
- {acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/METADATA +2529 -2529
- {acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/RECORD +19 -19
- {acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +9 -11
- datahub/configuration/common.py +5 -0
- datahub/emitter/mcp.py +15 -4
- datahub/errors.py +4 -0
- datahub/ingestion/api/source_helpers.py +1 -0
- datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
- datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
- datahub/ingestion/source/slack/slack.py +4 -52
- datahub/ingestion/source/snowflake/snowflake_connection.py +19 -1
- datahub/ingestion/source/snowflake/snowflake_query.py +8 -62
- datahub/ingestion/source/sql/hive.py +7 -2
- datahub/specific/dataset.py +12 -0
- {acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-acryl_datahub-1.0.0.…
+acryl_datahub-1.0.0.3rc9.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=…
+datahub/_version.py,sha256=6XeiyYGjXD3cLrKhOmtOz90fhxaal2Ir3lq-m_cgOes,323
 datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
-datahub/errors.py,sha256=…
+datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -48,7 +48,7 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
 datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
 datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/api/entities/dataset/dataset.py,sha256=…
+datahub/api/entities/dataset/dataset.py,sha256=se2tv6jsvS5BmH5b53mKs8lEPF4LNzVRrd9PDovPIQk,49627
 datahub/api/entities/forms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/api/entities/forms/forms.py,sha256=17GLVVrunUj6hWS7CADhNPrT4gV6il905Ny_Y_5S5Qc,15889
 datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp63V4LX4THGTAMq3ep8THrSGP4,537
@@ -94,7 +94,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=Rb06XJKxqda5RSUo188B90Wn
 datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
 datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
 datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
-datahub/configuration/common.py,sha256=…
+datahub/configuration/common.py,sha256=0OXk_yhyVJroP47Nant5-5fRWOahkVB8YU4G70TR9BI,10692
 datahub/configuration/config_loader.py,sha256=hRzPFxkz-w9IqkpSa5vwCzSra1p49DyfeJNeyqGa8-4,6827
 datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
 datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
@@ -121,7 +121,7 @@ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1K
 datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
 datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
 datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
-datahub/emitter/mcp.py,sha256=…
+datahub/emitter/mcp.py,sha256=u6LphyhpbdFqboTAL_9MzXhGjc45o_BePoDFBkEEYWo,10484
 datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
 datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
 datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
@@ -146,7 +146,7 @@ datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwB
 datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
 datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
 datahub/ingestion/api/source.py,sha256=2h7Tx2As0gu5-6d7PiRuJ8myr_y3MRx2YYgH735Jj18,19494
-datahub/ingestion/api/source_helpers.py,sha256=…
+datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
 datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
 datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
 datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -312,8 +312,8 @@ datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
 datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
 datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
 datahub/ingestion/source/fivetran/fivetran.py,sha256=avP54ePLFVpkKVv8tr6mzC7dniTmZbKn13LP0-Ohj9k,13821
-datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=…
-datahub/ingestion/source/fivetran/fivetran_query.py,sha256=…
+datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=5bmG705tCOaKAIjnomJfGCyCwKc7NahtW-rMP-4YifU,12902
+datahub/ingestion/source/fivetran/fivetran_query.py,sha256=c1begMnLtWoLBmaKBiarpMZ6HTVAI6hDPKn26DYuMYI,5343
 datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/gc/datahub_gc.py,sha256=EXO-Stj6gGMLTSTbSBC-C3_zpjpQtFN9pAMWR95ma0I,12830
 datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=mUWcMt-_FL1SYGIgI4lGZDZGXspUUTv__5GN1W2oJ3s,17118
@@ -445,19 +445,19 @@ datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiR
 datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
 datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
 datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/slack/slack.py,sha256=…
+datahub/ingestion/source/slack/slack.py,sha256=B_HyRlhY2VWjMHDspQZqqejvVhnTREFx5B2GNa0jqzE,25903
 datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMPOkX5qAQLFX2KqagojQ,2678
 datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
 datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
 datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
 datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
-datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=…
+datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=50fHPCzOny-3Mh_DxJU83mPTww5m5T48pzqb8m6QYuo,18491
 datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
 datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
 datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
 datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=PY4Wy6i89nqRl92ARwXNqWwm-ifagkKbKKtxYWeswkk,29209
-datahub/ingestion/source/snowflake/snowflake_query.py,sha256=…
+datahub/ingestion/source/snowflake/snowflake_query.py,sha256=OJDF4x5OCa0PTTdkVdO5_Wzox4039Vhf7zwDoHHW-W4,38410
 datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP7i6cgK6Q1jXJPjDA1j9C8klus,6762
 datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
 datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
@@ -473,7 +473,7 @@ datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwy
 datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2qkzjH6CpTOgkx4,1443
 datahub/ingestion/source/sql/druid.py,sha256=IjGZdntb5hubkIzzT9qDRDpyfbckEg2GwRncvC5mDSs,2722
 datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
-datahub/ingestion/source/sql/hive.py,sha256=…
+datahub/ingestion/source/sql/hive.py,sha256=voQl6QjHUXPdx7LPONuHiFavW9nRKMjHZx7o3vJQG7A,31034
 datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
 datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
 datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
@@ -924,7 +924,7 @@ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,668
 datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
 datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
 datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
-datahub/specific/dataset.py,sha256=…
+datahub/specific/dataset.py,sha256=E4XTS4T1c5mjdDn0sTAZsXJ4eK-REV3wwhSxTQkjWJs,10067
 datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
 datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
 datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1050,8 +1050,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.0.0.…
-acryl_datahub-1.0.0.…
-acryl_datahub-1.0.0.…
-acryl_datahub-1.0.0.…
-acryl_datahub-1.0.0.…
+acryl_datahub-1.0.0.3rc9.dist-info/METADATA,sha256=c2nBayXTv9LuBzQY7HuANB9MbIfnnA0H6z_DUTq4GKE,176989
+acryl_datahub-1.0.0.3rc9.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+acryl_datahub-1.0.0.3rc9.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+acryl_datahub-1.0.0.3rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.0.0.3rc9.dist-info/RECORD,,
datahub/_version.py
CHANGED
datahub/api/entities/dataset/dataset.py
CHANGED
@@ -509,16 +509,14 @@ class Dataset(StrictModel):
     def generate_mcp(
         self,
     ) -> Iterable[Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper]]:
-        …
-        )
-        yield mcp
+        patch_builder = self.patch_builder()
+
+        patch_builder.set_custom_properties(self.properties or {})
+        patch_builder.set_description(self.description)
+        patch_builder.set_display_name(self.name)
+        patch_builder.set_external_url(self.external_url)
+
+        yield from patch_builder.build()
 
         if self.schema_metadata:
             schema_fields = set()
@@ -981,7 +979,7 @@ class Dataset(StrictModel):
 
     def model_dump(self, **kwargs):
         """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
-        exclude = kwargs.pop("exclude", set())
+        exclude = kwargs.pop("exclude", None) or set()
 
         # If id and name are identical, exclude name from the output
         if self.id == self.name and self.id is not None:
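The generate_mcp() change above replaces a hand-built properties aspect with patch proposals produced by the dataset's patch builder. A minimal sketch of the same pattern using DatasetPatchBuilder directly; the urn and field values are illustrative:

from datahub.specific.dataset import DatasetPatchBuilder

# Patch-style MetadataChangeProposals: fields not set here stay untouched
# on the server, unlike a full-aspect overwrite.
patch_builder = DatasetPatchBuilder(
    "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.example_table,PROD)"
)
patch_builder.set_custom_properties({"team": "data-platform"})
patch_builder.set_description("Example dataset")
patch_builder.set_display_name("example_table")

for mcp in patch_builder.build():
    print(mcp.aspectName, mcp.changeType)

Because these are PATCH-type proposals, re-running ingestion no longer clobbers edits made to other fields elsewhere.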
datahub/configuration/common.py
CHANGED
datahub/emitter/mcp.py
CHANGED
@@ -1,9 +1,11 @@
 import dataclasses
 import json
+import warnings
 from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
 
 from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
 from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
+from datahub.errors import DataHubDeprecationWarning
 from datahub.metadata.schema_classes import (
     ChangeTypeClass,
     DictWrapper,
@@ -75,13 +77,22 @@ class MetadataChangeProposalWrapper:
         if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
             self.entityType = guess_entity_type(self.entityUrn)
         elif self.entityUrn and self.entityType:
-            guessed_entity_type = guess_entity_type(self.entityUrn)
-            …
-            …
-            if self.entityType.lower() != guessed_entity_type:
+            guessed_entity_type = guess_entity_type(self.entityUrn)
+            if self.entityType.lower() != guessed_entity_type.lower():
+                # If they aren't a case-ignored match, raise an error.
                 raise ValueError(
                     f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
                 )
+            elif self.entityType != guessed_entity_type:
+                # If they only differ in case, normalize and print a warning.
+                self.entityType = guessed_entity_type
+                warnings.warn(
+                    f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
+                    "This will be automatically corrected for now, but will become an error in a future release. "
+                    "Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
+                    DataHubDeprecationWarning,
+                    stacklevel=3,
+                )
         elif self.entityType == _ENTITY_TYPE_UNSET:
             raise ValueError("entityType must be set if entityUrn is not set")
 
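With this change, an entityType that differs from the urn-inferred type only by case is normalized with a deprecation warning instead of being rejected; a genuine mismatch still raises. A small sketch of the new behavior; the urn is illustrative:

import warnings

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import StatusClass

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    mcp = MetadataChangeProposalWrapper(
        entityType="DATASET",  # case-only mismatch with the inferred "dataset"
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)",
        aspect=StatusClass(removed=False),
    )

assert mcp.entityType == "dataset"  # normalized to the inferred type
assert any(w.category.__name__ == "DataHubDeprecationWarning" for w in caught)
# entityType="chart" with a dataset urn would still raise ValueError.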
datahub/errors.py
CHANGED
datahub/ingestion/api/source_helpers.py
CHANGED
@@ -92,6 +92,7 @@ def create_dataset_props_patch_builder(
     patch_builder.set_last_modified(dataset_properties.lastModified)
     patch_builder.set_qualified_name(dataset_properties.qualifiedName)
     patch_builder.add_custom_properties(dataset_properties.customProperties)
+    patch_builder.set_external_url(dataset_properties.externalUrl)
 
     return patch_builder
 
datahub/ingestion/source/fivetran/fivetran_log_api.py
CHANGED
@@ -54,7 +54,7 @@ class FivetranLogAPI:
                     snowflake_destination_config.database,
                 )
             )
-            fivetran_log_query.…
+            fivetran_log_query.set_schema(
                 snowflake_destination_config.log_schema,
             )
             fivetran_log_database = snowflake_destination_config.database
@@ -66,8 +66,12 @@ class FivetranLogAPI:
             engine = create_engine(
                 bigquery_destination_config.get_sql_alchemy_url(),
             )
-            fivetran_log_query.…
-            …
+            fivetran_log_query.set_schema(bigquery_destination_config.dataset)
+
+            # The "database" should be the BigQuery project name.
+            fivetran_log_database = engine.execute(
+                "SELECT @@project_id"
+            ).fetchone()[0]
         else:
             raise ConfigurationError(
                 f"Destination platform '{destination_platform}' is not yet supported."
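For BigQuery destinations, the log tables are now scoped by dataset via set_schema, and the "database" is resolved at runtime from BigQuery's @@project_id system variable, as the diff shows. A minimal sketch of that lookup, assuming the sqlalchemy-bigquery dialect and ambient credentials; the project name is illustrative:

from sqlalchemy import create_engine

engine = create_engine("bigquery://my-gcp-project")

# @@project_id holds the project this connection runs against; the connector
# records it as the Fivetran log "database".
project_id = engine.execute("SELECT @@project_id").fetchone()[0]
print(project_id)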
datahub/ingestion/source/fivetran/fivetran_query.py
CHANGED
@@ -12,14 +12,14 @@ class FivetranLogQuery:
 
     def __init__(self) -> None:
         # Select query db clause
-        self.…
-
-    def set_db(self, db_name: str) -> None:
-        self.db_clause = f"{db_name}."
+        self.schema_clause: str = ""
 
     def use_database(self, db_name: str) -> str:
         return f"use database {db_name}"
 
+    def set_schema(self, schema_name: str) -> None:
+        self.schema_clause = f"{schema_name}."
+
     def get_connectors_query(self) -> str:
         return f"""\
 SELECT
@@ -30,7 +30,7 @@ SELECT
     paused,
     sync_frequency,
     destination_id
-FROM {self.…
+FROM {self.schema_clause}connector
 WHERE
     _fivetran_deleted = FALSE
 QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY _fivetran_synced DESC) = 1
@@ -42,7 +42,7 @@ SELECT id as user_id,
     given_name,
     family_name,
     email
-FROM {self.…
+FROM {self.schema_clause}user
 """
 
     def get_sync_logs_query(
@@ -62,7 +62,7 @@ WITH ranked_syncs AS (
         MAX(CASE WHEN message_event = 'sync_end' THEN time_stamp END) as end_time,
         MAX(CASE WHEN message_event = 'sync_end' THEN message_data END) as end_message_data,
         ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY MAX(time_stamp) DESC) as rn
-    FROM {self.…
+    FROM {self.schema_clause}log
     WHERE message_event in ('sync_start', 'sync_end')
     AND time_stamp > CURRENT_TIMESTAMP - INTERVAL '{syncs_interval} days'
     AND connector_id IN ({formatted_connector_ids})
@@ -99,11 +99,11 @@ FROM (
         dsm.name as destination_schema_name,
         tl.created_at as created_at,
         ROW_NUMBER() OVER (PARTITION BY stm.connector_id, stm.id, dtm.id ORDER BY tl.created_at DESC) as table_combo_rn
-    FROM {self.…
-    JOIN {self.…
-    JOIN {self.…
-    JOIN {self.…
-    JOIN {self.…
+    FROM {self.schema_clause}table_lineage as tl
+    JOIN {self.schema_clause}source_table_metadata as stm on tl.source_table_id = stm.id
+    JOIN {self.schema_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
+    JOIN {self.schema_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
+    JOIN {self.schema_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
     WHERE stm.connector_id IN ({formatted_connector_ids})
 )
 -- Ensure that we only get back one entry per source and destination pair.
@@ -131,13 +131,13 @@ FROM (
         dcm.name as destination_column_name,
         cl.created_at as created_at,
         ROW_NUMBER() OVER (PARTITION BY stm.connector_id, cl.source_column_id, cl.destination_column_id ORDER BY cl.created_at DESC) as column_combo_rn
-    FROM {self.…
-    JOIN {self.…
+    FROM {self.schema_clause}column_lineage as cl
+    JOIN {self.schema_clause}source_column_metadata as scm
         ON cl.source_column_id = scm.id
-    JOIN {self.…
+    JOIN {self.schema_clause}destination_column_metadata as dcm
         ON cl.destination_column_id = dcm.id
     -- Only joining source_table_metadata to get the connector_id.
-    JOIN {self.…
+    JOIN {self.schema_clause}source_table_metadata as stm
         ON scm.table_id = stm.id
 WHERE stm.connector_id IN ({formatted_connector_ids})
 )
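Taken together, these hunks swap the per-query database prefix (set_db/db_clause) for a schema prefix (set_schema/schema_clause): on Snowflake the database is selected once with "use database", and on BigQuery the dataset plays the schema role. A quick check of the generated SQL:

from datahub.ingestion.source.fivetran.fivetran_query import FivetranLogQuery

query = FivetranLogQuery()
query.set_schema("fivetran_log")

# Table references are now prefixed with the schema, not the database.
sql = query.get_connectors_query()
assert "FROM fivetran_log.connector" in sql
print(query.use_database("analytics"))  # -> "use database analytics"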
datahub/ingestion/source/slack/slack.py
CHANGED
@@ -1,6 +1,5 @@
 import json
 import logging
-import textwrap
 from dataclasses import dataclass
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 
@@ -613,6 +612,10 @@ class SlackSource(StatefulIngestionSourceBase):
             ),
         )
 
+    @retry(
+        wait=wait_exponential(multiplier=2, min=4, max=60),
+        before_sleep=before_sleep_log(logger, logging.ERROR, True),
+    )
     def get_user_to_be_updated(
         self,
     ) -> Iterable[Tuple[CorpUser, Optional[CorpUserEditableInfoClass]]]:
@@ -634,56 +637,5 @@ class SlackSource(StatefulIngestionSourceBase):
             if user_obj.email is not None:
                 yield (user_obj, editable_properties)
 
-    @retry(
-        wait=wait_exponential(multiplier=2, min=4, max=60),
-        before_sleep=before_sleep_log(logger, logging.ERROR, True),
-    )
-    def get_user_to_be_updated_oss(self) -> Iterable[CorpUser]:
-        graphql_query = textwrap.dedent(
-            """
-            query listUsers($input: ListUsersInput!) {
-              listUsers(input: $input) {
-                total
-                users {
-                  urn
-                  editableProperties {
-                    email
-                    slack
-                  }
-                }
-              }
-            }
-            """
-        )
-        start = 0
-        count = 10
-        total = count
-
-        assert self.ctx.graph is not None
-
-        while start < total:
-            variables = {"input": {"start": start, "count": count}}
-            response = self.ctx.graph.execute_graphql(
-                query=graphql_query, variables=variables
-            )
-            list_users = response.get("listUsers", {})
-            total = list_users.get("total", 0)
-            users = list_users.get("users", [])
-            for user in users:
-                user_obj = CorpUser()
-                editable_properties = user.get("editableProperties", {})
-                user_obj.urn = user.get("urn")
-                if user_obj.urn is None:
-                    continue
-                if editable_properties is not None:
-                    user_obj.email = editable_properties.get("email")
-                if user_obj.email is None:
-                    urn_id = Urn.from_string(user_obj.urn).get_entity_id_as_string()
-                    if "@" in urn_id:
-                        user_obj.email = urn_id
-                if user_obj.email is not None:
-                    yield user_obj
-            start += count
-
     def get_report(self) -> SourceReport:
         return self.report
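The @retry decorator that moved onto get_user_to_be_updated is tenacity's standard backoff recipe: exponential waits between 4 and 60 seconds, logging each failure before sleeping. A standalone sketch of the same configuration; flaky_fetch is a stand-in, and note that without a stop condition tenacity retries indefinitely:

import logging

from tenacity import before_sleep_log, retry, wait_exponential

logger = logging.getLogger(__name__)

@retry(
    wait=wait_exponential(multiplier=2, min=4, max=60),
    before_sleep=before_sleep_log(logger, logging.ERROR, exc_info=True),
)
def flaky_fetch() -> dict:
    # Stand-in for a Slack API call that may hit rate limits.
    raise RuntimeError("transient Slack API error")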
datahub/ingestion/source/snowflake/snowflake_connection.py
CHANGED
@@ -1,4 +1,5 @@
 import logging
+import threading
 from typing import Any, Dict, Optional
 
 import pydantic
@@ -402,13 +403,30 @@ class SnowflakeConnection(Closeable):
     def __init__(self, connection: NativeSnowflakeConnection):
         self._connection = connection
 
+        self._query_num_lock = threading.Lock()
+        self._query_num = 1
+
     def native_connection(self) -> NativeSnowflakeConnection:
         return self._connection
 
+    def get_query_no(self) -> int:
+        with self._query_num_lock:
+            no = self._query_num
+            self._query_num += 1
+            return no
+
     def query(self, query: str) -> Any:
         try:
-            …
+            # We often run multiple queries in parallel across multiple threads,
+            # so we need to number them to help with log readability.
+            query_num = self.get_query_no()
+            logger.info(f"Query #{query_num}: {query}", stacklevel=2)
             resp = self._connection.cursor(DictCursor).execute(query)
+            if resp is not None and resp.rowcount is not None:
+                logger.info(
+                    f"Query #{query_num} got {resp.rowcount} row(s) back from Snowflake",
+                    stacklevel=2,
+                )
             return resp
 
         except Exception as e:
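The lock-guarded counter exists because ingestion issues queries from several threads at once, and numbering makes interleaved log lines attributable to their query. A self-contained sketch of the pattern; the class name is illustrative:

import threading
from concurrent.futures import ThreadPoolExecutor

class QueryNumberer:
    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._num = 1

    def next_no(self) -> int:
        # The lock makes read-increment-return atomic across threads.
        with self._lock:
            no = self._num
            self._num += 1
            return no

numberer = QueryNumberer()
with ThreadPoolExecutor(max_workers=8) as pool:
    nums = list(pool.map(lambda _: numberer.next_no(), range(100)))
assert sorted(nums) == list(range(1, 101))  # unique, gap-free under contention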
datahub/ingestion/source/snowflake/snowflake_query.py
CHANGED
@@ -71,14 +71,6 @@ class SnowflakeQuery:
     def current_warehouse() -> str:
         return "select CURRENT_WAREHOUSE()"
 
-    @staticmethod
-    def current_database() -> str:
-        return "select CURRENT_DATABASE()"
-
-    @staticmethod
-    def current_schema() -> str:
-        return "select CURRENT_SCHEMA()"
-
     @staticmethod
     def show_databases() -> str:
         return "show databases"
@@ -107,8 +99,8 @@ class SnowflakeQuery:
         order by database_name"""
 
     @staticmethod
-    def schemas_for_database(db_name: …
-        db_clause = f'"{db_name}".'
+    def schemas_for_database(db_name: str) -> str:
+        db_clause = f'"{db_name}".'
         return f"""
     SELECT schema_name AS "SCHEMA_NAME",
         created AS "CREATED",
@@ -119,8 +111,8 @@ class SnowflakeQuery:
         order by schema_name"""
 
     @staticmethod
-    def tables_for_database(db_name: …
-        db_clause = f'"{db_name}".'
+    def tables_for_database(db_name: str) -> str:
+        db_clause = f'"{db_name}".'
         return f"""
     SELECT table_catalog AS "TABLE_CATALOG",
         table_schema AS "TABLE_SCHEMA",
@@ -142,8 +134,8 @@ class SnowflakeQuery:
         order by table_schema, table_name"""
 
     @staticmethod
-    def tables_for_schema(schema_name: str, db_name: …
-        db_clause = f'"{db_name}".'
+    def tables_for_schema(schema_name: str, db_name: str) -> str:
+        db_clause = f'"{db_name}".'
         return f"""
     SELECT table_catalog AS "TABLE_CATALOG",
         table_schema AS "TABLE_SCHEMA",
@@ -165,8 +157,8 @@ class SnowflakeQuery:
         order by table_schema, table_name"""
 
     @staticmethod
-    def procedures_for_database(db_name: …
-        db_clause = f'"{db_name}".'
+    def procedures_for_database(db_name: str) -> str:
+        db_clause = f'"{db_name}".'
         return f"""
     SELECT procedure_catalog AS "PROCEDURE_CATALOG",
         procedure_schema AS "PROCEDURE_SCHEMA",
@@ -382,26 +374,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
     ORDER BY query_start_time DESC
     ;"""
 
-    @staticmethod
-    def view_dependencies() -> str:
-        return """
-        SELECT
-          concat(
-            referenced_database, '.', referenced_schema,
-            '.', referenced_object_name
-          ) AS "VIEW_UPSTREAM",
-          referenced_object_domain as "REFERENCED_OBJECT_DOMAIN",
-          concat(
-            referencing_database, '.', referencing_schema,
-            '.', referencing_object_name
-          ) AS "DOWNSTREAM_VIEW",
-          referencing_object_domain AS "REFERENCING_OBJECT_DOMAIN"
-        FROM
-          snowflake.account_usage.object_dependencies
-        WHERE
-          referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
-        """
-
     # Note on use of `upstreams_deny_pattern` to ignore temporary tables:
     # Snowflake access history may include temporary tables in DIRECT_OBJECTS_ACCESSED and
    # OBJECTS_MODIFIED->columns->directSources. We do not need these temporary tables and filter these in the query.
@@ -425,32 +397,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
             upstreams_deny_pattern,
         )
 
-    @staticmethod
-    def view_dependencies_v2() -> str:
-        return """
-        SELECT
-          ARRAY_UNIQUE_AGG(
-            OBJECT_CONSTRUCT(
-              'upstream_object_name', concat(
-                referenced_database, '.', referenced_schema,
-                '.', referenced_object_name
-              ),
-              'upstream_object_domain', referenced_object_domain
-            )
-          ) as "UPSTREAM_TABLES",
-          concat(
-            referencing_database, '.', referencing_schema,
-            '.', referencing_object_name
-          ) AS "DOWNSTREAM_TABLE_NAME",
-          ANY_VALUE(referencing_object_domain) AS "DOWNSTREAM_TABLE_DOMAIN"
-        FROM
-          snowflake.account_usage.object_dependencies
-        WHERE
-          referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
-        GROUP BY
-          DOWNSTREAM_TABLE_NAME
-        """
-
     @staticmethod
     def show_external_tables() -> str:
         return "show external tables in account"
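Besides dropping the current_database/current_schema helpers and both view-dependency queries, the *_for_database helpers tighten db_name to a plain str. A sketch of the effect, assuming db_clause is interpolated into the returned SQL as shown above:

from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery

# db_name is now required; it is double-quoted to preserve identifier case.
sql = SnowflakeQuery.schemas_for_database("ANALYTICS")
assert '"ANALYTICS".' in sql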
datahub/ingestion/source/sql/hive.py
CHANGED
@@ -637,8 +637,13 @@ def get_view_definition_patched(self, connection, view_name, schema=None, **kw):
         self.identifier_preparer.quote_identifier(schema),
         self.identifier_preparer.quote_identifier(view_name),
     )
-    …
-    …
+    # Hive responds to the SHOW CREATE TABLE with the full view DDL,
+    # including the view definition. However, for multiline view definitions,
+    # it returns multiple rows (of one column each), each with a part of the definition.
+    # Any whitespace at the beginning/end of each view definition line is lost.
+    rows = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchall()
+    parts = [row[0] for row in rows]
+    return "\n".join(parts)
 
 
 HiveDialect.get_view_names = get_view_names_patched
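The patched get_view_definition works around HiveServer2 returning a multiline view DDL as one single-column row per line. A minimal illustration of the row-joining step with a stand-in result set:

# Stand-in for connection.execute("SHOW CREATE TABLE ...").fetchall().
rows = [
    ("CREATE VIEW `db`.`v` AS",),
    ("SELECT a,",),
    ("b",),
    ("FROM t",),
]
view_definition = "\n".join(row[0] for row in rows)
print(view_definition)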
datahub/specific/dataset.py
CHANGED
@@ -292,3 +292,15 @@ class DatasetPatchBuilder(
             value=timestamp,
         )
         return self
+
+    def set_external_url(
+        self, external_url: Optional[str] = None
+    ) -> "DatasetPatchBuilder":
+        if external_url is not None:
+            self._add_patch(
+                DatasetProperties.ASPECT_NAME,
+                "add",
+                path=("externalUrl",),
+                value=external_url,
+            )
+        return self
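set_external_url follows the builder's existing convention of emitting a patch only when a value is provided, so passing None leaves any server-side externalUrl untouched. A usage sketch; the urn and URL are illustrative:

from datahub.specific.dataset import DatasetPatchBuilder

mcps = (
    DatasetPatchBuilder("urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)")
    .set_external_url("https://warehouse.example.com/db/tbl")
    .build()
)
for mcp in mcps:
    print(mcp.aspectName)  # datasetProperties, carrying the externalUrl patch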
{acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/entry_points.txt
File without changes
{acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/licenses/LICENSE
File without changes
{acryl_datahub-1.0.0.3rc7.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/top_level.txt
File without changes