acryl-datahub 0.15.0.4rc1__py3-none-any.whl → 0.15.0.4rc3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.



@@ -1,4 +1,4 @@
- datahub/__init__.py,sha256=xAoAiT2wj9RiduZiLAlc28hjzEtCWW6b2I4AOY-rBkc,576
+ datahub/__init__.py,sha256=HrewhTx8f-ZiEquwq1xFBeH4Q_XtcsZDS2f_TupvZIk,576
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
  datahub/entrypoints.py,sha256=vbkUx_jVIkr_V4wtoQhOpledna-pD_tco1mloRnb7QY,8029
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -61,7 +61,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
  datahub/cli/cli_utils.py,sha256=onbG7z9hIm0zCAm0a2ulTOsHC_NVkdIsbg__EMj02DQ,13540
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
- datahub/cli/container_cli.py,sha256=t5hEKqmTEi9LMAs6DTuHtIJBSU5LAXND3JyCLAdJOO8,2828
+ datahub/cli/container_cli.py,sha256=8D73hLfTHsDg4Cedh_2x0utl7ppOeB1TUJVRgur-Crw,3624
  datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
  datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
@@ -112,12 +112,12 @@ datahub/configuration/validate_multiline_string.py,sha256=l9PF6_EAC_1lWxU_RWrvPB
  datahub/configuration/yaml.py,sha256=dLmjCalPOjgdc7mmJxtlP7uOrIHZiAWxD1gwAFOdtUU,308
  datahub/emitter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
- datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
+ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
- datahub/emitter/mce_builder.py,sha256=B-uUSB2Gq_gRSiqWIUAYvHGagnVRrdL0ZLf8bW8yfDs,16326
+ datahub/emitter/mce_builder.py,sha256=9wjXG1WmWZUN7-_JdRJ5OcH8IPG0b3TGzxry4yscOR0,16545
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
- datahub/emitter/mcp_builder.py,sha256=AHSeMfcFxvJl2PXyDQ5HsnWbk6HkJqUtppNKjQtIbUI,10791
+ datahub/emitter/mcp_builder.py,sha256=_-d5o7RIwgtMMdr_9tg0oU5ta6lL4dqOie1a68WEjKg,11638
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
  datahub/emitter/rest_emitter.py,sha256=v-A4eR_GSbXg-dsUgHAMcUd68qNEF5KO2MYlyhAYn8I,17880
@@ -129,17 +129,17 @@ datahub/ingestion/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
  datahub/ingestion/api/closeable.py,sha256=k12AT--s4GDtZ-po_rVm5QKgvGIDteeRPByZPIOfecA,599
  datahub/ingestion/api/committable.py,sha256=4S6GuBzvX2vb1A8P506NbspOKfZ1621sBG8t0lvRb8o,886
  datahub/ingestion/api/common.py,sha256=nJVL8YdvokYFajOjmVpSNlLbZJ5iVOFS4KJDlGtJ_jc,2735
- datahub/ingestion/api/decorators.py,sha256=KTNdf2B20L-wlEPF8UsL89a8zwvRSOfA7gOOZnnYalY,3933
+ datahub/ingestion/api/decorators.py,sha256=b9bxHXlqCLDgqrVdPU6WNQg1koZcK62AkZ9vNwvWeK4,4029
  datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
  datahub/ingestion/api/incremental_lineage_helper.py,sha256=JTmJvXzzwI04oTUTIeTKKscT_hjnr8nW34NFWJvCXDc,5871
  datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
- datahub/ingestion/api/registry.py,sha256=LGElUdzhNQoEr-k2SN23mJaIYnA1PYfF97LQxBmWmD8,7262
+ datahub/ingestion/api/registry.py,sha256=nigH50Qzz-imLrfapsuGjbjdUphGzwjT_5jUoQUlt9I,7445
  datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
- datahub/ingestion/api/sink.py,sha256=3jw7-x9gXGreOPwn49wG5fT3C8pYhaNMQITdMN6kbag,4478
- datahub/ingestion/api/source.py,sha256=yLx_7TCyhflo0hloYzC4y2ovh3TWEVcDh1agvh8AQwI,19036
+ datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
+ datahub/ingestion/api/source.py,sha256=kjR7qL0rchi4S_mSP-j5yE9MLD7gnbN1wdNvUU-Lna4,19154
  datahub/ingestion/api/source_helpers.py,sha256=FvtTEGktO_x8TKqkAvtdR7rwi4A7efb8vb9TSt_zz5E,19644
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
@@ -218,7 +218,7 @@ datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm
  datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
  datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
- datahub/ingestion/source/aws/glue.py,sha256=9KYv53loNa-keVwCRLDb-5II_AjeHVRf1Fb2HXqtZXk,57653
+ datahub/ingestion/source/aws/glue.py,sha256=qwkZMcbBlHIdhhuRj-gHNYMeuMADrvaHcN3gik0n_08,57919
  datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD89RSFfsVNlFbRnSkQ,3901
  datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
  datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
@@ -360,7 +360,7 @@ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
  datahub/ingestion/source/powerbi/config.py,sha256=CzG-kdcGqB0nYnQ8W40Anb1gsbMZ5TcF_dL_I02xfrE,22764
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
- datahub/ingestion/source/powerbi/powerbi.py,sha256=6O1U5jA5BDvc3CF35kUckzeQRlWF4PNbVMedMyZE_pY,54499
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=xCNMgL-KuPGpIFv_PP1woyiddY_PpbX1HEl3aDk7F1c,54535
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
@@ -370,9 +370,9 @@ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5ztea
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
  datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
+ datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=kS337FgY-fLPjeRryQ-adVm1VAEThI88svii2Q9sGTc,8435
  datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=eNKW9ShWJ5F3pKgTVQ6xc1H1rl-JBIy9ye1pq5C2Kb0,39598
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=ROLfaSWTNyNFO118kjOqFMTbFPT_D9XnnpMTfYcDchM,26193
+ datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=k8rP2uwXb6maS7VzprUcqr2ggjimz0tILVJezze0jyA,26441
  datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
  datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
  datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
@@ -424,7 +424,7 @@ datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiR
  datahub/ingestion/source/sigma/sigma.py,sha256=T-zAgbEw83JSu_4j1gCYibSCaRLXjY3Kt6HdYPEZAFA,24096
  datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/slack/slack.py,sha256=C_3iXUS72h7HALhBW_AIyi3nNOqzyh7Ogflr-qI5ZEE,12946
+ datahub/ingestion/source/slack/slack.py,sha256=VpLS-6zuQa8hIuHnZhLf8wRdN72Xell3ZMd0kK3A0i8,13188
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_GrNdxV9WZKri7rmtrpQ,2628
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
@@ -473,8 +473,8 @@ datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhll
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
  datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
- datahub/ingestion/source/sql/mssql/job_models.py,sha256=ztXDrD4anhzwWvACIm9fucE2WhMDMKkJ4alMYOQOqWA,7083
- datahub/ingestion/source/sql/mssql/source.py,sha256=WV2rU_sN5pqd4MEu6p4kwQRpADFjG0qh27tx7qP5AOw,30931
+ datahub/ingestion/source/sql/mssql/job_models.py,sha256=tiACTVNAo3WXT-JXZfpBG5UyhflrLGi1cyS8mAUL9Yw,7107
+ datahub/ingestion/source/sql/mssql/source.py,sha256=UUlIvdINDzJ7BODsNaMMXGOA3LeYKT26wVRmmFYxujs,31631
  datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
  datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/state/checkpoint.py,sha256=-fTUZKkY4nHTFqSWZ0jJkkdIu_tWlOjRNhm4FTr4ul4,8860
@@ -572,7 +572,7 @@ datahub/metadata/schema.avsc,sha256=sAPtgHSNJ1a126Vz7OjVIMKFjrrIG9f4cvRH6SkJ0jc,
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
- datahub/metadata/_urns/urn_defs.py,sha256=MdAOrpRL4CL5VKSBk1I_DTVYz2_rYuIQ9tAEmAdIK4I,109984
+ datahub/metadata/_urns/urn_defs.py,sha256=SoCD7TNdGcPKI9vD8ZXLZzxggJTHtvRe4Jgaj_Mm2x8,132110
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -957,7 +957,7 @@ datahub/utilities/yaml_sync_utils.py,sha256=65IEe8quW3_zHCR8CyoDkZyopeZJazU-IyMr
  datahub/utilities/registries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/utilities/registries/domain_registry.py,sha256=0SfcZNop-PXBbl-AWw92vAyb28i0YXTr-TKdBwixmOw,2452
  datahub/utilities/urns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/utilities/urns/_urn_base.py,sha256=3JVe4u2fZvFwLCGaDwqDmEgbnveVVbX52cE4GleI634,10420
+ datahub/utilities/urns/_urn_base.py,sha256=phYous0_uLIYQ_rB-2YyqKjxonTL-69fXA7Js1WxQ-Q,10560
  datahub/utilities/urns/corp_group_urn.py,sha256=6H5Q6nZvAXu80IZBDCeM8xo_9ap9pgwtyi60QXx3hzY,75
  datahub/utilities/urns/corpuser_urn.py,sha256=h-Yh-9QRbtQOhxxzxEBc7skoavpGaKDKVNrsxSXZ1yQ,88
  datahub/utilities/urns/data_flow_urn.py,sha256=w1Z7ET1L1OtYD1w-xiUYtyCczsxZZ1l3LRyTRv5NdpE,73
@@ -992,8 +992,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-0.15.0.4rc1.dist-info/METADATA,sha256=78vUNhiirHpceObDU05VhkgjPwGVXn0TOFHunHTcnV0,173250
- acryl_datahub-0.15.0.4rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- acryl_datahub-0.15.0.4rc1.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
- acryl_datahub-0.15.0.4rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-0.15.0.4rc1.dist-info/RECORD,,
+ acryl_datahub-0.15.0.4rc3.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-0.15.0.4rc3.dist-info/METADATA,sha256=C9F7IqZAxUmbhfzHj1q5vcIO1xL4dkLjFWpfcqZvJ5g,173382
+ acryl_datahub-0.15.0.4rc3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ acryl_datahub-0.15.0.4rc3.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
+ acryl_datahub-0.15.0.4rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-0.15.0.4rc3.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
 
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0.4rc1"
+ __version__ = "0.15.0.4rc3"
 
 
  def is_dev_mode() -> bool:

datahub/cli/container_cli.py CHANGED
@@ -1,10 +1,13 @@
  import logging
- from typing import List
+ from typing import Any, List
 
  import click
+ import progressbar
 
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.ingestion.graph.client import get_default_graph
  from datahub.metadata.schema_classes import (
+     DomainsClass,
      GlossaryTermAssociationClass,
      OwnerClass,
      OwnershipTypeClass,
@@ -27,12 +30,12 @@ def apply_association_to_container(
      association_type: str,
  ) -> None:
      """
-     Common function to add either tags, terms, or owners to child datasets (for now).
+     Common function to add either tags, terms, domains, or owners to child datasets (for now).
 
      Args:
          container_urn: The URN of the container
          association_urn: The URN of the tag, term, or user to apply
-         association_type: One of 'tag', 'term', or 'owner'
+         association_type: One of 'tag', 'term', 'domain' or 'owner'
      """
      urns: List[str] = []
      graph = get_default_graph()
@@ -43,10 +46,10 @@
          )
      )
 
+     all_patches: List[Any] = []
      for urn in urns:
-         logger.info(f"Adding {association_type} {association_urn} to {urn}")
          builder = DatasetPatchBuilder(urn)
-
+         patches: List[Any] = []
          if association_type == "tag":
              patches = builder.add_tag(TagAssociationClass(association_urn)).build()
          elif association_type == "term":
@@ -60,9 +63,17 @@
                      type=OwnershipTypeClass.TECHNICAL_OWNER,
                  )
              ).build()
-
-         for mcp in patches:
-             graph.emit(mcp)
+         elif association_type == "domain":
+             patches = [
+                 MetadataChangeProposalWrapper(
+                     entityUrn=urn,
+                     aspect=DomainsClass(domains=[association_urn]),
+                 )
+             ]
+         all_patches.extend(patches)
+     mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
+     for mcp in mcps_iter:
+         graph.emit(mcp)
 
 
  @container.command()
@@ -83,7 +94,15 @@ def term(container_urn: str, term_urn: str) -> None:
 
  @container.command()
  @click.option("--container-urn", required=True, type=str)
- @click.option("--owner-id", required=True, type=str)
- def owner(container_urn: str, owner_id: str) -> None:
+ @click.option("--owner-urn", required=True, type=str)
+ def owner(container_urn: str, owner_urn: str) -> None:
      """Add patch to add a owner to all datasets in a container"""
-     apply_association_to_container(container_urn, owner_id, "owner")
+     apply_association_to_container(container_urn, owner_urn, "owner")
+
+
+ @container.command()
+ @click.option("--container-urn", required=True, type=str)
+ @click.option("--domain-urn", required=True, type=str)
+ def domain(container_urn: str, domain_urn: str) -> None:
+     """Add patch to add a domain to all datasets in a container"""
+     apply_association_to_container(container_urn, domain_urn, "domain")
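
The new domain subcommand follows the same pattern as tag, term, and owner: it resolves every dataset inside the container and emits a Domains aspect for each one, now with a progress bar over all accumulated proposals. A minimal sketch of the equivalent programmatic call, using illustrative URNs (not taken from the package):

    # Sketch only: the dataset and domain URNs below are illustrative.
    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import DomainsClass

    mcp = MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)",
        aspect=DomainsClass(domains=["urn:li:domain:marketing"]),
    )
    # graph.emit(mcp)  # the CLI emits one such proposal per child dataset
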

datahub/emitter/enum_helpers.py CHANGED
@@ -1,11 +1,13 @@
  from typing import List, Type
 
+ from typing_extensions import LiteralString
 
- def get_enum_options(_class: Type[object]) -> List[str]:
+
+ def get_enum_options(class_: Type[object]) -> List[LiteralString]:
      """Get the valid values for an enum in the datahub.metadata.schema_classes module."""
 
      return [
          value
-         for name, value in vars(_class).items()
+         for name, value in vars(class_).items()
          if not callable(value) and not name.startswith("_")
      ]
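
The parameter rename and the LiteralString return type do not change behavior; the helper still reflects over the constant-holding classes generated in datahub.metadata.schema_classes. A small usage sketch (the exact option values depend on the generated class):

    from datahub.emitter.enum_helpers import get_enum_options
    from datahub.metadata.schema_classes import OwnershipTypeClass

    # Collects the non-callable, non-underscore class attributes,
    # e.g. values such as "TECHNICAL_OWNER" and "BUSINESS_OWNER".
    options = get_enum_options(OwnershipTypeClass)
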

datahub/emitter/mce_builder.py CHANGED
@@ -440,6 +440,10 @@ def can_add_aspect_to_snapshot(
 
 
  def can_add_aspect(mce: MetadataChangeEventClass, AspectType: Type[Aspect]) -> bool:
+     # TODO: This is specific to snapshot types. We have a more general method
+     # in `entity_supports_aspect`, which should be used instead. This method
+     # should be deprecated, and all usages should be replaced.
+
      SnapshotType = type(mce.proposedSnapshot)
 
      return can_add_aspect_to_snapshot(SnapshotType, AspectType)

datahub/emitter/mcp_builder.py CHANGED
@@ -90,6 +90,25 @@ class ContainerKey(DatahubKey):
      def as_urn(self) -> str:
          return make_container_urn(guid=self.guid())
 
+     def parent_key(self) -> Optional["ContainerKey"]:
+         # Find the immediate base class of self.
+         # This is a bit of a hack, but it works.
+         base_classes = self.__class__.__bases__
+         if len(base_classes) != 1:
+             # TODO: Raise a more specific error.
+             raise ValueError(
+                 f"Unable to determine parent key for {self.__class__}: {self}"
+             )
+         base_class = base_classes[0]
+         if base_class is DatahubKey or base_class is ContainerKey:
+             return None
+
+         # We need to use `__dict__` instead of `pydantic.BaseModel.dict()`
+         # in order to include "excluded" fields e.g. `backcompat_env_as_instance`.
+         # Tricky: this only works because DatahubKey is a BaseModel and hence
+         # allows extra fields.
+         return base_class(**self.__dict__)
+
 
  # DEPRECATION: Keeping the `PlatformKey` name around for backwards compatibility.
  PlatformKey = ContainerKey
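
The new parent_key() walks one level up the key hierarchy by re-instantiating the immediate base class from the child's fields, and returns None once the base is ContainerKey or DatahubKey itself. A minimal sketch with hypothetical subclasses (the field names and constructor arguments here are illustrative; DataHub ships its own DatabaseKey/SchemaKey variants):

    from datahub.emitter.mcp_builder import ContainerKey

    class DatabaseKey(ContainerKey):   # hypothetical, for illustration only
        database: str

    class SchemaKey(DatabaseKey):      # hypothetical, for illustration only
        schema_name: str

    schema_key = SchemaKey(platform="mysql", database="db", schema_name="public")
    db_key = schema_key.parent_key()   # a DatabaseKey built from the same fields
    assert db_key is not None and db_key.parent_key() is None  # base is ContainerKey
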

datahub/ingestion/api/decorators.py CHANGED
@@ -25,6 +25,8 @@ def config_class(config_cls: Type) -> Callable[[Type], Type]:
          # add the create method only if it has not been overridden from the base Source.create method
          cls.create = classmethod(default_create)
 
+         # TODO: Once we're on Python 3.10, we should call abc.update_abstractmethods here.
+
          return cls
 
      return wrapper
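
For reference, the stdlib helper mentioned in the TODO (available since Python 3.10) recomputes a class's abstract-method set after a decorator injects an implementation. A self-contained sketch, unrelated to the DataHub classes:

    import abc

    class Base(abc.ABC):
        @abc.abstractmethod
        def create(self) -> None: ...

    def add_create(cls):
        cls.create = lambda self: None
        abc.update_abstractmethods(cls)  # Python 3.10+: 'create' leaves the abstract set
        return cls

    @add_create
    class Concrete(Base):
        pass

    Concrete()  # instantiable, because the abstract set was recomputed
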

datahub/ingestion/api/registry.py CHANGED
@@ -173,8 +173,10 @@ class PluginRegistry(Generic[T]):
 
          tp = self._ensure_not_lazy(key)
          if isinstance(tp, ModuleNotFoundError):
+             # TODO: Once we're on Python 3.11 (with PEP 678), we can use .add_note()
+             # to enrich the error instead of wrapping it.
              raise ConfigurationError(
-                 f"{key} is disabled; try running: pip install '{__package_name__}[{key}]'"
+                 f"{key} is disabled due to a missing dependency: {tp.name}; try running `pip install '{__package_name__}[{key}]'`"
              ) from tp
          elif isinstance(tp, Exception):
              raise ConfigurationError(
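
The TODO refers to PEP 678 exception notes (Python 3.11+), which would let the registry annotate the original ModuleNotFoundError rather than wrap it in a ConfigurationError. A rough sketch with a hypothetical optional dependency:

    try:
        import hypothetical_plugin_dependency  # stand-in for a missing extra
    except ModuleNotFoundError as e:
        # Python 3.11+: attach guidance to the original exception, then re-raise it.
        e.add_note("try running: pip install 'acryl-datahub[<plugin>]'")
        raise
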

datahub/ingestion/api/sink.py CHANGED
@@ -110,6 +110,10 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
          self.__post_init__()
 
      def __post_init__(self) -> None:
+         """Hook called after the sink's main initialization is complete.
+
+         Sink subclasses can override this method to customize initialization.
+         """
          pass
 
      @classmethod
@@ -117,9 +121,17 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
          return cls(ctx, cls.get_config_class().parse_obj(config_dict))
 
      def handle_work_unit_start(self, workunit: WorkUnit) -> None:
+         """Called at the start of each new workunit.
+
+         This method is deprecated and will be removed in a future release.
+         """
          pass
 
      def handle_work_unit_end(self, workunit: WorkUnit) -> None:
+         """Called at the end of each workunit.
+
+         This method is deprecated and will be removed in a future release.
+         """
          pass
 
      @abstractmethod
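
The new docstrings formalize __post_init__ as the extension point: the base __init__ finishes its own setup and then invokes the hook, so subclasses can add state without overriding __init__. A generic sketch of the pattern (illustrative classes, not the real Sink API):

    class BaseSink:
        def __init__(self, config: dict) -> None:
            self.config = config
            self.__post_init__()  # dunder-style names are exempt from name mangling

        def __post_init__(self) -> None:
            pass  # hook for subclasses

    class CountingSink(BaseSink):
        def __post_init__(self) -> None:
            self.records_written = 0  # extra state set up after the base init

    sink = CountingSink({"path": "/tmp/out"})
    assert sink.records_written == 0
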

datahub/ingestion/api/source.py CHANGED
@@ -404,8 +404,11 @@ class Source(Closeable, metaclass=ABCMeta):
          # Technically, this method should be abstract. However, the @config_class
          # decorator automatically generates a create method at runtime if one is
          # not defined. Python still treats the class as abstract because it thinks
-         # the create method is missing. To avoid the class becoming abstract, we
-         # can't make this method abstract.
+         # the create method is missing.
+         #
+         # Once we're on Python 3.10, we can use the abc.update_abstractmethods(cls)
+         # method in the config_class decorator. That would allow us to make this
+         # method abstract.
          raise NotImplementedError('sources must implement "create"')
 
      def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/aws/glue.py CHANGED
@@ -738,11 +738,17 @@ class GlueSource(StatefulIngestionSourceBase):
          self,
      ) -> Tuple[List[Mapping[str, Any]], List[Dict]]:
          all_databases = [*self.get_all_databases()]
-         all_tables = [
-             tables
-             for database in all_databases
-             for tables in self.get_tables_from_database(database)
-         ]
+         all_tables = []
+         for database in all_databases:
+             try:
+                 for tables in self.get_tables_from_database(database):
+                     all_tables.append(tables)
+             except Exception as e:
+                 self.report.failure(
+                     message="Failed to get tables from database",
+                     context=database["Name"],
+                     exc=e,
+                 )
          return all_databases, all_tables
 
      def get_lineage_if_enabled(
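
The rewritten loop trades the all-or-nothing list comprehension for per-database error isolation: a failure while listing one database's tables is recorded on the source report and the remaining databases are still scanned. The same pattern in isolation (fetch_tables and report_failure are hypothetical stand-ins for the Glue client call and self.report.failure):

    from typing import Any, Callable, Dict, List

    def collect_tables(
        databases: List[Dict[str, Any]],
        fetch_tables: Callable[[Dict[str, Any]], List[Dict[str, Any]]],
        report_failure: Callable[..., None],
    ) -> List[Dict[str, Any]]:
        all_tables: List[Dict[str, Any]] = []
        for database in databases:
            try:
                all_tables.extend(fetch_tables(database))
            except Exception as e:
                # record the failure and keep going with the next database
                report_failure(
                    message="Failed to get tables from database",
                    context=database["Name"],
                    exc=e,
                )
        return all_tables
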

datahub/ingestion/source/powerbi/powerbi.py CHANGED
@@ -1322,14 +1322,14 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
                  context=",".join(
                      [
                          dataset.name
-                         for dataset in workspace.independent_datasets
+                         for dataset in workspace.independent_datasets.values()
                          if dataset.name
                      ]
                  ),
              )
              return
 
-         for dataset in workspace.independent_datasets:
+         for dataset in workspace.independent_datasets.values():
              yield from auto_workunit(
                  stream=self.mapper.to_datahub_dataset(
                      dataset=dataset,
@@ -1440,7 +1440,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
 
          yield from auto_workunit(self.emit_app(workspace=workspace))
 
-         for dashboard in workspace.dashboards:
+         for dashboard in workspace.dashboards.values():
              try:
                  # Fetch PowerBi users for dashboards
                  dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
@@ -1459,7 +1459,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
              if wu is not None:
                  yield wu
 
-         for report in workspace.reports:
+         for report in workspace.reports.values():
              for work_unit in self.mapper.report_to_datahub_work_units(
                  report, workspace
              ):

datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py CHANGED
@@ -71,13 +71,13 @@ class Workspace:
      id: str
      name: str
      type: str  # This is used as a subtype of the Container entity.
-     dashboards: List["Dashboard"]
-     reports: List["Report"]
-     datasets: Dict[str, "PowerBIDataset"]
-     report_endorsements: Dict[str, List[str]]
-     dashboard_endorsements: Dict[str, List[str]]
+     dashboards: Dict[str, "Dashboard"]  # key = dashboard id
+     reports: Dict[str, "Report"]  # key = report id
+     datasets: Dict[str, "PowerBIDataset"]  # key = dataset id
+     report_endorsements: Dict[str, List[str]]  # key = report id
+     dashboard_endorsements: Dict[str, List[str]]  # key = dashboard id
      scan_result: dict
-     independent_datasets: List["PowerBIDataset"]
+     independent_datasets: Dict[str, "PowerBIDataset"]  # key = dataset id
      app: Optional["App"]
 
      def get_urn_part(self, workspace_id_as_urn_part: Optional[bool] = False) -> str:
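
Switching these fields from lists to id-keyed dicts means consumers iterate with .values() (as seen in powerbi.py above) and gain direct lookup by id. A tiny standalone illustration of the migration, with plain dicts standing in for Report objects:

    reports_list = [{"id": "r1"}, {"id": "r2"}]    # old shape: List[Report]
    reports = {r["id"]: r for r in reports_list}   # new shape: Dict[str, Report]

    for report in reports.values():                # iteration moves to .values()
        print(report["id"])

    assert reports["r1"] is reports_list[0]        # O(1) lookup by report id
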

datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py CHANGED
@@ -193,15 +193,18 @@ class PowerBiAPI:
      def get_report_users(self, workspace_id: str, report_id: str) -> List[User]:
          return self._get_entity_users(workspace_id, Constant.REPORTS, report_id)
 
-     def get_reports(self, workspace: Workspace) -> List[Report]:
+     def get_reports(self, workspace: Workspace) -> Dict[str, Report]:
          """
          Fetch the report from PowerBi for the given Workspace
          """
-         reports: List[Report] = []
+         reports: Dict[str, Report] = {}
          try:
-             reports = self._get_resolver().get_reports(workspace)
+             reports = {
+                 report.id: report
+                 for report in self._get_resolver().get_reports(workspace)
+             }
              # Fill Report dataset
-             for report in reports:
+             for report in reports.values():
                  if report.dataset_id:
                      report.dataset = self.dataset_registry.get(report.dataset_id)
                      if report.dataset is None:
@@ -222,7 +225,7 @@ class PowerBiAPI:
                  )
                  return
 
-             for report in reports:
+             for report in reports.values():
                  report.users = self.get_report_users(
                      workspace_id=workspace.id, report_id=report.id
                  )
@@ -234,7 +237,7 @@ class PowerBiAPI:
                  )
                  return
 
-             for report in reports:
+             for report in reports.values():
                  report.tags = workspace.report_endorsements.get(report.id, [])
 
          fill_ownership()
@@ -270,12 +273,12 @@ class PowerBiAPI:
                  name=workspace[Constant.NAME],
                  type=workspace[Constant.TYPE],
                  datasets={},
-                 dashboards=[],
-                 reports=[],
+                 dashboards={},
+                 reports={},
                  report_endorsements={},
                  dashboard_endorsements={},
                  scan_result={},
-                 independent_datasets=[],
+                 independent_datasets={},
                  app=None,  # It will be populated in _fill_metadata_from_scan_result method
              )
              for workspace in groups
@@ -561,12 +564,12 @@ class PowerBiAPI:
                  name=workspace_metadata[Constant.NAME],
                  type=workspace_metadata[Constant.TYPE],
                  datasets={},
-                 dashboards=[],
-                 reports=[],
+                 dashboards={},
+                 reports={},
                  report_endorsements={},
                  dashboard_endorsements={},
                  scan_result={},
-                 independent_datasets=[],
+                 independent_datasets={},
                  app=None,  # It is getting set from scan-result
              )
              cur_workspace.scan_result = workspace_metadata
@@ -597,25 +600,28 @@ class PowerBiAPI:
      def _fill_independent_datasets(self, workspace: Workspace) -> None:
          reachable_datasets: List[str] = []
          # Find out reachable datasets
-         for dashboard in workspace.dashboards:
+         for dashboard in workspace.dashboards.values():
              for tile in dashboard.tiles:
                  if tile.dataset is not None:
                      reachable_datasets.append(tile.dataset.id)
 
-         for report in workspace.reports:
+         for report in workspace.reports.values():
              if report.dataset is not None:
                  reachable_datasets.append(report.dataset.id)
 
          # Set datasets not present in reachable_datasets
          for dataset in workspace.datasets.values():
              if dataset.id not in reachable_datasets:
-                 workspace.independent_datasets.append(dataset)
+                 workspace.independent_datasets[dataset.id] = dataset
 
      def _fill_regular_metadata_detail(self, workspace: Workspace) -> None:
          def fill_dashboards() -> None:
-             workspace.dashboards = self._get_resolver().get_dashboards(workspace)
+             workspace.dashboards = {
+                 dashboard.id: dashboard
+                 for dashboard in self._get_resolver().get_dashboards(workspace)
+             }
              # set tiles of Dashboard
-             for dashboard in workspace.dashboards:
+             for dashboard in workspace.dashboards.values():
                  dashboard.tiles = self._get_resolver().get_tiles(
                      workspace, dashboard=dashboard
                  )
@@ -644,7 +650,7 @@ class PowerBiAPI:
                      "Skipping tag retrieval for dashboard as extract_endorsements_to_tags is set to false"
                  )
                  return
-             for dashboard in workspace.dashboards:
+             for dashboard in workspace.dashboards.values():
                  dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])
 
          if self.__config.extract_dashboards:

datahub/ingestion/source/slack/slack.py CHANGED
@@ -5,6 +5,8 @@ from typing import Iterable, List, Optional, Tuple
 
  from pydantic import Field, SecretStr
  from slack_sdk import WebClient
+ from tenacity import retry, wait_exponential
+ from tenacity.before_sleep import before_sleep_log
 
  import datahub.emitter.mce_builder as builder
  from datahub.configuration.common import ConfigModel
@@ -294,6 +296,10 @@ class SlackSource(Source):
                  return
              raise e
 
+     @retry(
+         wait=wait_exponential(multiplier=2, min=4, max=60),
+         before_sleep=before_sleep_log(logger, logging.ERROR, True),
+     )
      def get_user_to_be_updated(self) -> Iterable[CorpUser]:
          graphql_query = textwrap.dedent(
              """

datahub/ingestion/source/sql/mssql/job_models.py CHANGED
@@ -156,7 +156,7 @@ class MSSQLDataJob:
      entity: Union[StoredProcedure, JobStep]
      type: str = "dataJob"
      source: str = "mssql"
-     external_url: str = ""
+     external_url: Optional[str] = None
      description: Optional[str] = None
      status: Optional[str] = None
      incoming: List[str] = field(default_factory=list)
@@ -228,7 +228,7 @@ class MSSQLDataFlow:
      entity: Union[MSSQLJob, MSSQLProceduresContainer]
      type: str = "dataFlow"
      source: str = "mssql"
-     external_url: str = ""
+     external_url: Optional[str] = None
      flow_properties: Dict[str, str] = field(default_factory=dict)
 
      def add_property(