acryl-datahub 0.15.0.3rc1__py3-none-any.whl → 0.15.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (36) hide show
  1. acryl_datahub-0.15.0.4.dist-info/LICENSE +202 -0
  2. {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/METADATA +2411 -2408
  3. {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/RECORD +36 -33
  4. datahub/__init__.py +1 -1
  5. datahub/cli/container_cli.py +108 -0
  6. datahub/emitter/enum_helpers.py +4 -2
  7. datahub/emitter/mce_builder.py +4 -0
  8. datahub/emitter/mcp_builder.py +19 -0
  9. datahub/entrypoints.py +2 -0
  10. datahub/ingestion/api/decorators.py +2 -0
  11. datahub/ingestion/api/registry.py +3 -1
  12. datahub/ingestion/api/sink.py +12 -0
  13. datahub/ingestion/api/source.py +5 -2
  14. datahub/ingestion/source/aws/glue.py +11 -5
  15. datahub/ingestion/source/aws/s3_util.py +1 -24
  16. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -2
  17. datahub/ingestion/source/dbt/dbt_common.py +2 -2
  18. datahub/ingestion/source/powerbi/powerbi.py +4 -4
  19. datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -6
  20. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +24 -18
  21. datahub/ingestion/source/s3/source.py +6 -2
  22. datahub/ingestion/source/slack/slack.py +6 -0
  23. datahub/ingestion/source/sql/hive_metastore.py +3 -3
  24. datahub/ingestion/source/sql/mssql/job_models.py +2 -2
  25. datahub/ingestion/source/sql/mssql/source.py +26 -11
  26. datahub/ingestion/source/sql/teradata.py +2 -2
  27. datahub/ingestion/source/tableau/tableau.py +23 -10
  28. datahub/metadata/_schema_classes.py +401 -401
  29. datahub/metadata/_urns/urn_defs.py +1857 -1408
  30. datahub/metadata/schema.avsc +16624 -16266
  31. datahub/sql_parsing/sql_parsing_aggregator.py +3 -3
  32. datahub/utilities/groupby.py +17 -0
  33. datahub/utilities/urns/_urn_base.py +6 -2
  34. {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/WHEEL +0 -0
  35. {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/entry_points.txt +0 -0
  36. {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
- datahub/__init__.py,sha256=2Qi1M0twhFDzi6WSn0GA-qtAgY8DJPzJGRgXBzJrkHc,576
1
+ datahub/__init__.py,sha256=ozMEEWnTPovcdtF1e_DLiI51HpnTCPkOeFwgJcbcXUs,573
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/entrypoints.py,sha256=IMtLWvGuiqoUSnNaCaFjhd86NHwuXSWXp2kUL-xDkk0,7950
3
+ datahub/entrypoints.py,sha256=vbkUx_jVIkr_V4wtoQhOpledna-pD_tco1mloRnb7QY,8029
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -61,6 +61,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
61
  datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
62
62
  datahub/cli/cli_utils.py,sha256=onbG7z9hIm0zCAm0a2ulTOsHC_NVkdIsbg__EMj02DQ,13540
63
63
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
64
+ datahub/cli/container_cli.py,sha256=8D73hLfTHsDg4Cedh_2x0utl7ppOeB1TUJVRgur-Crw,3624
64
65
  datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
65
66
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
66
67
  datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
@@ -111,12 +112,12 @@ datahub/configuration/validate_multiline_string.py,sha256=l9PF6_EAC_1lWxU_RWrvPB
111
112
  datahub/configuration/yaml.py,sha256=dLmjCalPOjgdc7mmJxtlP7uOrIHZiAWxD1gwAFOdtUU,308
112
113
  datahub/emitter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
114
  datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
114
- datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
115
+ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
115
116
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
116
117
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
117
- datahub/emitter/mce_builder.py,sha256=B-uUSB2Gq_gRSiqWIUAYvHGagnVRrdL0ZLf8bW8yfDs,16326
118
+ datahub/emitter/mce_builder.py,sha256=9wjXG1WmWZUN7-_JdRJ5OcH8IPG0b3TGzxry4yscOR0,16545
118
119
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
119
- datahub/emitter/mcp_builder.py,sha256=AHSeMfcFxvJl2PXyDQ5HsnWbk6HkJqUtppNKjQtIbUI,10791
120
+ datahub/emitter/mcp_builder.py,sha256=_-d5o7RIwgtMMdr_9tg0oU5ta6lL4dqOie1a68WEjKg,11638
120
121
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
121
122
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
122
123
  datahub/emitter/rest_emitter.py,sha256=v-A4eR_GSbXg-dsUgHAMcUd68qNEF5KO2MYlyhAYn8I,17880
@@ -128,17 +129,17 @@ datahub/ingestion/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
128
129
  datahub/ingestion/api/closeable.py,sha256=k12AT--s4GDtZ-po_rVm5QKgvGIDteeRPByZPIOfecA,599
129
130
  datahub/ingestion/api/committable.py,sha256=4S6GuBzvX2vb1A8P506NbspOKfZ1621sBG8t0lvRb8o,886
130
131
  datahub/ingestion/api/common.py,sha256=nJVL8YdvokYFajOjmVpSNlLbZJ5iVOFS4KJDlGtJ_jc,2735
131
- datahub/ingestion/api/decorators.py,sha256=KTNdf2B20L-wlEPF8UsL89a8zwvRSOfA7gOOZnnYalY,3933
132
+ datahub/ingestion/api/decorators.py,sha256=b9bxHXlqCLDgqrVdPU6WNQg1koZcK62AkZ9vNwvWeK4,4029
132
133
  datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
133
134
  datahub/ingestion/api/incremental_lineage_helper.py,sha256=JTmJvXzzwI04oTUTIeTKKscT_hjnr8nW34NFWJvCXDc,5871
134
135
  datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
135
136
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
136
137
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
137
- datahub/ingestion/api/registry.py,sha256=LGElUdzhNQoEr-k2SN23mJaIYnA1PYfF97LQxBmWmD8,7262
138
+ datahub/ingestion/api/registry.py,sha256=nigH50Qzz-imLrfapsuGjbjdUphGzwjT_5jUoQUlt9I,7445
138
139
  datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
139
140
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
140
- datahub/ingestion/api/sink.py,sha256=3jw7-x9gXGreOPwn49wG5fT3C8pYhaNMQITdMN6kbag,4478
141
- datahub/ingestion/api/source.py,sha256=yLx_7TCyhflo0hloYzC4y2ovh3TWEVcDh1agvh8AQwI,19036
141
+ datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
142
+ datahub/ingestion/api/source.py,sha256=kjR7qL0rchi4S_mSP-j5yE9MLD7gnbN1wdNvUU-Lna4,19154
142
143
  datahub/ingestion/api/source_helpers.py,sha256=FvtTEGktO_x8TKqkAvtdR7rwi4A7efb8vb9TSt_zz5E,19644
143
144
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
144
145
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
@@ -217,9 +218,9 @@ datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm
217
218
  datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
218
219
  datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
219
220
  datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
220
- datahub/ingestion/source/aws/glue.py,sha256=9KYv53loNa-keVwCRLDb-5II_AjeHVRf1Fb2HXqtZXk,57653
221
+ datahub/ingestion/source/aws/glue.py,sha256=qwkZMcbBlHIdhhuRj-gHNYMeuMADrvaHcN3gik0n_08,57919
221
222
  datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD89RSFfsVNlFbRnSkQ,3901
222
- datahub/ingestion/source/aws/s3_util.py,sha256=pikTe9SuiKdN-TZ8eOhB0PYq0aUgUPDpxwtTLsVofRs,2834
223
+ datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
223
224
  datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
224
225
  datahub/ingestion/source/aws/sagemaker_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
225
226
  datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=NvYfI8LHgDvhEZE7qp6qF1NSZ0_SQKhg3ivtdjsdpFg,2172
@@ -243,7 +244,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256
243
244
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
244
245
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=qH8k8wyMlUVzUTVhSd3FgOMGCK1D5NYuC0KF8tez_Ys,7957
245
246
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
246
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=XdlTd4VxhYinbLt_UNsiZroaRqD7Fy7lzncskHzD_nc,50790
247
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c1hlsgat7l27fQN8GwvHkdme7rQ4LqIQKFwwA8z7kqw,50824
247
248
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
248
249
  datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
249
250
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Dkig1SEfPxw6zZDeSulUYnqsu4WGCVPXypGPEUVriyU,44907
@@ -274,7 +275,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
274
275
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
275
276
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
276
277
  datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
277
- datahub/ingestion/source/dbt/dbt_common.py,sha256=ivB2cnm7zGSZzP36Etk54bSn-VmLjfAQK7wl4thwYrc,80499
278
+ datahub/ingestion/source/dbt/dbt_common.py,sha256=y4VINaQQ-WhEf-rICGLGi1U88nKmRdVQPmh88OJROWg,80536
278
279
  datahub/ingestion/source/dbt/dbt_core.py,sha256=m6cA9vVd4Nh2arc-T2_xeQoxvreRbMhTDIJuYsx3wHc,22722
279
280
  datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
280
281
  datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
@@ -359,7 +360,7 @@ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
359
360
  datahub/ingestion/source/powerbi/config.py,sha256=CzG-kdcGqB0nYnQ8W40Anb1gsbMZ5TcF_dL_I02xfrE,22764
360
361
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
361
362
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
362
- datahub/ingestion/source/powerbi/powerbi.py,sha256=6O1U5jA5BDvc3CF35kUckzeQRlWF4PNbVMedMyZE_pY,54499
363
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=xCNMgL-KuPGpIFv_PP1woyiddY_PpbX1HEl3aDk7F1c,54535
363
364
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
364
365
  datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
365
366
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
@@ -369,9 +370,9 @@ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5ztea
369
370
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
370
371
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
371
372
  datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
372
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
373
+ datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=kS337FgY-fLPjeRryQ-adVm1VAEThI88svii2Q9sGTc,8435
373
374
  datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=eNKW9ShWJ5F3pKgTVQ6xc1H1rl-JBIy9ye1pq5C2Kb0,39598
374
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=ROLfaSWTNyNFO118kjOqFMTbFPT_D9XnnpMTfYcDchM,26193
375
+ datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=k8rP2uwXb6maS7VzprUcqr2ggjimz0tILVJezze0jyA,26441
375
376
  datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
376
377
  datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
377
378
  datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
@@ -403,7 +404,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
403
404
  datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
404
405
  datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
405
406
  datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
406
- datahub/ingestion/source/s3/source.py,sha256=VvpO1kSLllD6hugVIU5eNRGd42X6hy0GLedh_w4t9yA,47261
407
+ datahub/ingestion/source/s3/source.py,sha256=IE_K_HE_S7w8fpGPT8OptU5-VmwapntsI5PePv_wUQA,47412
407
408
  datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
408
409
  datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
409
410
  datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -423,7 +424,7 @@ datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiR
423
424
  datahub/ingestion/source/sigma/sigma.py,sha256=T-zAgbEw83JSu_4j1gCYibSCaRLXjY3Kt6HdYPEZAFA,24096
424
425
  datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
425
426
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
426
- datahub/ingestion/source/slack/slack.py,sha256=C_3iXUS72h7HALhBW_AIyi3nNOqzyh7Ogflr-qI5ZEE,12946
427
+ datahub/ingestion/source/slack/slack.py,sha256=VpLS-6zuQa8hIuHnZhLf8wRdN72Xell3ZMd0kK3A0i8,13188
427
428
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
428
429
  datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_GrNdxV9WZKri7rmtrpQ,2628
429
430
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
@@ -452,7 +453,7 @@ datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2
452
453
  datahub/ingestion/source/sql/druid.py,sha256=lhO9CCOlHV-6LjBuAxAxtB9I1pvPtsGSdr63bz6_ilA,2837
453
454
  datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
454
455
  datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnXYZT7vZE,30306
455
- datahub/ingestion/source/sql/hive_metastore.py,sha256=n9WvJzGBYVwjSUKuAWQcYuRJttH81k2S2zjHuw8gvME,36074
456
+ datahub/ingestion/source/sql/hive_metastore.py,sha256=65DI0PeJMpGOEhTfo6cygeybgaFqi93yGnLLRy58ATo,36117
456
457
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
457
458
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
458
459
  datahub/ingestion/source/sql/oracle.py,sha256=tVP3AiZO97psM8O8UzBb9C7__s8y4fkyQbXBv3m1LU4,24503
@@ -467,13 +468,13 @@ datahub/ingestion/source/sql/sql_types.py,sha256=uuU3taVe4oCTXkqg1wSMGzTwVleRyUR
467
468
  datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
468
469
  datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
469
470
  datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
470
- datahub/ingestion/source/sql/teradata.py,sha256=ioGzrSpBsKmf2WIAxDofhyq3FU2xpa31bTZjrQhz6-M,32707
471
+ datahub/ingestion/source/sql/teradata.py,sha256=5lTNMOOOmrG71fTAyTs7iYFroeTiGIdATwXQmH6sWJg,32741
471
472
  datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhllRJFUzfU8,17895
472
473
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
473
474
  datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
474
475
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
475
- datahub/ingestion/source/sql/mssql/job_models.py,sha256=ztXDrD4anhzwWvACIm9fucE2WhMDMKkJ4alMYOQOqWA,7083
476
- datahub/ingestion/source/sql/mssql/source.py,sha256=WV2rU_sN5pqd4MEu6p4kwQRpADFjG0qh27tx7qP5AOw,30931
476
+ datahub/ingestion/source/sql/mssql/job_models.py,sha256=tiACTVNAo3WXT-JXZfpBG5UyhflrLGi1cyS8mAUL9Yw,7107
477
+ datahub/ingestion/source/sql/mssql/source.py,sha256=UUlIvdINDzJ7BODsNaMMXGOA3LeYKT26wVRmmFYxujs,31631
477
478
  datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
478
479
  datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
479
480
  datahub/ingestion/source/state/checkpoint.py,sha256=-fTUZKkY4nHTFqSWZ0jJkkdIu_tWlOjRNhm4FTr4ul4,8860
@@ -491,7 +492,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
491
492
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
492
493
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
493
494
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
494
- datahub/ingestion/source/tableau/tableau.py,sha256=F2SaJVvhCSe3yvoAvdM5mPpMkdyfY3_zKQ0lLbA1g38,152539
495
+ datahub/ingestion/source/tableau/tableau.py,sha256=YjWyzYZ7hGeMxlqAUQNNJ9LJXlHrc5fHqf7lBWkr1aE,153184
495
496
  datahub/ingestion/source/tableau/tableau_common.py,sha256=3AUgXxTGOKM609xvcDrRItGXhUfuNYku2LFaj8z2Hg4,26936
496
497
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
497
498
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
@@ -566,12 +567,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
566
567
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
567
568
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
568
569
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
569
- datahub/metadata/_schema_classes.py,sha256=sbCVtCvb9xI_4p_Q7WOqae_HqHa1dpteT6gNn8dd3bk,975061
570
- datahub/metadata/schema.avsc,sha256=59Q_iZ204Yr-t66h0ESaY7YTnGzuc99A6g2_4cvk04k,736072
570
+ datahub/metadata/_schema_classes.py,sha256=GMLN7Ov0m39EWaXlziVgINqxhihZDzNy2BztBIR9YM8,975061
571
+ datahub/metadata/schema.avsc,sha256=sAPtgHSNJ1a126Vz7OjVIMKFjrrIG9f4cvRH6SkJ0jc,640786
571
572
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
572
573
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
573
574
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
574
- datahub/metadata/_urns/urn_defs.py,sha256=7wLzbGE-UnPZiJlCm8RcU3hROhHJ3QtwxJLGFLLjJlw,109984
575
+ datahub/metadata/_urns/urn_defs.py,sha256=SoCD7TNdGcPKI9vD8ZXLZzxggJTHtvRe4Jgaj_Mm2x8,132110
575
576
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
576
577
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
577
578
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -885,7 +886,7 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
885
886
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
886
887
  datahub/sql_parsing/schema_resolver.py,sha256=8dYz6pC3Y35pXBn41grOE2dKkSiSeLHOz-N138uWQg4,10796
887
888
  datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
888
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=tRr5o9_dcMa8sfVyo6iOS2aHT2-gxC90ZXW5-QX7998,70121
889
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
889
890
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
890
891
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
891
892
  datahub/sql_parsing/sqlglot_lineage.py,sha256=42n8yCmCt25bOR8fmq4n_nNubn5kLuw_Mx36SFC9Nj0,47460
@@ -918,6 +919,7 @@ datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,11
918
919
  datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
919
920
  datahub/utilities/file_backed_collections.py,sha256=B3gQS0isgbCM9cH3DEBzpA4PVixtSwr5vJoNGmEG-fg,21960
920
921
  datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
922
+ datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
921
923
  datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
922
924
  datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
923
925
  datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
@@ -955,7 +957,7 @@ datahub/utilities/yaml_sync_utils.py,sha256=65IEe8quW3_zHCR8CyoDkZyopeZJazU-IyMr
955
957
  datahub/utilities/registries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
956
958
  datahub/utilities/registries/domain_registry.py,sha256=0SfcZNop-PXBbl-AWw92vAyb28i0YXTr-TKdBwixmOw,2452
957
959
  datahub/utilities/urns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
958
- datahub/utilities/urns/_urn_base.py,sha256=3JVe4u2fZvFwLCGaDwqDmEgbnveVVbX52cE4GleI634,10420
960
+ datahub/utilities/urns/_urn_base.py,sha256=phYous0_uLIYQ_rB-2YyqKjxonTL-69fXA7Js1WxQ-Q,10560
959
961
  datahub/utilities/urns/corp_group_urn.py,sha256=6H5Q6nZvAXu80IZBDCeM8xo_9ap9pgwtyi60QXx3hzY,75
960
962
  datahub/utilities/urns/corpuser_urn.py,sha256=h-Yh-9QRbtQOhxxzxEBc7skoavpGaKDKVNrsxSXZ1yQ,88
961
963
  datahub/utilities/urns/data_flow_urn.py,sha256=w1Z7ET1L1OtYD1w-xiUYtyCczsxZZ1l3LRyTRv5NdpE,73
@@ -990,8 +992,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
990
992
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
991
993
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
992
994
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
993
- acryl_datahub-0.15.0.3rc1.dist-info/METADATA,sha256=_OcIbELm2dNMPpLWj69rGVeCXvqg-Nw9OcomoonUAq0,173250
994
- acryl_datahub-0.15.0.3rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
995
- acryl_datahub-0.15.0.3rc1.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
996
- acryl_datahub-0.15.0.3rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
997
- acryl_datahub-0.15.0.3rc1.dist-info/RECORD,,
995
+ acryl_datahub-0.15.0.4.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
996
+ acryl_datahub-0.15.0.4.dist-info/METADATA,sha256=SZZl6M2VxjH8KB01qAcYfdnNiX2nH9Bus3ikPRvu5Bc,173373
997
+ acryl_datahub-0.15.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
998
+ acryl_datahub-0.15.0.4.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
999
+ acryl_datahub-0.15.0.4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1000
+ acryl_datahub-0.15.0.4.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.3rc1"
6
+ __version__ = "0.15.0.4"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -0,0 +1,108 @@
1
+ import logging
2
+ from typing import Any, List
3
+
4
+ import click
5
+ import progressbar
6
+
7
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
8
+ from datahub.ingestion.graph.client import get_default_graph
9
+ from datahub.metadata.schema_classes import (
10
+ DomainsClass,
11
+ GlossaryTermAssociationClass,
12
+ OwnerClass,
13
+ OwnershipTypeClass,
14
+ TagAssociationClass,
15
+ )
16
+ from datahub.specific.dataset import DatasetPatchBuilder
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ @click.group()
22
+ def container() -> None:
23
+ """A group of commands to interact with containers in DataHub."""
24
+ pass
25
+
26
+
27
+ def apply_association_to_container(
28
+ container_urn: str,
29
+ association_urn: str,
30
+ association_type: str,
31
+ ) -> None:
32
+ """
33
+ Common function to add either tags, terms, domains, or owners to child datasets (for now).
34
+
35
+ Args:
36
+ container_urn: The URN of the container
37
+ association_urn: The URN of the tag, term, or user to apply
38
+ association_type: One of 'tag', 'term', 'domain' or 'owner'
39
+ """
40
+ urns: List[str] = []
41
+ graph = get_default_graph()
42
+ logger.info(f"Using {graph}")
43
+ urns.extend(
44
+ graph.get_urns_by_filter(
45
+ container=container_urn, batch_size=1000, entity_types=["dataset"]
46
+ )
47
+ )
48
+
49
+ all_patches: List[Any] = []
50
+ for urn in urns:
51
+ builder = DatasetPatchBuilder(urn)
52
+ patches: List[Any] = []
53
+ if association_type == "tag":
54
+ patches = builder.add_tag(TagAssociationClass(association_urn)).build()
55
+ elif association_type == "term":
56
+ patches = builder.add_term(
57
+ GlossaryTermAssociationClass(association_urn)
58
+ ).build()
59
+ elif association_type == "owner":
60
+ patches = builder.add_owner(
61
+ OwnerClass(
62
+ owner=association_urn,
63
+ type=OwnershipTypeClass.TECHNICAL_OWNER,
64
+ )
65
+ ).build()
66
+ elif association_type == "domain":
67
+ patches = [
68
+ MetadataChangeProposalWrapper(
69
+ entityUrn=urn,
70
+ aspect=DomainsClass(domains=[association_urn]),
71
+ )
72
+ ]
73
+ all_patches.extend(patches)
74
+ mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
75
+ for mcp in mcps_iter:
76
+ graph.emit(mcp)
77
+
78
+
79
+ @container.command()
80
+ @click.option("--container-urn", required=True, type=str)
81
+ @click.option("--tag-urn", required=True, type=str)
82
+ def tag(container_urn: str, tag_urn: str) -> None:
83
+ """Add patch to add a tag to all datasets in a container"""
84
+ apply_association_to_container(container_urn, tag_urn, "tag")
85
+
86
+
87
+ @container.command()
88
+ @click.option("--container-urn", required=True, type=str)
89
+ @click.option("--term-urn", required=True, type=str)
90
+ def term(container_urn: str, term_urn: str) -> None:
91
+ """Add patch to add a term to all datasets in a container"""
92
+ apply_association_to_container(container_urn, term_urn, "term")
93
+
94
+
95
+ @container.command()
96
+ @click.option("--container-urn", required=True, type=str)
97
+ @click.option("--owner-urn", required=True, type=str)
98
+ def owner(container_urn: str, owner_urn: str) -> None:
99
+ """Add patch to add a owner to all datasets in a container"""
100
+ apply_association_to_container(container_urn, owner_urn, "owner")
101
+
102
+
103
+ @container.command()
104
+ @click.option("--container-urn", required=True, type=str)
105
+ @click.option("--domain-urn", required=True, type=str)
106
+ def domain(container_urn: str, domain_urn: str) -> None:
107
+ """Add patch to add a domain to all datasets in a container"""
108
+ apply_association_to_container(container_urn, domain_urn, "domain")
@@ -1,11 +1,13 @@
1
1
  from typing import List, Type
2
2
 
3
+ from typing_extensions import LiteralString
3
4
 
4
- def get_enum_options(_class: Type[object]) -> List[str]:
5
+
6
+ def get_enum_options(class_: Type[object]) -> List[LiteralString]:
5
7
  """Get the valid values for an enum in the datahub.metadata.schema_classes module."""
6
8
 
7
9
  return [
8
10
  value
9
- for name, value in vars(_class).items()
11
+ for name, value in vars(class_).items()
10
12
  if not callable(value) and not name.startswith("_")
11
13
  ]
@@ -440,6 +440,10 @@ def can_add_aspect_to_snapshot(
440
440
 
441
441
 
442
442
  def can_add_aspect(mce: MetadataChangeEventClass, AspectType: Type[Aspect]) -> bool:
443
+ # TODO: This is specific to snapshot types. We have a more general method
444
+ # in `entity_supports_aspect`, which should be used instead. This method
445
+ # should be deprecated, and all usages should be replaced.
446
+
443
447
  SnapshotType = type(mce.proposedSnapshot)
444
448
 
445
449
  return can_add_aspect_to_snapshot(SnapshotType, AspectType)
@@ -90,6 +90,25 @@ class ContainerKey(DatahubKey):
90
90
  def as_urn(self) -> str:
91
91
  return make_container_urn(guid=self.guid())
92
92
 
93
+ def parent_key(self) -> Optional["ContainerKey"]:
94
+ # Find the immediate base class of self.
95
+ # This is a bit of a hack, but it works.
96
+ base_classes = self.__class__.__bases__
97
+ if len(base_classes) != 1:
98
+ # TODO: Raise a more specific error.
99
+ raise ValueError(
100
+ f"Unable to determine parent key for {self.__class__}: {self}"
101
+ )
102
+ base_class = base_classes[0]
103
+ if base_class is DatahubKey or base_class is ContainerKey:
104
+ return None
105
+
106
+ # We need to use `__dict__` instead of `pydantic.BaseModel.dict()`
107
+ # in order to include "excluded" fields e.g. `backcompat_env_as_instance`.
108
+ # Tricky: this only works because DatahubKey is a BaseModel and hence
109
+ # allows extra fields.
110
+ return base_class(**self.__dict__)
111
+
93
112
 
94
113
  # DEPRECATION: Keeping the `PlatformKey` name around for backwards compatibility.
95
114
  PlatformKey = ContainerKey
datahub/entrypoints.py CHANGED
@@ -14,6 +14,7 @@ from datahub.cli.cli_utils import (
14
14
  make_shim_command,
15
15
  )
16
16
  from datahub.cli.config_utils import DATAHUB_CONFIG_PATH, write_gms_config
17
+ from datahub.cli.container_cli import container
17
18
  from datahub.cli.delete_cli import delete
18
19
  from datahub.cli.docker_cli import docker
19
20
  from datahub.cli.env_utils import get_boolean_env_variable
@@ -180,6 +181,7 @@ datahub.add_command(properties)
180
181
  datahub.add_command(forms)
181
182
  datahub.add_command(datacontract)
182
183
  datahub.add_command(assertions)
184
+ datahub.add_command(container)
183
185
 
184
186
  try:
185
187
  from datahub.cli.lite_cli import lite
@@ -25,6 +25,8 @@ def config_class(config_cls: Type) -> Callable[[Type], Type]:
25
25
  # add the create method only if it has not been overridden from the base Source.create method
26
26
  cls.create = classmethod(default_create)
27
27
 
28
+ # TODO: Once we're on Python 3.10, we should call abc.update_abstractmethods here.
29
+
28
30
  return cls
29
31
 
30
32
  return wrapper
@@ -173,8 +173,10 @@ class PluginRegistry(Generic[T]):
173
173
 
174
174
  tp = self._ensure_not_lazy(key)
175
175
  if isinstance(tp, ModuleNotFoundError):
176
+ # TODO: Once we're on Python 3.11 (with PEP 678), we can use .add_note()
177
+ # to enrich the error instead of wrapping it.
176
178
  raise ConfigurationError(
177
- f"{key} is disabled; try running: pip install '{__package_name__}[{key}]'"
179
+ f"{key} is disabled due to a missing dependency: {tp.name}; try running `pip install '{__package_name__}[{key}]'`"
178
180
  ) from tp
179
181
  elif isinstance(tp, Exception):
180
182
  raise ConfigurationError(
@@ -110,6 +110,10 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
110
110
  self.__post_init__()
111
111
 
112
112
  def __post_init__(self) -> None:
113
+ """Hook called after the sink's main initialization is complete.
114
+
115
+ Sink subclasses can override this method to customize initialization.
116
+ """
113
117
  pass
114
118
 
115
119
  @classmethod
@@ -117,9 +121,17 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
117
121
  return cls(ctx, cls.get_config_class().parse_obj(config_dict))
118
122
 
119
123
  def handle_work_unit_start(self, workunit: WorkUnit) -> None:
124
+ """Called at the start of each new workunit.
125
+
126
+ This method is deprecated and will be removed in a future release.
127
+ """
120
128
  pass
121
129
 
122
130
  def handle_work_unit_end(self, workunit: WorkUnit) -> None:
131
+ """Called at the end of each workunit.
132
+
133
+ This method is deprecated and will be removed in a future release.
134
+ """
123
135
  pass
124
136
 
125
137
  @abstractmethod
@@ -404,8 +404,11 @@ class Source(Closeable, metaclass=ABCMeta):
404
404
  # Technically, this method should be abstract. However, the @config_class
405
405
  # decorator automatically generates a create method at runtime if one is
406
406
  # not defined. Python still treats the class as abstract because it thinks
407
- # the create method is missing. To avoid the class becoming abstract, we
408
- # can't make this method abstract.
407
+ # the create method is missing.
408
+ #
409
+ # Once we're on Python 3.10, we can use the abc.update_abstractmethods(cls)
410
+ # method in the config_class decorator. That would allow us to make this
411
+ # method abstract.
409
412
  raise NotImplementedError('sources must implement "create"')
410
413
 
411
414
  def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -738,11 +738,17 @@ class GlueSource(StatefulIngestionSourceBase):
738
738
  self,
739
739
  ) -> Tuple[List[Mapping[str, Any]], List[Dict]]:
740
740
  all_databases = [*self.get_all_databases()]
741
- all_tables = [
742
- tables
743
- for database in all_databases
744
- for tables in self.get_tables_from_database(database)
745
- ]
741
+ all_tables = []
742
+ for database in all_databases:
743
+ try:
744
+ for tables in self.get_tables_from_database(database):
745
+ all_tables.append(tables)
746
+ except Exception as e:
747
+ self.report.failure(
748
+ message="Failed to get tables from database",
749
+ context=database["Name"],
750
+ exc=e,
751
+ )
746
752
  return all_databases, all_tables
747
753
 
748
754
  def get_lineage_if_enabled(
@@ -1,11 +1,6 @@
1
1
  import logging
2
2
  import os
3
- from collections import defaultdict
4
- from typing import TYPE_CHECKING, Dict, Iterable, List, Optional
5
-
6
- if TYPE_CHECKING:
7
- from mypy_boto3_s3.service_resource import ObjectSummary
8
-
3
+ from typing import Optional
9
4
 
10
5
  S3_PREFIXES = ["s3://", "s3n://", "s3a://"]
11
6
 
@@ -73,21 +68,3 @@ def get_key_prefix(s3_uri: str) -> str:
73
68
  f"Not an S3 URI. Must start with one of the following prefixes: {str(S3_PREFIXES)}"
74
69
  )
75
70
  return strip_s3_prefix(s3_uri).split("/", maxsplit=1)[1]
76
-
77
-
78
- def group_s3_objects_by_dirname(
79
- s3_objects: Iterable["ObjectSummary"],
80
- ) -> Dict[str, List["ObjectSummary"]]:
81
- """
82
- Groups S3 objects by their directory name.
83
-
84
- If an s3_object is in the root directory (i.e., s3://bucket/file.txt), it is grouped under '/'.
85
- """
86
- grouped_s3_objs = defaultdict(list)
87
- for obj in s3_objects:
88
- if "/" in obj.key:
89
- dirname = obj.key.rsplit("/", 1)[0]
90
- else:
91
- dirname = "/"
92
- grouped_s3_objs[dirname].append(obj)
93
- return grouped_s3_objs
@@ -2,7 +2,6 @@ import logging
2
2
  import re
3
3
  from base64 import b32decode
4
4
  from collections import defaultdict
5
- from itertools import groupby
6
5
  from typing import Dict, Iterable, List, Optional, Set, Type, Union, cast
7
6
 
8
7
  from google.cloud.bigquery.table import TableListItem
@@ -101,6 +100,7 @@ from datahub.metadata.schema_classes import (
101
100
  from datahub.metadata.urns import TagUrn
102
101
  from datahub.sql_parsing.schema_resolver import SchemaResolver
103
102
  from datahub.utilities.file_backed_collections import FileBackedDict
103
+ from datahub.utilities.groupby import groupby_unsorted
104
104
  from datahub.utilities.hive_schema_to_avro import (
105
105
  HiveColumnToAvroConverter,
106
106
  get_schema_fields_for_hive_column,
@@ -730,7 +730,7 @@ class BigQuerySchemaGenerator:
730
730
  foreign_keys: List[BigqueryTableConstraint] = list(
731
731
  filter(lambda x: x.type == "FOREIGN KEY", table.constraints)
732
732
  )
733
- for key, group in groupby(
733
+ for key, group in groupby_unsorted(
734
734
  foreign_keys,
735
735
  lambda x: f"{x.referenced_project_id}.{x.referenced_dataset}.{x.referenced_table_name}",
736
736
  ):
@@ -1,4 +1,3 @@
1
- import itertools
2
1
  import logging
3
2
  import re
4
3
  from abc import abstractmethod
@@ -111,6 +110,7 @@ from datahub.sql_parsing.sqlglot_utils import (
111
110
  parse_statements_and_pick,
112
111
  try_format_query,
113
112
  )
113
+ from datahub.utilities.groupby import groupby_unsorted
114
114
  from datahub.utilities.lossy_collections import LossyList
115
115
  from datahub.utilities.mapping import Constants, OperationProcessor
116
116
  from datahub.utilities.time import datetime_to_ts_millis
@@ -1929,7 +1929,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
1929
1929
  else None
1930
1930
  ),
1931
1931
  )
1932
- for downstream, upstreams in itertools.groupby(
1932
+ for downstream, upstreams in groupby_unsorted(
1933
1933
  node.upstream_cll, lambda x: x.downstream_col
1934
1934
  )
1935
1935
  ]
@@ -1322,14 +1322,14 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1322
1322
  context=",".join(
1323
1323
  [
1324
1324
  dataset.name
1325
- for dataset in workspace.independent_datasets
1325
+ for dataset in workspace.independent_datasets.values()
1326
1326
  if dataset.name
1327
1327
  ]
1328
1328
  ),
1329
1329
  )
1330
1330
  return
1331
1331
 
1332
- for dataset in workspace.independent_datasets:
1332
+ for dataset in workspace.independent_datasets.values():
1333
1333
  yield from auto_workunit(
1334
1334
  stream=self.mapper.to_datahub_dataset(
1335
1335
  dataset=dataset,
@@ -1440,7 +1440,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1440
1440
 
1441
1441
  yield from auto_workunit(self.emit_app(workspace=workspace))
1442
1442
 
1443
- for dashboard in workspace.dashboards:
1443
+ for dashboard in workspace.dashboards.values():
1444
1444
  try:
1445
1445
  # Fetch PowerBi users for dashboards
1446
1446
  dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
@@ -1459,7 +1459,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1459
1459
  if wu is not None:
1460
1460
  yield wu
1461
1461
 
1462
- for report in workspace.reports:
1462
+ for report in workspace.reports.values():
1463
1463
  for work_unit in self.mapper.report_to_datahub_work_units(
1464
1464
  report, workspace
1465
1465
  ):