acryl-datahub 0.15.0.3rc1__py3-none-any.whl → 0.15.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- acryl_datahub-0.15.0.4.dist-info/LICENSE +202 -0
- {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/METADATA +2411 -2408
- {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/RECORD +36 -33
- datahub/__init__.py +1 -1
- datahub/cli/container_cli.py +108 -0
- datahub/emitter/enum_helpers.py +4 -2
- datahub/emitter/mce_builder.py +4 -0
- datahub/emitter/mcp_builder.py +19 -0
- datahub/entrypoints.py +2 -0
- datahub/ingestion/api/decorators.py +2 -0
- datahub/ingestion/api/registry.py +3 -1
- datahub/ingestion/api/sink.py +12 -0
- datahub/ingestion/api/source.py +5 -2
- datahub/ingestion/source/aws/glue.py +11 -5
- datahub/ingestion/source/aws/s3_util.py +1 -24
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -2
- datahub/ingestion/source/dbt/dbt_common.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +4 -4
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +24 -18
- datahub/ingestion/source/s3/source.py +6 -2
- datahub/ingestion/source/slack/slack.py +6 -0
- datahub/ingestion/source/sql/hive_metastore.py +3 -3
- datahub/ingestion/source/sql/mssql/job_models.py +2 -2
- datahub/ingestion/source/sql/mssql/source.py +26 -11
- datahub/ingestion/source/sql/teradata.py +2 -2
- datahub/ingestion/source/tableau/tableau.py +23 -10
- datahub/metadata/_schema_classes.py +401 -401
- datahub/metadata/_urns/urn_defs.py +1857 -1408
- datahub/metadata/schema.avsc +16624 -16266
- datahub/sql_parsing/sql_parsing_aggregator.py +3 -3
- datahub/utilities/groupby.py +17 -0
- datahub/utilities/urns/_urn_base.py +6 -2
- {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.3rc1.dist-info → acryl_datahub-0.15.0.4.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=ozMEEWnTPovcdtF1e_DLiI51HpnTCPkOeFwgJcbcXUs,573
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/entrypoints.py,sha256=
|
|
3
|
+
datahub/entrypoints.py,sha256=vbkUx_jVIkr_V4wtoQhOpledna-pD_tco1mloRnb7QY,8029
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
|
|
@@ -61,6 +61,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
61
61
|
datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
|
|
62
62
|
datahub/cli/cli_utils.py,sha256=onbG7z9hIm0zCAm0a2ulTOsHC_NVkdIsbg__EMj02DQ,13540
|
|
63
63
|
datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
|
|
64
|
+
datahub/cli/container_cli.py,sha256=8D73hLfTHsDg4Cedh_2x0utl7ppOeB1TUJVRgur-Crw,3624
|
|
64
65
|
datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
|
|
65
66
|
datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
|
|
66
67
|
datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
|
|
@@ -111,12 +112,12 @@ datahub/configuration/validate_multiline_string.py,sha256=l9PF6_EAC_1lWxU_RWrvPB
|
|
|
111
112
|
datahub/configuration/yaml.py,sha256=dLmjCalPOjgdc7mmJxtlP7uOrIHZiAWxD1gwAFOdtUU,308
|
|
112
113
|
datahub/emitter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
113
114
|
datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
|
|
114
|
-
datahub/emitter/enum_helpers.py,sha256=
|
|
115
|
+
datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
|
|
115
116
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
116
117
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
117
|
-
datahub/emitter/mce_builder.py,sha256=
|
|
118
|
+
datahub/emitter/mce_builder.py,sha256=9wjXG1WmWZUN7-_JdRJ5OcH8IPG0b3TGzxry4yscOR0,16545
|
|
118
119
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
119
|
-
datahub/emitter/mcp_builder.py,sha256=
|
|
120
|
+
datahub/emitter/mcp_builder.py,sha256=_-d5o7RIwgtMMdr_9tg0oU5ta6lL4dqOie1a68WEjKg,11638
|
|
120
121
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
121
122
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
122
123
|
datahub/emitter/rest_emitter.py,sha256=v-A4eR_GSbXg-dsUgHAMcUd68qNEF5KO2MYlyhAYn8I,17880
|
|
@@ -128,17 +129,17 @@ datahub/ingestion/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
128
129
|
datahub/ingestion/api/closeable.py,sha256=k12AT--s4GDtZ-po_rVm5QKgvGIDteeRPByZPIOfecA,599
|
|
129
130
|
datahub/ingestion/api/committable.py,sha256=4S6GuBzvX2vb1A8P506NbspOKfZ1621sBG8t0lvRb8o,886
|
|
130
131
|
datahub/ingestion/api/common.py,sha256=nJVL8YdvokYFajOjmVpSNlLbZJ5iVOFS4KJDlGtJ_jc,2735
|
|
131
|
-
datahub/ingestion/api/decorators.py,sha256=
|
|
132
|
+
datahub/ingestion/api/decorators.py,sha256=b9bxHXlqCLDgqrVdPU6WNQg1koZcK62AkZ9vNwvWeK4,4029
|
|
132
133
|
datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
|
|
133
134
|
datahub/ingestion/api/incremental_lineage_helper.py,sha256=JTmJvXzzwI04oTUTIeTKKscT_hjnr8nW34NFWJvCXDc,5871
|
|
134
135
|
datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
|
|
135
136
|
datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
|
|
136
137
|
datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
|
|
137
|
-
datahub/ingestion/api/registry.py,sha256=
|
|
138
|
+
datahub/ingestion/api/registry.py,sha256=nigH50Qzz-imLrfapsuGjbjdUphGzwjT_5jUoQUlt9I,7445
|
|
138
139
|
datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
|
|
139
140
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
140
|
-
datahub/ingestion/api/sink.py,sha256=
|
|
141
|
-
datahub/ingestion/api/source.py,sha256=
|
|
141
|
+
datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
|
|
142
|
+
datahub/ingestion/api/source.py,sha256=kjR7qL0rchi4S_mSP-j5yE9MLD7gnbN1wdNvUU-Lna4,19154
|
|
142
143
|
datahub/ingestion/api/source_helpers.py,sha256=FvtTEGktO_x8TKqkAvtdR7rwi4A7efb8vb9TSt_zz5E,19644
|
|
143
144
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
144
145
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
@@ -217,9 +218,9 @@ datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm
|
|
|
217
218
|
datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
|
|
218
219
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
219
220
|
datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
|
|
220
|
-
datahub/ingestion/source/aws/glue.py,sha256=
|
|
221
|
+
datahub/ingestion/source/aws/glue.py,sha256=qwkZMcbBlHIdhhuRj-gHNYMeuMADrvaHcN3gik0n_08,57919
|
|
221
222
|
datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD89RSFfsVNlFbRnSkQ,3901
|
|
222
|
-
datahub/ingestion/source/aws/s3_util.py,sha256=
|
|
223
|
+
datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
|
|
223
224
|
datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
|
|
224
225
|
datahub/ingestion/source/aws/sagemaker_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
225
226
|
datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=NvYfI8LHgDvhEZE7qp6qF1NSZ0_SQKhg3ivtdjsdpFg,2172
|
|
@@ -243,7 +244,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256
|
|
|
243
244
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
|
|
244
245
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=qH8k8wyMlUVzUTVhSd3FgOMGCK1D5NYuC0KF8tez_Ys,7957
|
|
245
246
|
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
|
|
246
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
247
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c1hlsgat7l27fQN8GwvHkdme7rQ4LqIQKFwwA8z7kqw,50824
|
|
247
248
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
248
249
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
249
250
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Dkig1SEfPxw6zZDeSulUYnqsu4WGCVPXypGPEUVriyU,44907
|
|
@@ -274,7 +275,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
274
275
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
275
276
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
276
277
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
|
|
277
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
278
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=y4VINaQQ-WhEf-rICGLGi1U88nKmRdVQPmh88OJROWg,80536
|
|
278
279
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=m6cA9vVd4Nh2arc-T2_xeQoxvreRbMhTDIJuYsx3wHc,22722
|
|
279
280
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
|
|
280
281
|
datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
|
|
@@ -359,7 +360,7 @@ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
359
360
|
datahub/ingestion/source/powerbi/config.py,sha256=CzG-kdcGqB0nYnQ8W40Anb1gsbMZ5TcF_dL_I02xfrE,22764
|
|
360
361
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
361
362
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
|
|
362
|
-
datahub/ingestion/source/powerbi/powerbi.py,sha256=
|
|
363
|
+
datahub/ingestion/source/powerbi/powerbi.py,sha256=xCNMgL-KuPGpIFv_PP1woyiddY_PpbX1HEl3aDk7F1c,54535
|
|
363
364
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
364
365
|
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
|
|
365
366
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
@@ -369,9 +370,9 @@ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5ztea
|
|
|
369
370
|
datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
|
|
370
371
|
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
371
372
|
datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
372
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=
|
|
373
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=kS337FgY-fLPjeRryQ-adVm1VAEThI88svii2Q9sGTc,8435
|
|
373
374
|
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=eNKW9ShWJ5F3pKgTVQ6xc1H1rl-JBIy9ye1pq5C2Kb0,39598
|
|
374
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=
|
|
375
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=k8rP2uwXb6maS7VzprUcqr2ggjimz0tILVJezze0jyA,26441
|
|
375
376
|
datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
|
|
376
377
|
datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
|
|
377
378
|
datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
|
|
@@ -403,7 +404,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
|
|
|
403
404
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
404
405
|
datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
405
406
|
datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
406
|
-
datahub/ingestion/source/s3/source.py,sha256=
|
|
407
|
+
datahub/ingestion/source/s3/source.py,sha256=IE_K_HE_S7w8fpGPT8OptU5-VmwapntsI5PePv_wUQA,47412
|
|
407
408
|
datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
408
409
|
datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
|
|
409
410
|
datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
|
|
@@ -423,7 +424,7 @@ datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiR
|
|
|
423
424
|
datahub/ingestion/source/sigma/sigma.py,sha256=T-zAgbEw83JSu_4j1gCYibSCaRLXjY3Kt6HdYPEZAFA,24096
|
|
424
425
|
datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
|
|
425
426
|
datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
426
|
-
datahub/ingestion/source/slack/slack.py,sha256=
|
|
427
|
+
datahub/ingestion/source/slack/slack.py,sha256=VpLS-6zuQa8hIuHnZhLf8wRdN72Xell3ZMd0kK3A0i8,13188
|
|
427
428
|
datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
428
429
|
datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_GrNdxV9WZKri7rmtrpQ,2628
|
|
429
430
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
@@ -452,7 +453,7 @@ datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2
|
|
|
452
453
|
datahub/ingestion/source/sql/druid.py,sha256=lhO9CCOlHV-6LjBuAxAxtB9I1pvPtsGSdr63bz6_ilA,2837
|
|
453
454
|
datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
|
|
454
455
|
datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnXYZT7vZE,30306
|
|
455
|
-
datahub/ingestion/source/sql/hive_metastore.py,sha256=
|
|
456
|
+
datahub/ingestion/source/sql/hive_metastore.py,sha256=65DI0PeJMpGOEhTfo6cygeybgaFqi93yGnLLRy58ATo,36117
|
|
456
457
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
457
458
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
458
459
|
datahub/ingestion/source/sql/oracle.py,sha256=tVP3AiZO97psM8O8UzBb9C7__s8y4fkyQbXBv3m1LU4,24503
|
|
@@ -467,13 +468,13 @@ datahub/ingestion/source/sql/sql_types.py,sha256=uuU3taVe4oCTXkqg1wSMGzTwVleRyUR
|
|
|
467
468
|
datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
|
|
468
469
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
469
470
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
470
|
-
datahub/ingestion/source/sql/teradata.py,sha256=
|
|
471
|
+
datahub/ingestion/source/sql/teradata.py,sha256=5lTNMOOOmrG71fTAyTs7iYFroeTiGIdATwXQmH6sWJg,32741
|
|
471
472
|
datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhllRJFUzfU8,17895
|
|
472
473
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
|
|
473
474
|
datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
|
|
474
475
|
datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
|
|
475
|
-
datahub/ingestion/source/sql/mssql/job_models.py,sha256=
|
|
476
|
-
datahub/ingestion/source/sql/mssql/source.py,sha256=
|
|
476
|
+
datahub/ingestion/source/sql/mssql/job_models.py,sha256=tiACTVNAo3WXT-JXZfpBG5UyhflrLGi1cyS8mAUL9Yw,7107
|
|
477
|
+
datahub/ingestion/source/sql/mssql/source.py,sha256=UUlIvdINDzJ7BODsNaMMXGOA3LeYKT26wVRmmFYxujs,31631
|
|
477
478
|
datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
|
|
478
479
|
datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
479
480
|
datahub/ingestion/source/state/checkpoint.py,sha256=-fTUZKkY4nHTFqSWZ0jJkkdIu_tWlOjRNhm4FTr4ul4,8860
|
|
@@ -491,7 +492,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
491
492
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
|
|
492
493
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
493
494
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
494
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
495
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=YjWyzYZ7hGeMxlqAUQNNJ9LJXlHrc5fHqf7lBWkr1aE,153184
|
|
495
496
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=3AUgXxTGOKM609xvcDrRItGXhUfuNYku2LFaj8z2Hg4,26936
|
|
496
497
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
497
498
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
@@ -566,12 +567,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
566
567
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
567
568
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
568
569
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
569
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
570
|
-
datahub/metadata/schema.avsc,sha256=
|
|
570
|
+
datahub/metadata/_schema_classes.py,sha256=GMLN7Ov0m39EWaXlziVgINqxhihZDzNy2BztBIR9YM8,975061
|
|
571
|
+
datahub/metadata/schema.avsc,sha256=sAPtgHSNJ1a126Vz7OjVIMKFjrrIG9f4cvRH6SkJ0jc,640786
|
|
571
572
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
572
573
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
573
574
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
574
|
-
datahub/metadata/_urns/urn_defs.py,sha256=
|
|
575
|
+
datahub/metadata/_urns/urn_defs.py,sha256=SoCD7TNdGcPKI9vD8ZXLZzxggJTHtvRe4Jgaj_Mm2x8,132110
|
|
575
576
|
datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
576
577
|
datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
577
578
|
datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
|
|
@@ -885,7 +886,7 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
|
|
|
885
886
|
datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
|
|
886
887
|
datahub/sql_parsing/schema_resolver.py,sha256=8dYz6pC3Y35pXBn41grOE2dKkSiSeLHOz-N138uWQg4,10796
|
|
887
888
|
datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
|
|
888
|
-
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=
|
|
889
|
+
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
|
|
889
890
|
datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
|
|
890
891
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
891
892
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=42n8yCmCt25bOR8fmq4n_nNubn5kLuw_Mx36SFC9Nj0,47460
|
|
@@ -918,6 +919,7 @@ datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,11
|
|
|
918
919
|
datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
|
|
919
920
|
datahub/utilities/file_backed_collections.py,sha256=B3gQS0isgbCM9cH3DEBzpA4PVixtSwr5vJoNGmEG-fg,21960
|
|
920
921
|
datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
|
|
922
|
+
datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
|
|
921
923
|
datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
|
|
922
924
|
datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
|
|
923
925
|
datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
|
|
@@ -955,7 +957,7 @@ datahub/utilities/yaml_sync_utils.py,sha256=65IEe8quW3_zHCR8CyoDkZyopeZJazU-IyMr
|
|
|
955
957
|
datahub/utilities/registries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
956
958
|
datahub/utilities/registries/domain_registry.py,sha256=0SfcZNop-PXBbl-AWw92vAyb28i0YXTr-TKdBwixmOw,2452
|
|
957
959
|
datahub/utilities/urns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
958
|
-
datahub/utilities/urns/_urn_base.py,sha256=
|
|
960
|
+
datahub/utilities/urns/_urn_base.py,sha256=phYous0_uLIYQ_rB-2YyqKjxonTL-69fXA7Js1WxQ-Q,10560
|
|
959
961
|
datahub/utilities/urns/corp_group_urn.py,sha256=6H5Q6nZvAXu80IZBDCeM8xo_9ap9pgwtyi60QXx3hzY,75
|
|
960
962
|
datahub/utilities/urns/corpuser_urn.py,sha256=h-Yh-9QRbtQOhxxzxEBc7skoavpGaKDKVNrsxSXZ1yQ,88
|
|
961
963
|
datahub/utilities/urns/data_flow_urn.py,sha256=w1Z7ET1L1OtYD1w-xiUYtyCczsxZZ1l3LRyTRv5NdpE,73
|
|
@@ -990,8 +992,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
990
992
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
991
993
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
992
994
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
993
|
-
acryl_datahub-0.15.0.
|
|
994
|
-
acryl_datahub-0.15.0.
|
|
995
|
-
acryl_datahub-0.15.0.
|
|
996
|
-
acryl_datahub-0.15.0.
|
|
997
|
-
acryl_datahub-0.15.0.
|
|
995
|
+
acryl_datahub-0.15.0.4.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
996
|
+
acryl_datahub-0.15.0.4.dist-info/METADATA,sha256=SZZl6M2VxjH8KB01qAcYfdnNiX2nH9Bus3ikPRvu5Bc,173373
|
|
997
|
+
acryl_datahub-0.15.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
998
|
+
acryl_datahub-0.15.0.4.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
999
|
+
acryl_datahub-0.15.0.4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1000
|
+
acryl_datahub-0.15.0.4.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, List
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
import progressbar
|
|
6
|
+
|
|
7
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
8
|
+
from datahub.ingestion.graph.client import get_default_graph
|
|
9
|
+
from datahub.metadata.schema_classes import (
|
|
10
|
+
DomainsClass,
|
|
11
|
+
GlossaryTermAssociationClass,
|
|
12
|
+
OwnerClass,
|
|
13
|
+
OwnershipTypeClass,
|
|
14
|
+
TagAssociationClass,
|
|
15
|
+
)
|
|
16
|
+
from datahub.specific.dataset import DatasetPatchBuilder
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@click.group()
|
|
22
|
+
def container() -> None:
|
|
23
|
+
"""A group of commands to interact with containers in DataHub."""
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def apply_association_to_container(
|
|
28
|
+
container_urn: str,
|
|
29
|
+
association_urn: str,
|
|
30
|
+
association_type: str,
|
|
31
|
+
) -> None:
|
|
32
|
+
"""
|
|
33
|
+
Common function to add either tags, terms, domains, or owners to child datasets (for now).
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
container_urn: The URN of the container
|
|
37
|
+
association_urn: The URN of the tag, term, or user to apply
|
|
38
|
+
association_type: One of 'tag', 'term', 'domain' or 'owner'
|
|
39
|
+
"""
|
|
40
|
+
urns: List[str] = []
|
|
41
|
+
graph = get_default_graph()
|
|
42
|
+
logger.info(f"Using {graph}")
|
|
43
|
+
urns.extend(
|
|
44
|
+
graph.get_urns_by_filter(
|
|
45
|
+
container=container_urn, batch_size=1000, entity_types=["dataset"]
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
all_patches: List[Any] = []
|
|
50
|
+
for urn in urns:
|
|
51
|
+
builder = DatasetPatchBuilder(urn)
|
|
52
|
+
patches: List[Any] = []
|
|
53
|
+
if association_type == "tag":
|
|
54
|
+
patches = builder.add_tag(TagAssociationClass(association_urn)).build()
|
|
55
|
+
elif association_type == "term":
|
|
56
|
+
patches = builder.add_term(
|
|
57
|
+
GlossaryTermAssociationClass(association_urn)
|
|
58
|
+
).build()
|
|
59
|
+
elif association_type == "owner":
|
|
60
|
+
patches = builder.add_owner(
|
|
61
|
+
OwnerClass(
|
|
62
|
+
owner=association_urn,
|
|
63
|
+
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
64
|
+
)
|
|
65
|
+
).build()
|
|
66
|
+
elif association_type == "domain":
|
|
67
|
+
patches = [
|
|
68
|
+
MetadataChangeProposalWrapper(
|
|
69
|
+
entityUrn=urn,
|
|
70
|
+
aspect=DomainsClass(domains=[association_urn]),
|
|
71
|
+
)
|
|
72
|
+
]
|
|
73
|
+
all_patches.extend(patches)
|
|
74
|
+
mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
|
|
75
|
+
for mcp in mcps_iter:
|
|
76
|
+
graph.emit(mcp)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@container.command()
|
|
80
|
+
@click.option("--container-urn", required=True, type=str)
|
|
81
|
+
@click.option("--tag-urn", required=True, type=str)
|
|
82
|
+
def tag(container_urn: str, tag_urn: str) -> None:
|
|
83
|
+
"""Add patch to add a tag to all datasets in a container"""
|
|
84
|
+
apply_association_to_container(container_urn, tag_urn, "tag")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@container.command()
|
|
88
|
+
@click.option("--container-urn", required=True, type=str)
|
|
89
|
+
@click.option("--term-urn", required=True, type=str)
|
|
90
|
+
def term(container_urn: str, term_urn: str) -> None:
|
|
91
|
+
"""Add patch to add a term to all datasets in a container"""
|
|
92
|
+
apply_association_to_container(container_urn, term_urn, "term")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@container.command()
|
|
96
|
+
@click.option("--container-urn", required=True, type=str)
|
|
97
|
+
@click.option("--owner-urn", required=True, type=str)
|
|
98
|
+
def owner(container_urn: str, owner_urn: str) -> None:
|
|
99
|
+
"""Add patch to add a owner to all datasets in a container"""
|
|
100
|
+
apply_association_to_container(container_urn, owner_urn, "owner")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@container.command()
|
|
104
|
+
@click.option("--container-urn", required=True, type=str)
|
|
105
|
+
@click.option("--domain-urn", required=True, type=str)
|
|
106
|
+
def domain(container_urn: str, domain_urn: str) -> None:
|
|
107
|
+
"""Add patch to add a domain to all datasets in a container"""
|
|
108
|
+
apply_association_to_container(container_urn, domain_urn, "domain")
|
datahub/emitter/enum_helpers.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
from typing import List, Type
|
|
2
2
|
|
|
3
|
+
from typing_extensions import LiteralString
|
|
3
4
|
|
|
4
|
-
|
|
5
|
+
|
|
6
|
+
def get_enum_options(class_: Type[object]) -> List[LiteralString]:
|
|
5
7
|
"""Get the valid values for an enum in the datahub.metadata.schema_classes module."""
|
|
6
8
|
|
|
7
9
|
return [
|
|
8
10
|
value
|
|
9
|
-
for name, value in vars(
|
|
11
|
+
for name, value in vars(class_).items()
|
|
10
12
|
if not callable(value) and not name.startswith("_")
|
|
11
13
|
]
|
datahub/emitter/mce_builder.py
CHANGED
|
@@ -440,6 +440,10 @@ def can_add_aspect_to_snapshot(
|
|
|
440
440
|
|
|
441
441
|
|
|
442
442
|
def can_add_aspect(mce: MetadataChangeEventClass, AspectType: Type[Aspect]) -> bool:
|
|
443
|
+
# TODO: This is specific to snapshot types. We have a more general method
|
|
444
|
+
# in `entity_supports_aspect`, which should be used instead. This method
|
|
445
|
+
# should be deprecated, and all usages should be replaced.
|
|
446
|
+
|
|
443
447
|
SnapshotType = type(mce.proposedSnapshot)
|
|
444
448
|
|
|
445
449
|
return can_add_aspect_to_snapshot(SnapshotType, AspectType)
|
datahub/emitter/mcp_builder.py
CHANGED
|
@@ -90,6 +90,25 @@ class ContainerKey(DatahubKey):
|
|
|
90
90
|
def as_urn(self) -> str:
|
|
91
91
|
return make_container_urn(guid=self.guid())
|
|
92
92
|
|
|
93
|
+
def parent_key(self) -> Optional["ContainerKey"]:
|
|
94
|
+
# Find the immediate base class of self.
|
|
95
|
+
# This is a bit of a hack, but it works.
|
|
96
|
+
base_classes = self.__class__.__bases__
|
|
97
|
+
if len(base_classes) != 1:
|
|
98
|
+
# TODO: Raise a more specific error.
|
|
99
|
+
raise ValueError(
|
|
100
|
+
f"Unable to determine parent key for {self.__class__}: {self}"
|
|
101
|
+
)
|
|
102
|
+
base_class = base_classes[0]
|
|
103
|
+
if base_class is DatahubKey or base_class is ContainerKey:
|
|
104
|
+
return None
|
|
105
|
+
|
|
106
|
+
# We need to use `__dict__` instead of `pydantic.BaseModel.dict()`
|
|
107
|
+
# in order to include "excluded" fields e.g. `backcompat_env_as_instance`.
|
|
108
|
+
# Tricky: this only works because DatahubKey is a BaseModel and hence
|
|
109
|
+
# allows extra fields.
|
|
110
|
+
return base_class(**self.__dict__)
|
|
111
|
+
|
|
93
112
|
|
|
94
113
|
# DEPRECATION: Keeping the `PlatformKey` name around for backwards compatibility.
|
|
95
114
|
PlatformKey = ContainerKey
|
datahub/entrypoints.py
CHANGED
|
@@ -14,6 +14,7 @@ from datahub.cli.cli_utils import (
|
|
|
14
14
|
make_shim_command,
|
|
15
15
|
)
|
|
16
16
|
from datahub.cli.config_utils import DATAHUB_CONFIG_PATH, write_gms_config
|
|
17
|
+
from datahub.cli.container_cli import container
|
|
17
18
|
from datahub.cli.delete_cli import delete
|
|
18
19
|
from datahub.cli.docker_cli import docker
|
|
19
20
|
from datahub.cli.env_utils import get_boolean_env_variable
|
|
@@ -180,6 +181,7 @@ datahub.add_command(properties)
|
|
|
180
181
|
datahub.add_command(forms)
|
|
181
182
|
datahub.add_command(datacontract)
|
|
182
183
|
datahub.add_command(assertions)
|
|
184
|
+
datahub.add_command(container)
|
|
183
185
|
|
|
184
186
|
try:
|
|
185
187
|
from datahub.cli.lite_cli import lite
|
|
@@ -25,6 +25,8 @@ def config_class(config_cls: Type) -> Callable[[Type], Type]:
|
|
|
25
25
|
# add the create method only if it has not been overridden from the base Source.create method
|
|
26
26
|
cls.create = classmethod(default_create)
|
|
27
27
|
|
|
28
|
+
# TODO: Once we're on Python 3.10, we should call abc.update_abstractmethods here.
|
|
29
|
+
|
|
28
30
|
return cls
|
|
29
31
|
|
|
30
32
|
return wrapper
|
|
@@ -173,8 +173,10 @@ class PluginRegistry(Generic[T]):
|
|
|
173
173
|
|
|
174
174
|
tp = self._ensure_not_lazy(key)
|
|
175
175
|
if isinstance(tp, ModuleNotFoundError):
|
|
176
|
+
# TODO: Once we're on Python 3.11 (with PEP 678), we can use .add_note()
|
|
177
|
+
# to enrich the error instead of wrapping it.
|
|
176
178
|
raise ConfigurationError(
|
|
177
|
-
f"{key} is disabled; try running: pip install '{__package_name__}[{key}]'"
|
|
179
|
+
f"{key} is disabled due to a missing dependency: {tp.name}; try running `pip install '{__package_name__}[{key}]'`"
|
|
178
180
|
) from tp
|
|
179
181
|
elif isinstance(tp, Exception):
|
|
180
182
|
raise ConfigurationError(
|
datahub/ingestion/api/sink.py
CHANGED
|
@@ -110,6 +110,10 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
110
110
|
self.__post_init__()
|
|
111
111
|
|
|
112
112
|
def __post_init__(self) -> None:
|
|
113
|
+
"""Hook called after the sink's main initialization is complete.
|
|
114
|
+
|
|
115
|
+
Sink subclasses can override this method to customize initialization.
|
|
116
|
+
"""
|
|
113
117
|
pass
|
|
114
118
|
|
|
115
119
|
@classmethod
|
|
@@ -117,9 +121,17 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
117
121
|
return cls(ctx, cls.get_config_class().parse_obj(config_dict))
|
|
118
122
|
|
|
119
123
|
def handle_work_unit_start(self, workunit: WorkUnit) -> None:
|
|
124
|
+
"""Called at the start of each new workunit.
|
|
125
|
+
|
|
126
|
+
This method is deprecated and will be removed in a future release.
|
|
127
|
+
"""
|
|
120
128
|
pass
|
|
121
129
|
|
|
122
130
|
def handle_work_unit_end(self, workunit: WorkUnit) -> None:
|
|
131
|
+
"""Called at the end of each workunit.
|
|
132
|
+
|
|
133
|
+
This method is deprecated and will be removed in a future release.
|
|
134
|
+
"""
|
|
123
135
|
pass
|
|
124
136
|
|
|
125
137
|
@abstractmethod
|
datahub/ingestion/api/source.py
CHANGED
|
@@ -404,8 +404,11 @@ class Source(Closeable, metaclass=ABCMeta):
|
|
|
404
404
|
# Technically, this method should be abstract. However, the @config_class
|
|
405
405
|
# decorator automatically generates a create method at runtime if one is
|
|
406
406
|
# not defined. Python still treats the class as abstract because it thinks
|
|
407
|
-
# the create method is missing.
|
|
408
|
-
#
|
|
407
|
+
# the create method is missing.
|
|
408
|
+
#
|
|
409
|
+
# Once we're on Python 3.10, we can use the abc.update_abstractmethods(cls)
|
|
410
|
+
# method in the config_class decorator. That would allow us to make this
|
|
411
|
+
# method abstract.
|
|
409
412
|
raise NotImplementedError('sources must implement "create"')
|
|
410
413
|
|
|
411
414
|
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|
|
@@ -738,11 +738,17 @@ class GlueSource(StatefulIngestionSourceBase):
|
|
|
738
738
|
self,
|
|
739
739
|
) -> Tuple[List[Mapping[str, Any]], List[Dict]]:
|
|
740
740
|
all_databases = [*self.get_all_databases()]
|
|
741
|
-
all_tables = [
|
|
742
|
-
tables
|
|
743
|
-
for database in all_databases
|
|
744
|
-
for tables in self.get_tables_from_database(database)
|
|
745
|
-
]
|
|
741
|
+
all_tables = []
|
|
742
|
+
for database in all_databases:
|
|
743
|
+
try:
|
|
744
|
+
for tables in self.get_tables_from_database(database):
|
|
745
|
+
all_tables.append(tables)
|
|
746
|
+
except Exception as e:
|
|
747
|
+
self.report.failure(
|
|
748
|
+
message="Failed to get tables from database",
|
|
749
|
+
context=database["Name"],
|
|
750
|
+
exc=e,
|
|
751
|
+
)
|
|
746
752
|
return all_databases, all_tables
|
|
747
753
|
|
|
748
754
|
def get_lineage_if_enabled(
|
|
@@ -1,11 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
-
from collections import defaultdict
|
|
4
|
-
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional
|
|
5
|
-
|
|
6
|
-
if TYPE_CHECKING:
|
|
7
|
-
from mypy_boto3_s3.service_resource import ObjectSummary
|
|
8
|
-
|
|
3
|
+
from typing import Optional
|
|
9
4
|
|
|
10
5
|
S3_PREFIXES = ["s3://", "s3n://", "s3a://"]
|
|
11
6
|
|
|
@@ -73,21 +68,3 @@ def get_key_prefix(s3_uri: str) -> str:
|
|
|
73
68
|
f"Not an S3 URI. Must start with one of the following prefixes: {str(S3_PREFIXES)}"
|
|
74
69
|
)
|
|
75
70
|
return strip_s3_prefix(s3_uri).split("/", maxsplit=1)[1]
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def group_s3_objects_by_dirname(
|
|
79
|
-
s3_objects: Iterable["ObjectSummary"],
|
|
80
|
-
) -> Dict[str, List["ObjectSummary"]]:
|
|
81
|
-
"""
|
|
82
|
-
Groups S3 objects by their directory name.
|
|
83
|
-
|
|
84
|
-
If a s3_object in the root directory (i.e., s3://bucket/file.txt), it is grouped under '/'.
|
|
85
|
-
"""
|
|
86
|
-
grouped_s3_objs = defaultdict(list)
|
|
87
|
-
for obj in s3_objects:
|
|
88
|
-
if "/" in obj.key:
|
|
89
|
-
dirname = obj.key.rsplit("/", 1)[0]
|
|
90
|
-
else:
|
|
91
|
-
dirname = "/"
|
|
92
|
-
grouped_s3_objs[dirname].append(obj)
|
|
93
|
-
return grouped_s3_objs
|
|
@@ -2,7 +2,6 @@ import logging
|
|
|
2
2
|
import re
|
|
3
3
|
from base64 import b32decode
|
|
4
4
|
from collections import defaultdict
|
|
5
|
-
from itertools import groupby
|
|
6
5
|
from typing import Dict, Iterable, List, Optional, Set, Type, Union, cast
|
|
7
6
|
|
|
8
7
|
from google.cloud.bigquery.table import TableListItem
|
|
@@ -101,6 +100,7 @@ from datahub.metadata.schema_classes import (
|
|
|
101
100
|
from datahub.metadata.urns import TagUrn
|
|
102
101
|
from datahub.sql_parsing.schema_resolver import SchemaResolver
|
|
103
102
|
from datahub.utilities.file_backed_collections import FileBackedDict
|
|
103
|
+
from datahub.utilities.groupby import groupby_unsorted
|
|
104
104
|
from datahub.utilities.hive_schema_to_avro import (
|
|
105
105
|
HiveColumnToAvroConverter,
|
|
106
106
|
get_schema_fields_for_hive_column,
|
|
@@ -730,7 +730,7 @@ class BigQuerySchemaGenerator:
|
|
|
730
730
|
foreign_keys: List[BigqueryTableConstraint] = list(
|
|
731
731
|
filter(lambda x: x.type == "FOREIGN KEY", table.constraints)
|
|
732
732
|
)
|
|
733
|
-
for key, group in groupby(
|
|
733
|
+
for key, group in groupby_unsorted(
|
|
734
734
|
foreign_keys,
|
|
735
735
|
lambda x: f"{x.referenced_project_id}.{x.referenced_dataset}.{x.referenced_table_name}",
|
|
736
736
|
):
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import itertools
|
|
2
1
|
import logging
|
|
3
2
|
import re
|
|
4
3
|
from abc import abstractmethod
|
|
@@ -111,6 +110,7 @@ from datahub.sql_parsing.sqlglot_utils import (
|
|
|
111
110
|
parse_statements_and_pick,
|
|
112
111
|
try_format_query,
|
|
113
112
|
)
|
|
113
|
+
from datahub.utilities.groupby import groupby_unsorted
|
|
114
114
|
from datahub.utilities.lossy_collections import LossyList
|
|
115
115
|
from datahub.utilities.mapping import Constants, OperationProcessor
|
|
116
116
|
from datahub.utilities.time import datetime_to_ts_millis
|
|
@@ -1929,7 +1929,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1929
1929
|
else None
|
|
1930
1930
|
),
|
|
1931
1931
|
)
|
|
1932
|
-
for downstream, upstreams in itertools.groupby(
|
|
1932
|
+
for downstream, upstreams in groupby_unsorted(
|
|
1933
1933
|
node.upstream_cll, lambda x: x.downstream_col
|
|
1934
1934
|
)
|
|
1935
1935
|
]
|
|
@@ -1322,14 +1322,14 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1322
1322
|
context=",".join(
|
|
1323
1323
|
[
|
|
1324
1324
|
dataset.name
|
|
1325
|
-
for dataset in workspace.independent_datasets
|
|
1325
|
+
for dataset in workspace.independent_datasets.values()
|
|
1326
1326
|
if dataset.name
|
|
1327
1327
|
]
|
|
1328
1328
|
),
|
|
1329
1329
|
)
|
|
1330
1330
|
return
|
|
1331
1331
|
|
|
1332
|
-
for dataset in workspace.independent_datasets:
|
|
1332
|
+
for dataset in workspace.independent_datasets.values():
|
|
1333
1333
|
yield from auto_workunit(
|
|
1334
1334
|
stream=self.mapper.to_datahub_dataset(
|
|
1335
1335
|
dataset=dataset,
|
|
@@ -1440,7 +1440,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1440
1440
|
|
|
1441
1441
|
yield from auto_workunit(self.emit_app(workspace=workspace))
|
|
1442
1442
|
|
|
1443
|
-
for dashboard in workspace.dashboards:
|
|
1443
|
+
for dashboard in workspace.dashboards.values():
|
|
1444
1444
|
try:
|
|
1445
1445
|
# Fetch PowerBi users for dashboards
|
|
1446
1446
|
dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
|
|
@@ -1459,7 +1459,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1459
1459
|
if wu is not None:
|
|
1460
1460
|
yield wu
|
|
1461
1461
|
|
|
1462
|
-
for report in workspace.reports:
|
|
1462
|
+
for report in workspace.reports.values():
|
|
1463
1463
|
for work_unit in self.mapper.report_to_datahub_work_units(
|
|
1464
1464
|
report, workspace
|
|
1465
1465
|
):
|