acryl-datahub 1.1.0.5rc1-py3-none-any.whl → 1.1.0.5rc3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (25)
  1. {acryl_datahub-1.1.0.5rc1.dist-info → acryl_datahub-1.1.0.5rc3.dist-info}/METADATA +2601 -2601
  2. {acryl_datahub-1.1.0.5rc1.dist-info → acryl_datahub-1.1.0.5rc3.dist-info}/RECORD +25 -23
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +1 -1
  5. datahub/ingestion/api/decorators.py +1 -0
  6. datahub/ingestion/autogenerated/__init__.py +0 -0
  7. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  8. datahub/ingestion/source/identity/azure_ad.py +1 -1
  9. datahub/ingestion/source/identity/okta.py +1 -1
  10. datahub/ingestion/source/mock_data/datahub_mock_data.py +6 -1
  11. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  12. datahub/ingestion/source/preset.py +1 -1
  13. datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
  14. datahub/ingestion/source/snowflake/snowflake_queries.py +46 -32
  15. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  16. datahub/ingestion/source/sql/mssql/source.py +15 -15
  17. datahub/ingestion/source/sql/vertica.py +1 -1
  18. datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
  19. datahub/ingestion/source/superset.py +1 -1
  20. datahub/ingestion/source/unity/source.py +1 -1
  21. datahub/sql_parsing/sql_parsing_aggregator.py +5 -2
  22. {acryl_datahub-1.1.0.5rc1.dist-info → acryl_datahub-1.1.0.5rc3.dist-info}/WHEEL +0 -0
  23. {acryl_datahub-1.1.0.5rc1.dist-info → acryl_datahub-1.1.0.5rc3.dist-info}/entry_points.txt +0 -0
  24. {acryl_datahub-1.1.0.5rc1.dist-info → acryl_datahub-1.1.0.5rc3.dist-info}/licenses/LICENSE +0 -0
  25. {acryl_datahub-1.1.0.5rc1.dist-info → acryl_datahub-1.1.0.5rc3.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.1.0.5rc1.dist-info → acryl_datahub-1.1.0.5rc3.dist-info}/RECORD CHANGED
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.1.0.5rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.1.0.5rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=TZdOK9cFifb1yYky21qrv8R4D8Q8htQJWiKxMGKJpS0,323
4
+ datahub/_version.py,sha256=81T1DSxdYWctKOc3Yt4DKvZkQAVEO5Rw4fHuqucBno4,323
5
5
  datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,7 +48,7 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
48
48
  datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
49
  datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
50
50
  datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- datahub/api/entities/dataset/dataset.py,sha256=BLOn7o-A3OLD4D2JTq8vF_dwTTW9HHDnDwd6lhI1TMI,49512
51
+ datahub/api/entities/dataset/dataset.py,sha256=ed3-0moqef0Zgs_rwYxbskTa5BqmM2fH2_Z8U12wVRs,49526
52
52
  datahub/api/entities/external/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  datahub/api/entities/external/external_entities.py,sha256=7HRt3Oz2sw_7BDxEVc6W7jK4UqHXLZuo_rGPJ5NdAoE,7727
54
54
  datahub/api/entities/external/external_tag.py,sha256=LKz4j2KnBvo8xhGLxA9g1DWtHfHUA7WqO2vOIsqwy44,4490
@@ -141,7 +141,7 @@ datahub/ingestion/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
141
141
  datahub/ingestion/api/closeable.py,sha256=k12AT--s4GDtZ-po_rVm5QKgvGIDteeRPByZPIOfecA,599
142
142
  datahub/ingestion/api/committable.py,sha256=4S6GuBzvX2vb1A8P506NbspOKfZ1621sBG8t0lvRb8o,886
143
143
  datahub/ingestion/api/common.py,sha256=26lXJiM4YfdnVH1xfe2bpZNp2VKCdJcJ8ynK7rhh0FY,3029
144
- datahub/ingestion/api/decorators.py,sha256=-0Kl9G_8DhmSP9Q0ryrWB3a-Odce8WTpvGETJOqRdgw,4015
144
+ datahub/ingestion/api/decorators.py,sha256=qsI2HkyruhVR7VU-K0sCZmpAR9kg49CDdL9Tu24QpsA,4016
145
145
  datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
146
146
  datahub/ingestion/api/incremental_lineage_helper.py,sha256=7a6FTJ_uz4EEJS1vPtbYB2KvNlcZB3py28_FKxmRiSk,5993
147
147
  datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
@@ -158,6 +158,7 @@ datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL
158
158
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
159
159
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
160
160
  datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=fMjPnyWEofIZV52E2AFYU3IgBJwyZvbygXxCJyEtcWI,4442
161
+ datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
162
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
163
  datahub/ingestion/extractor/extractor_registry.py,sha256=f7CLfW3pr29QZkXSHbp7HjUrsdw7ejQJmot-tiSPcqc,342
163
164
  datahub/ingestion/extractor/json_ref_patch.py,sha256=4g3ZWHn7rwS74jUvSXJiGpi-UKHhiSYKKgBeU4E5ukE,1448
@@ -218,13 +219,13 @@ datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-
218
219
  datahub/ingestion/source/nifi.py,sha256=2jxSzuHIRQFF7DLoceGbUd_10lkhHRlqA9hhApSt3Yw,56882
219
220
  datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
220
221
  datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
221
- datahub/ingestion/source/preset.py,sha256=1goxuFoLw50dokr2gp1MhUrDJ8CFNX-wBZIvv7laEXA,3966
222
+ datahub/ingestion/source/preset.py,sha256=fncn-fgYcITsYEHVsvV6cGTQ9_xc_R06ejrw6ZbY3QA,3966
222
223
  datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
223
224
  datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
224
225
  datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
225
226
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
226
227
  datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
227
- datahub/ingestion/source/superset.py,sha256=dSXbsPj4_BY9O6esrJRt5WYcHj7QWoBk7PTfIFxS_Zw,48387
228
+ datahub/ingestion/source/superset.py,sha256=4wEjhBj_Zf__1EhXDE9ltbaR9NTMZVdXFviHLf5VFL4,48387
228
229
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
230
  datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
230
231
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
@@ -272,7 +273,7 @@ datahub/ingestion/source/bigquery_v2/queries.py,sha256=c1BpeQP8p8y-FOhmiQkkY2IqG
272
273
  datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAPDsvN2JRd2dmM1t1J1mRylfKiPen4,19530
273
274
  datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
274
275
  datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
275
- datahub/ingestion/source/cassandra/cassandra.py,sha256=lKvPP0Uahi9xw_yh9cArPPtwvAauXolaEk-6f-jhpz4,14558
276
+ datahub/ingestion/source/cassandra/cassandra.py,sha256=k2EKpeUUZ0O_RwQ6NXM15vnTsY3OgST4e5LG8Eg6Coo,14534
276
277
  datahub/ingestion/source/cassandra/cassandra_api.py,sha256=b7MApc3_tEfHoj-6ub6snkcv_DweL1wi_TGJjAA1-yU,13516
277
278
  datahub/ingestion/source/cassandra/cassandra_config.py,sha256=Ga9915cDZukR5-u2tMNx5Jkf8eza2oAE5YS_sQIVEVQ,4222
278
279
  datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=ZqsAY8NFsrrLqOduV7Aem2eJLtc2_OU9tW4tc_dh0V8,10984
@@ -350,8 +351,8 @@ datahub/ingestion/source/iceberg/iceberg.py,sha256=BNDGooK9cmqpOjzkV1u4rpsduVPNW
350
351
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
351
352
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
352
353
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
353
- datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
354
- datahub/ingestion/source/identity/okta.py,sha256=jC21myJuMRTaPgj0OD9heaC-mz8ECjqpy2hSJwlUSwM,31943
354
+ datahub/ingestion/source/identity/azure_ad.py,sha256=V3z8PWvNHdb5rv_HTyx-mezX0pRUw3O3Z27MwvIhTpU,28559
355
+ datahub/ingestion/source/identity/okta.py,sha256=4t1F1xdPjWuPfy1esbh4FtfU9pl2rGdi7GosIRc7Dfc,31943
355
356
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
356
357
  datahub/ingestion/source/kafka/kafka.py,sha256=CzuW6CNbBMlB9ILD2GvsNIbm8MbHd0_bRI-J3_CFyRs,23322
357
358
  datahub/ingestion/source/kafka/kafka_config.py,sha256=ijUB8PS5p-o3uLCHkAxAJAIM88s47rVaAUYXmi_lR4M,4406
@@ -387,7 +388,8 @@ datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
387
388
  datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
388
389
  datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
389
390
  datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
390
- datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=i19UFii3wOPG5CVQfV_20MwIRAJjr0TxsIiVjKcBND4,14314
391
+ datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=YQ0QUhzn1PyUPscS4sJOpo0pxRwmzXjcs2W0ZiPqsqI,14543
392
+ datahub/ingestion/source/mock_data/datahub_mock_data_report.py,sha256=sV_H7JgcuVbrpIBqtGse_BBigMdqP32ZXuanpeXmwVI,331
391
393
  datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=oIC1vcOx76Vl63O9kcjP_iInBHyS-ATdN3Y932TCCZg,3283
392
394
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
393
395
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=JqKCwxBJfOrC8SF7CmDG0cseWxHk_7E2v4Diw3Q0-WM,14181
@@ -467,12 +469,12 @@ datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMP
467
469
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
468
470
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
469
471
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
470
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=x73EnJHOrLWNjdiQ65HUgaToC0iAaFWmDtJSyufOl9A,20918
472
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LbDUzqKCXSMV6j0ClLGxAAMbxQFqsmmXjy0xgCVd7UE,20965
471
473
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=3-nP3HHCblUnUHYo_fvFp5VOAteCtR4GNjaUEvyNTNQ,18175
472
474
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
473
475
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=eWUlWMc5X2X_Y1I2peworFWLLsXQjryEHxPDuSqrowg,21683
474
476
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=PmQi-qDlRhdJ-PsJ7x-EScIiswWRAxDDOKHydvN3mTY,7404
475
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=PyuooQO6_OoUno3kzi7m_n1tv5V3GKSmJPXBk29pgWw,30480
477
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=4ZNyBsFPGKatxT6B9CW-gEQt6rUMy2FkRX1seh6Ppog,31165
476
478
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=kqOxibplHyDhdioue8MeXBzeDS9d274-dspb-wyWMMI,38165
477
479
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP7i6cgK6Q1jXJPjDA1j9C8klus,6762
478
480
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
@@ -482,7 +484,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=WJfsP8w3HceUkM6GK
482
484
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
483
485
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
484
486
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=w-SftpjHSOf_6S2T2srHJyJta3MHS0usmn4Z9jgx4QE,13858
485
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=BU9vSXgnaPyKqCHfrHUZxRZOyOHUYMlf0CBUm0OZXn4,34730
487
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=eqOVRYmFiWQPAIDWUcOo5QZkG05xanlEbxz4aDTvSNM,34706
486
488
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
487
489
  datahub/ingestion/source/sql/athena.py,sha256=U-btrqnbOgrBptl6b--QJrio_AN-c6pF0yL-wbUt4P0,23986
488
490
  datahub/ingestion/source/sql/clickhouse.py,sha256=_uf-6fQYxI1-kMw0dNUvJf1f75XX0Qps4HMVlgzdpmo,25670
@@ -509,10 +511,10 @@ datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVH
509
511
  datahub/ingestion/source/sql/teradata.py,sha256=9WdrxDy02lRJi9IZgsAATFsmxcQnIw5Gr6yCqHJQy5k,33507
510
512
  datahub/ingestion/source/sql/trino.py,sha256=zIfQ6GvW8Sbw4sxqsTcnibT51STka_nzNYvmld6HfHw,18947
511
513
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=AB3Gtx4omAy_08zadHQpmUGmIGufkZ6o_ihWNnfvzYc,5783
512
- datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
514
+ datahub/ingestion/source/sql/vertica.py,sha256=kGfL6exvfKOY5PeC97f7ukdY0Id_pe8Wn9gL0bQN8dE,33358
513
515
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
514
516
  datahub/ingestion/source/sql/mssql/job_models.py,sha256=nAo3rciu-w2-dXCz6_ekDEbGMEjCMEfh8WvSfXoF2l0,9359
515
- datahub/ingestion/source/sql/mssql/source.py,sha256=2kCZZyiAOwAVKpr8NkiA-sM6ivMb8UAqQV004h6LVsw,42889
517
+ datahub/ingestion/source/sql/mssql/source.py,sha256=krV80XcNFu-N73xoUA-W2zLiop3pJP7zkb3Dvj4PDKU,42783
516
518
  datahub/ingestion/source/sql/stored_procedures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
517
519
  datahub/ingestion/source/sql/stored_procedures/base.py,sha256=n0l5OaTuW-m3TRvkxs3TqvgMeWF6BagzW3tjyWUcC1A,8631
518
520
  datahub/ingestion/source/sql/stored_procedures/lineage.py,sha256=fryLhuAlsjr9SHIjHJ-PmtCMx89bjzWVnJZ3f1bwQVU,1905
@@ -524,7 +526,7 @@ datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYo
524
526
  datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
525
527
  datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
526
528
  datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Lr2HYGx_b2FQ8A36s7s11tl-4-mGIM13bfy5JbQ3LtM,14890
527
- datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
529
+ datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=M9J2-JMjNR1KYSDPOTIa8zSCu6fFEufmGBVh4XrcNy0,17345
528
530
  datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
529
531
  datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
530
532
  datahub/ingestion/source/state_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -547,7 +549,7 @@ datahub/ingestion/source/unity/proxy.py,sha256=E8ZhWIY3j7gjEz8ttWOeHlom0jCMJXkWH
547
549
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
548
550
  datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
549
551
  datahub/ingestion/source/unity/report.py,sha256=XFT9oQfvEB4RkTvWGgFOoQuLPUN_AIoPXZ79xeDhGHQ,2831
550
- datahub/ingestion/source/unity/source.py,sha256=uJBjgZ7qhJpn25t0ZOcLuZ0vn2Uz4n9AiideRGCFADw,49027
552
+ datahub/ingestion/source/unity/source.py,sha256=79a7qzFdO66sx6q0E_9Sd3smyQXS8OiRzJFG8Cbcrlk,49003
551
553
  datahub/ingestion/source/unity/tag_entities.py,sha256=iWl6nRAWSye1hoFDx_Xh4aT53PN0sGzlX7n1-oTVUv8,11568
552
554
  datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
553
555
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -967,7 +969,7 @@ datahub/sql_parsing/fingerprint_utils.py,sha256=3hGiexaQXnE7eZLxo-t7hlTyVQz7womb
967
969
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
968
970
  datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
969
971
  datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41cE8Jkm9cBY,9542
970
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=tqFZsE-7owUiU0q49nmkTt50CU4vn8ffUbNcTv9nRbc,71431
972
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=Y70tMjCZTHf86g7REwi0A3jSvwQzv5qsfEfaDyiqnhw,71595
971
973
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
972
974
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
973
975
  datahub/sql_parsing/sqlglot_lineage.py,sha256=jchSPPYkFtHpyTRTWR5K0YQM6LIgWR5MtyVNQ6zA2Ig,59915
@@ -1078,8 +1080,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1078
1080
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1079
1081
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1080
1082
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1081
- acryl_datahub-1.1.0.5rc1.dist-info/METADATA,sha256=sKbnEOEUAOv5hgUfoytH-EIKqUc7QxK96ENS7SZUIPs,182347
1082
- acryl_datahub-1.1.0.5rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1083
- acryl_datahub-1.1.0.5rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
1084
- acryl_datahub-1.1.0.5rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1085
- acryl_datahub-1.1.0.5rc1.dist-info/RECORD,,
1083
+ acryl_datahub-1.1.0.5rc3.dist-info/METADATA,sha256=8c04d-AKh-9Af0x9FeL24ah-XHUXxMCyGaklXQqhHLA,182347
1084
+ acryl_datahub-1.1.0.5rc3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1085
+ acryl_datahub-1.1.0.5rc3.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
1086
+ acryl_datahub-1.1.0.5rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1087
+ acryl_datahub-1.1.0.5rc3.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.1.0.5rc1"
+ __version__ = "1.1.0.5rc3"


  def is_dev_mode() -> bool:
datahub/api/entities/dataset/dataset.py CHANGED
@@ -383,7 +383,7 @@ class Dataset(StrictModel):
  urn: Optional[str] = None
  description: Optional[str] = None
  name: Optional[str] = None
- schema_metadata: Optional[SchemaSpecification] = Field(alias="schema")
+ schema_metadata: Optional[SchemaSpecification] = Field(default=None, alias="schema")
  downstreams: Optional[List[str]] = None
  properties: Optional[Dict[str, str]] = None
  subtype: Optional[str] = None
datahub/ingestion/api/decorators.py CHANGED
@@ -104,6 +104,7 @@ def capability(
  for base in cls.__bases__
  ):
  cls.__capabilities = {}
+
  cls.get_capabilities = lambda: cls.__capabilities.values()

  # If the superclasses have capability annotations, copy those over.
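For context, a brief, hedged sketch of how the capability decorator shown above is typically applied to a source class; the class name below is illustrative and not part of this release, and the import locations are assumed from the surrounding hunks.

    from datahub.ingestion.api.decorators import capability
    from datahub.ingestion.api.source import Source, SourceCapability

    @capability(
        SourceCapability.DELETION_DETECTION,
        "Enabled by default via stateful ingestion",
        supported=True,
    )
    class ExampleSource(Source):  # hypothetical source class, for illustration only
        ...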
datahub/ingestion/autogenerated/__init__.py ADDED
File without changes
datahub/ingestion/source/cassandra/cassandra.py CHANGED
@@ -80,7 +80,7 @@ class KeyspaceKey(ContainerKey):
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(
  SourceCapability.DELETION_DETECTION,
- "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
+ "Enabled by default via stateful ingestion",
  supported=True,
  )
  class CassandraSource(StatefulIngestionSourceBase):
datahub/ingestion/source/identity/azure_ad.py CHANGED
@@ -167,7 +167,7 @@ class AzureADSourceReport(StaleEntityRemovalSourceReport):
  @config_class(AzureADConfig)
  @support_status(SupportStatus.CERTIFIED)
  @capability(
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
  )
  class AzureADSource(StatefulIngestionSourceBase):
  """
datahub/ingestion/source/identity/okta.py CHANGED
@@ -202,7 +202,7 @@ class OktaSourceReport(StaleEntityRemovalSourceReport):
  @support_status(SupportStatus.CERTIFIED)
  @capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration")
  @capability(
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
  )
  class OktaSource(StatefulIngestionSourceBase):
  """
datahub/ingestion/source/mock_data/datahub_mock_data.py CHANGED
@@ -15,6 +15,9 @@ from datahub.ingestion.api.decorators import (
  )
  from datahub.ingestion.api.source import Source, SourceReport
  from datahub.ingestion.api.workunit import MetadataWorkUnit
+ from datahub.ingestion.source.mock_data.datahub_mock_data_report import (
+ DataHubMockDataReport,
+ )
  from datahub.ingestion.source.mock_data.table_naming_helper import TableNamingHelper
  from datahub.metadata.schema_classes import (
  DatasetLineageTypeClass,
@@ -117,13 +120,15 @@ class DataHubMockDataSource(Source):
  def __init__(self, ctx: PipelineContext, config: DataHubMockDataConfig):
  self.ctx = ctx
  self.config = config
- self.report = SourceReport()
+ self.report = DataHubMockDataReport()

  def get_workunits(self) -> Iterable[MetadataWorkUnit]:
  # We don't want any implicit aspects to be produced
  # so we are not using get_workunits_internal
  if self.config.gen_1.emit_lineage:
  for wu in self._data_gen_1():
+ if self.report.first_urn_seen is None:
+ self.report.first_urn_seen = wu.get_urn()
  self.report.report_workunit(wu)
  yield wu

datahub/ingestion/source/mock_data/datahub_mock_data_report.py ADDED
@@ -0,0 +1,12 @@
+ from dataclasses import dataclass, field
+ from typing import Optional
+
+ from datahub.ingestion.api.source import SourceReport
+
+
+ @dataclass
+ class DataHubMockDataReport(SourceReport):
+ first_urn_seen: Optional[str] = field(
+ default=None,
+ metadata={"description": "The first URN encountered during ingestion"},
+ )
datahub/ingestion/source/preset.py CHANGED
@@ -71,7 +71,7 @@ class PresetConfig(SupersetConfig):
  @config_class(PresetConfig)
  @support_status(SupportStatus.CERTIFIED)
  @capability(
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
  )
  class PresetSource(SupersetSource):
  """
datahub/ingestion/source/snowflake/snowflake_config.py CHANGED
@@ -49,6 +49,7 @@ DEFAULT_TEMP_TABLES_PATTERNS = [
  rf".*\.SEGMENT_{UUID_REGEX}", # segment
  rf".*\.STAGING_.*_{UUID_REGEX}", # stitch
  r".*\.(GE_TMP_|GE_TEMP_|GX_TEMP_)[0-9A-F]{8}", # great expectations
+ r".*\.SNOWPARK_TEMP_TABLE_.+", # snowpark
  ]


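As a quick illustration of the added pattern (the fully qualified table name below is made up), the new regex flags Snowpark's auto-generated temporary tables in the same way as the existing entries:

    import re

    # Hypothetical Snowpark temp table name; real names carry a generated suffix.
    name = "ANALYTICS.PUBLIC.SNOWPARK_TEMP_TABLE_A1B2C3D4E5"
    pattern = r".*\.SNOWPARK_TEMP_TABLE_.+"
    print(bool(re.match(pattern, name)))  # True -> matched as a temporary-table pattern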
datahub/ingestion/source/snowflake/snowflake_queries.py CHANGED
@@ -63,7 +63,10 @@ from datahub.sql_parsing.sqlglot_lineage import (
  DownstreamColumnRef,
  )
  from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
- from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList
+ from datahub.utilities.file_backed_collections import (
+ ConnectionWrapper,
+ FileBackedList,
+ )
  from datahub.utilities.perf_timer import PerfTimer

  logger = logging.getLogger(__name__)
@@ -124,6 +127,8 @@ class SnowflakeQueriesExtractorReport(Report):
  users_fetch_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer)

  audit_log_load_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer)
+ aggregator_generate_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer)
+
  sql_aggregator: Optional[SqlAggregatorReport] = None

  num_ddl_queries_dropped: int = 0
@@ -243,6 +248,12 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  audit_log_file = self.local_temp_path / "audit_log.sqlite"
  use_cached_audit_log = audit_log_file.exists()

+ if self.config.local_temp_path is None:
+ self._exit_stack.callback(lambda: audit_log_file.unlink(missing_ok=True))
+
+ shared_connection = self._exit_stack.enter_context(
+ ConnectionWrapper(audit_log_file)
+ )
  queries: FileBackedList[
  Union[
  KnownLineageMapping,
@@ -251,27 +262,16 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  TableSwap,
  ObservedQuery,
  ]
- ]
+ ] = self._exit_stack.enter_context(FileBackedList(shared_connection))
+
  if use_cached_audit_log:
- logger.info("Using cached audit log")
- shared_connection = ConnectionWrapper(audit_log_file)
- queries = FileBackedList(shared_connection)
+ logger.info(f"Using cached audit log at {audit_log_file}")
  else:
- audit_log_file.unlink(missing_ok=True)
-
- shared_connection = ConnectionWrapper(audit_log_file)
- queries = FileBackedList(shared_connection)
- entry: Union[
- KnownLineageMapping,
- PreparsedQuery,
- TableRename,
- TableSwap,
- ObservedQuery,
- ]
+ logger.info(f"Fetching audit log into {audit_log_file}")

  with self.report.copy_history_fetch_timer:
- for entry in self.fetch_copy_history():
- queries.append(entry)
+ for copy_entry in self.fetch_copy_history():
+ queries.append(copy_entry)

  with self.report.query_log_fetch_timer:
  for entry in self.fetch_query_log(users):
@@ -281,13 +281,11 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  for i, query in enumerate(queries):
  if i % 1000 == 0:
  logger.info(f"Added {i} query log entries to SQL aggregator")
+
  self.aggregator.add(query)

- yield from auto_workunit(self.aggregator.gen_metadata())
- if not use_cached_audit_log:
- queries.close()
- shared_connection.close()
- audit_log_file.unlink(missing_ok=True)
+ with self.report.aggregator_generate_timer:
+ yield from auto_workunit(self.aggregator.gen_metadata())

  def fetch_users(self) -> UsersMapping:
  users: UsersMapping = dict()
@@ -403,8 +401,9 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):

  # TODO need to map snowflake query types to ours
  query_text: str = res["query_text"]
+ snowflake_query_type: str = res["query_type"]
  query_type: QueryType = SNOWFLAKE_QUERY_TYPE_MAPPING.get(
- res["query_type"], QueryType.UNKNOWN
+ snowflake_query_type, QueryType.UNKNOWN
  )

  direct_objects_accessed = res["direct_objects_accessed"]
@@ -421,7 +420,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  res["session_id"],
  timestamp,
  object_modified_by_ddl,
- res["query_type"],
+ snowflake_query_type,
  )
  if known_ddl_entry:
  return known_ddl_entry
@@ -436,6 +435,16 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  res["user_name"], users.get(res["user_name"])
  )
  )
+ extra_info = {
+ "snowflake_query_id": res["query_id"],
+ "snowflake_root_query_id": res["root_query_id"],
+ "snowflake_query_type": res["query_type"],
+ "snowflake_role_name": res["role_name"],
+ "query_duration": res["query_duration"],
+ "rows_inserted": res["rows_inserted"],
+ "rows_updated": res["rows_updated"],
+ "rows_deleted": res["rows_deleted"],
+ }

  # There are a couple cases when we'd want to prefer our own SQL parsing
  # over Snowflake's metadata.
@@ -470,6 +479,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  query_hash=get_query_fingerprint(
  query_text, self.identifiers.platform, fast=True
  ),
+ extra_info=extra_info,
  )

  upstreams = []
@@ -556,6 +566,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  timestamp=timestamp,
  session_id=res["session_id"],
  query_type=query_type,
+ extra_info=extra_info,
  )
  return entry

@@ -667,7 +678,7 @@ def _build_enriched_query_log_query(
  start_time_millis = int(start_time.timestamp() * 1000)
  end_time_millis = int(end_time.timestamp() * 1000)

- users_filter = ""
+ users_filter = "TRUE"
  if deny_usernames:
  user_not_in = ",".join(f"'{user.upper()}'" for user in deny_usernames)
  users_filter = f"user_name NOT IN ({user_not_in})"
@@ -694,10 +705,10 @@ fingerprinted_queries as (
  FROM
  snowflake.account_usage.query_history
  WHERE
- query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3)
- AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3)
+ query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3) -- {start_time.isoformat()}
+ AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3) -- {end_time.isoformat()}
  AND execution_status = 'SUCCESS'
- AND {users_filter or "TRUE"}
+ AND {users_filter}
  )
  , deduplicated_queries as (
  SELECT
@@ -715,6 +726,7 @@ fingerprinted_queries as (
  , raw_access_history AS (
  SELECT
  query_id,
+ root_query_id,
  query_start_time,
  user_name,
  direct_objects_accessed,
@@ -723,9 +735,9 @@ fingerprinted_queries as (
  FROM
  snowflake.account_usage.access_history
  WHERE
- query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
- AND query_start_time < to_timestamp_ltz({end_time_millis}, 3)
- AND {users_filter or "TRUE"}
+ query_start_time >= to_timestamp_ltz({start_time_millis}, 3) -- {start_time.isoformat()}
+ AND query_start_time < to_timestamp_ltz({end_time_millis}, 3) -- {end_time.isoformat()}
+ AND {users_filter}
  AND query_id IN (
  SELECT query_id FROM deduplicated_queries
  )
@@ -734,6 +746,7 @@ fingerprinted_queries as (
  -- TODO: Add table filter clause.
  SELECT
  query_id,
+ root_query_id,
  query_start_time,
  ARRAY_SLICE(
  FILTER(direct_objects_accessed, o -> o:objectDomain IN {SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER}),
@@ -764,6 +777,7 @@ fingerprinted_queries as (
  q.rows_deleted AS "ROWS_DELETED",
  q.user_name AS "USER_NAME",
  q.role_name AS "ROLE_NAME",
+ a.root_query_id,
  a.direct_objects_accessed,
  a.objects_modified,
  a.object_modified_by_ddl
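A brief, hedged sketch of the users_filter change above: defaulting the filter to "TRUE" lets the SQL template interpolate it directly instead of falling back with `or "TRUE"` at each use site. The deny list below is illustrative, not from the source config.

    # Illustrative deny list; real values come from the source configuration.
    deny_usernames = ["FIVETRAN_USER", "AIRFLOW_SVC"]

    users_filter = "TRUE"
    if deny_usernames:
        user_not_in = ",".join(f"'{user.upper()}'" for user in deny_usernames)
        users_filter = f"user_name NOT IN ({user_not_in})"

    where_clause = f"execution_status = 'SUCCESS' AND {users_filter}"
    print(where_clause)
    # execution_status = 'SUCCESS' AND user_name NOT IN ('FIVETRAN_USER','AIRFLOW_SVC')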
datahub/ingestion/source/snowflake/snowflake_v2.py CHANGED
@@ -118,7 +118,7 @@ logger: logging.Logger = logging.getLogger(__name__)
  )
  @capability(
  SourceCapability.DELETION_DETECTION,
- "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
+ "Enabled by default via stateful ingestion",
  supported=True,
  )
  @capability(
datahub/ingestion/source/sql/mssql/source.py CHANGED
@@ -936,25 +936,25 @@ class SQLServerSource(SQLAlchemySource):
  url = self.config.get_sql_alchemy_url()
  logger.debug(f"sql_alchemy_url={url}")
  engine = create_engine(url, **self.config.options)
- with engine.connect() as conn:
- if self.config.database and self.config.database != "":
- inspector = inspect(conn)
- yield inspector
- else:
+
+ if self.config.database and self.config.database != "":
+ inspector = inspect(engine)
+ yield inspector
+ else:
+ with engine.begin() as conn:
  databases = conn.execute(
  "SELECT name FROM master.sys.databases WHERE name NOT IN \
  ('master', 'model', 'msdb', 'tempdb', 'Resource', \
  'distribution' , 'reportserver', 'reportservertempdb'); "
- )
- for db in databases:
- if self.config.database_pattern.allowed(db["name"]):
- url = self.config.get_sql_alchemy_url(current_db=db["name"])
- with create_engine(
- url, **self.config.options
- ).connect() as conn:
- inspector = inspect(conn)
- self.current_database = db["name"]
- yield inspector
+ ).fetchall()
+
+ for db in databases:
+ if self.config.database_pattern.allowed(db["name"]):
+ url = self.config.get_sql_alchemy_url(current_db=db["name"])
+ engine = create_engine(url, **self.config.options)
+ inspector = inspect(engine)
+ self.current_database = db["name"]
+ yield inspector

  def get_identifier(
  self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
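For context, a minimal, hedged sketch of the engine-level inspection pattern adopted above. The connection URL and dialect are placeholders (the real URL is built by config.get_sql_alchemy_url()), and SQLAlchemy's inspect() accepts either an Engine or a Connection.

    from sqlalchemy import create_engine, inspect

    # Placeholder URL; running this requires a reachable SQL Server and the pytds dialect.
    engine = create_engine("mssql+pytds://user:password@localhost:1433/master")
    inspector = inspect(engine)  # bound to the engine rather than one open connection
    print(inspector.get_schema_names())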
datahub/ingestion/source/sql/vertica.py CHANGED
@@ -116,7 +116,7 @@ class VerticaConfig(BasicSQLAlchemyConfig):
  )
  @capability(
  SourceCapability.DELETION_DETECTION,
- "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
+ "Enabled by default via stateful ingestion",
  supported=True,
  )
  class VerticaSource(SQLAlchemySource):
datahub/ingestion/source/state/stateful_ingestion_base.py CHANGED
@@ -179,7 +179,7 @@ class StatefulIngestionReport(SourceReport):

  @capability(
  SourceCapability.DELETION_DETECTION,
- "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
+ "Enabled by default via stateful ingestion",
  supported=True,
  )
  class StatefulIngestionSourceBase(Source):
datahub/ingestion/source/superset.py CHANGED
@@ -272,7 +272,7 @@ def get_filter_name(filter_obj):
  @config_class(SupersetConfig)
  @support_status(SupportStatus.CERTIFIED)
  @capability(
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
  )
  @capability(SourceCapability.DOMAINS, "Enabled by `domain` config to assign domain_key")
  @capability(SourceCapability.LINEAGE_COARSE, "Supported by default")
datahub/ingestion/source/unity/source.py CHANGED
@@ -159,7 +159,7 @@ logger: logging.Logger = logging.getLogger(__name__)
  )
  @capability(
  SourceCapability.DELETION_DETECTION,
- "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
+ "Enabled by default via stateful ingestion",
  supported=True,
  )
  @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
datahub/sql_parsing/sql_parsing_aggregator.py CHANGED
@@ -58,6 +58,7 @@ from datahub.sql_parsing.tool_meta_extractor import (
  ToolMetaExtractorReport,
  )
  from datahub.utilities.cooperative_timeout import CooperativeTimeoutError
+ from datahub.utilities.dedup_list import deduplicate_list
  from datahub.utilities.file_backed_collections import (
  ConnectionWrapper,
  FileBackedDict,
@@ -140,6 +141,7 @@ class QueryMetadata:

  used_temp_tables: bool = True

+ extra_info: Optional[dict] = None
  origin: Optional[Urn] = None

  def make_created_audit_stamp(self) -> models.AuditStampClass:
@@ -263,7 +265,7 @@ class PreparsedQuery:
  query_type_props: QueryTypeProps = dataclasses.field(
  default_factory=lambda: QueryTypeProps()
  )
- # Use this to store addtitional key-value information about query for debugging
+ # Use this to store additional key-value information about the query for debugging.
  extra_info: Optional[dict] = None
  origin: Optional[Urn] = None

@@ -948,6 +950,7 @@ class SqlParsingAggregator(Closeable):
  column_usage=parsed.column_usage or {},
  confidence_score=parsed.confidence_score,
  used_temp_tables=session_has_temp_tables,
+ extra_info=parsed.extra_info,
  origin=parsed.origin,
  )
  )
@@ -1706,7 +1709,7 @@ class SqlParsingAggregator(Closeable):
  )

  merged_query_text = ";\n\n".join(
- [q.formatted_query_string for q in ordered_queries]
+ deduplicate_list([q.formatted_query_string for q in ordered_queries])
  )

  resolved_query = dataclasses.replace(
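One last hedged sketch, illustrating the effect of routing the merged query text through deduplicate_list, assuming it performs an order-preserving de-duplication as its use in the hunk above implies; the query strings below are made up.

    from datahub.utilities.dedup_list import deduplicate_list

    formatted_queries = ["SELECT 1", "SELECT 2", "SELECT 1"]
    merged_query_text = ";\n\n".join(deduplicate_list(formatted_queries))
    print(merged_query_text)
    # SELECT 1;
    #
    # SELECT 2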