acryl-datahub 1.1.0.4rc2__py3-none-any.whl → 1.1.0.5rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (38) hide show
  1. {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/METADATA +2526 -2526
  2. {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/RECORD +38 -35
  3. {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/entry_points.txt +1 -0
  4. datahub/_version.py +1 -1
  5. datahub/emitter/rest_emitter.py +18 -1
  6. datahub/ingestion/api/sink.py +3 -0
  7. datahub/ingestion/run/pipeline.py +1 -1
  8. datahub/ingestion/sink/datahub_rest.py +12 -0
  9. datahub/ingestion/source/bigquery_v2/bigquery.py +17 -0
  10. datahub/ingestion/source/dremio/dremio_api.py +98 -68
  11. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  12. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  13. datahub/ingestion/source/dremio/dremio_source.py +90 -77
  14. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  15. datahub/ingestion/source/ge_data_profiler.py +48 -8
  16. datahub/ingestion/source/mock_data/__init__.py +0 -0
  17. datahub/ingestion/source/mock_data/datahub_mock_data.py +384 -0
  18. datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
  19. datahub/ingestion/source/preset.py +1 -1
  20. datahub/ingestion/source/redshift/redshift.py +17 -0
  21. datahub/ingestion/source/usage/clickhouse_usage.py +1 -0
  22. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  23. datahub/metadata/_internal_schema_classes.py +3 -0
  24. datahub/metadata/schema.avsc +2 -0
  25. datahub/metadata/schemas/ContainerProperties.avsc +2 -0
  26. datahub/metadata/schemas/DataFlowInfo.avsc +2 -0
  27. datahub/metadata/schemas/DataJobInfo.avsc +2 -0
  28. datahub/metadata/schemas/DataProcessKey.avsc +2 -0
  29. datahub/metadata/schemas/DatasetKey.avsc +2 -0
  30. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +2 -0
  31. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -0
  32. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -0
  33. datahub/metadata/schemas/MLModelKey.avsc +2 -0
  34. datahub/metadata/schemas/MetadataChangeEvent.avsc +2 -0
  35. datahub/utilities/stats_collections.py +4 -0
  36. {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/WHEEL +0 -0
  37. {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/licenses/LICENSE +0 -0
  38. {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.1.0.4rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.1.0.5rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=M8rnTXUKlrf1evY-7XYua75EUQ0Yuv4TaMLJrEwD1g4,323
4
+ datahub/_version.py,sha256=TZdOK9cFifb1yYky21qrv8R4D8Q8htQJWiKxMGKJpS0,323
5
5
  datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,7 +132,7 @@ datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxga
132
132
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
133
133
  datahub/emitter/request_helper.py,sha256=2Sij9VJqgA7xZI6I7IuxsA8ioakbz0FJ3gvazxU_z3M,5738
134
134
  datahub/emitter/response_helper.py,sha256=qGm45n43CepW7j6kP9wTXuP-U-SZnn7hQdJTdVaoqhQ,7504
135
- datahub/emitter/rest_emitter.py,sha256=ctPrtQ1S9wsy_lqE9LopP5pvmLn83Mu5R1mfSf9umdY,37467
135
+ datahub/emitter/rest_emitter.py,sha256=WrL-ldOJf2LoKv_5behyffsB6vVXjkT8xTdWMtpExtE,38101
136
136
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
137
137
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
138
138
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -150,7 +150,7 @@ datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINq
150
150
  datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
151
151
  datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
152
152
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
153
- datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
153
+ datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
154
154
  datahub/ingestion/api/source.py,sha256=hYwh4LHcG5RS6xQ9QAh5Zlijjl6r1JaibKPb75Hne7A,19518
155
155
  datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
156
156
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
@@ -189,7 +189,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
189
189
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
190
190
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
191
191
  datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
192
- datahub/ingestion/run/pipeline.py,sha256=h_WAAgORQDVeMO0FmYeLBsAzN9WsDU8yKGwBDzLC2FM,29910
192
+ datahub/ingestion/run/pipeline.py,sha256=TYE1Vm144uHFmqEsrJcbrD0fcg2M-ZvYEGGGbIp1Rmk,29943
193
193
  datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
194
194
  datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
195
195
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -197,7 +197,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
197
197
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
198
198
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
199
199
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
200
- datahub/ingestion/sink/datahub_rest.py,sha256=xXCYqYMc9DuNQv22DBYrEpJFvGBrKVPmZuaZRbMOONA,13089
200
+ datahub/ingestion/sink/datahub_rest.py,sha256=DOhtTHqKpmqgI3rUY9ri2QZAyXYDFINWMG6ne7VYUXI,13463
201
201
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
202
202
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
203
203
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -207,7 +207,7 @@ datahub/ingestion/source/demo_data.py,sha256=PbtCHlZx3wrKlOPPgkWhDQuPm7ZfIx2neXJ
207
207
  datahub/ingestion/source/elastic_search.py,sha256=2dwIcSbYMaq_RoSnxLGz4Q_20oJ8AGgMKunVIBIgYM8,23406
208
208
  datahub/ingestion/source/feast.py,sha256=rAqT7huVgi4c7iRU9qSbohPbNRrxZVw4PIvnfxNsiUk,18798
209
209
  datahub/ingestion/source/file.py,sha256=sHCWbtrQcXMMYPs_LUqofx0mk6IFN0G7Lyk9b0yRZMI,16082
210
- datahub/ingestion/source/ge_data_profiler.py,sha256=Y_sdKK4Ot6MOpSKNfkkCJhiL7hqcjpU0hcDqXpfcNA0,66162
210
+ datahub/ingestion/source/ge_data_profiler.py,sha256=dvwTLK95xx1vuLPzigredqXiv0nyZVKas1dP7zcy3jU,67807
211
211
  datahub/ingestion/source/ge_profiling_config.py,sha256=sG_0BwPDRG3I4PnhfWGHf9AbePLDWG0kKcKEtlXHTuk,11544
212
212
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
213
213
  datahub/ingestion/source/ldap.py,sha256=PKoA5pVjuIxFfW1TcbYNIWSm7-C7shK2FDn7Zo5mrVM,18705
@@ -218,7 +218,7 @@ datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-
218
218
  datahub/ingestion/source/nifi.py,sha256=2jxSzuHIRQFF7DLoceGbUd_10lkhHRlqA9hhApSt3Yw,56882
219
219
  datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
220
220
  datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
221
- datahub/ingestion/source/preset.py,sha256=bbh0ZWiAZMy2zuJDmaRY07_OuGJ9tdtKjwvIxqbY5II,3964
221
+ datahub/ingestion/source/preset.py,sha256=1goxuFoLw50dokr2gp1MhUrDJ8CFNX-wBZIvv7laEXA,3966
222
222
  datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
223
223
  datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
224
224
  datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
@@ -252,7 +252,7 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
252
252
  datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
253
253
  datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
254
254
  datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
255
- datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=nv_lHwcX07vRrrGamVIpqcAtqJ1tKscq6XVC4vwsRAk,13943
255
+ datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=dslbjihZMg9Utt8V8DYIucqQfychl_MB-gaDTmsMqe0,15005
256
256
  datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=kEwWhq3ch6WT4q4hcX8-fvQh28KgrNfspFwIytO3vQA,25103
257
257
  datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
258
258
  datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=bG1soRawCLhJw_15L2fJmFfj1kntTthV6ng4LZOnwko,21916
@@ -308,15 +308,15 @@ datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWM
308
308
  datahub/ingestion/source/delta_lake/report.py,sha256=uR4e4QA_jv8lL3CV-wE5t43H8pUqrGmx_ItLqN9flPI,587
309
309
  datahub/ingestion/source/delta_lake/source.py,sha256=1OxdbH_KcC6WFbf78XueKphnmCcIGizUepQ-LQK_hbk,13968
310
310
  datahub/ingestion/source/dremio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
311
- datahub/ingestion/source/dremio/dremio_api.py,sha256=h4rjnRacggFXzIQVVsKFNgTUixUZh2gPHH4_7rSGx2g,33413
311
+ datahub/ingestion/source/dremio/dremio_api.py,sha256=_xtiftEFWfu1uqbh_W8j99oqJc4wah_M_4ho6W_XdzM,35001
312
312
  datahub/ingestion/source/dremio/dremio_aspects.py,sha256=oWV2_mSpq3Bh42YJ1QVbAyp-Uihf2WIT6VsHGsGTgzk,18248
313
- datahub/ingestion/source/dremio/dremio_config.py,sha256=5SP66ewGYN0OnyWgpU33EZOmtICsclTtBX5DSYLwl3c,5782
313
+ datahub/ingestion/source/dremio/dremio_config.py,sha256=xugXSYoqXuMo9q5LTjSWCx2P376fGxIl7Nc2cI-K_OQ,5882
314
314
  datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=MQk8BAHLufN69CntFfOV8K59A_AvLC-vwMS33Jw8bBg,3069
315
315
  datahub/ingestion/source/dremio/dremio_entities.py,sha256=1gZrNqTp3Pm6vqGDQaWt3HkxEuHKxpGYQ4geVoFvxWI,15147
316
316
  datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
317
- datahub/ingestion/source/dremio/dremio_reporting.py,sha256=BvdQA_T-VXl9EjOmj-D2NlM9pXZ4UdKXKGRpYsk1Eqw,1607
318
- datahub/ingestion/source/dremio/dremio_source.py,sha256=_52Z0ifntbhYNwlrMs6jZ59CI4aVpQzL0K16Sv7Xm8Y,24471
319
- datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
317
+ datahub/ingestion/source/dremio/dremio_reporting.py,sha256=YRKM6PvoJYHLBXmOGwkgou_8x8_oA2xaqTWWoVuwFMY,2247
318
+ datahub/ingestion/source/dremio/dremio_source.py,sha256=baUW3f6Y7WWbHXo9GqmBzZqXilMo1MbG3hvDS-bwthI,25164
319
+ datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=wA1hqKk9cKMJDyEdZRQcDDLZPGYwuNqrvleUHTkWgrQ,10508
320
320
  datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
321
321
  datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
322
322
  datahub/ingestion/source/dynamodb/dynamodb.py,sha256=vM3Ia5rZidqOcdPPigpuo6-7Ipoof8eF3RwxJ3SX2Ck,22771
@@ -386,6 +386,9 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2
386
386
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
387
387
  datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
388
388
  datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
389
+ datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
390
+ datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=i19UFii3wOPG5CVQfV_20MwIRAJjr0TxsIiVjKcBND4,14314
391
+ datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=oIC1vcOx76Vl63O9kcjP_iInBHyS-ATdN3Y932TCCZg,3283
389
392
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
390
393
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=JqKCwxBJfOrC8SF7CmDG0cseWxHk_7E2v4Diw3Q0-WM,14181
391
394
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -428,7 +431,7 @@ datahub/ingestion/source/redshift/lineage.py,sha256=IPF8vHy2MFyhK-hu2-lxV2-kcnNA
428
431
  datahub/ingestion/source/redshift/lineage_v2.py,sha256=dbTvuaJBV5yvCWM_oEAqZIA1JOlGxLJOexbEB47A_xE,17962
429
432
  datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwqIql_rpWvlumY_EM,4309
430
433
  datahub/ingestion/source/redshift/query.py,sha256=vVIuNUaU4a7AfMFJZlgLuqi0cGVl0gVz8xZUSnPhWvs,47845
431
- datahub/ingestion/source/redshift/redshift.py,sha256=bM9pow8J6oX9jlTh029xsWGFDQ61lyXHdSz3Av9Et0M,43621
434
+ datahub/ingestion/source/redshift/redshift.py,sha256=p6rOOCjxNnPpTn-vFjgISMMjtUTzu6K-OrfWOIaIuJI,44683
432
435
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
433
436
  datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
434
437
  datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
@@ -548,8 +551,8 @@ datahub/ingestion/source/unity/source.py,sha256=uJBjgZ7qhJpn25t0ZOcLuZ0vn2Uz4n9A
548
551
  datahub/ingestion/source/unity/tag_entities.py,sha256=iWl6nRAWSye1hoFDx_Xh4aT53PN0sGzlX7n1-oTVUv8,11568
549
552
  datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
550
553
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
551
- datahub/ingestion/source/usage/clickhouse_usage.py,sha256=6HtLuDjJ7__dLJmV-RwNKmdDh3Pns_nItizoulsvJPM,10161
552
- datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=K412PkoPU3elilOP7iYby2NYfJxakEvryj78nKDI_IA,10681
554
+ datahub/ingestion/source/usage/clickhouse_usage.py,sha256=M6YVQqwJoFqJPxlTr62lFwxfDeX2-_9Diw6qtcq2XWM,10244
555
+ datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=EnxKQ6IMt0o3VLvqfFJAE-mYMnLponnKGZEsVeGet1c,10802
553
556
  datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
554
557
  datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
555
558
  datahub/ingestion/source/vertexai/vertexai.py,sha256=RuHda0mbc1DElYZIZ_W_hvkN7Eg4LIvI1fRFMvpHPB0,56012
@@ -608,8 +611,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
608
611
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
609
612
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
610
613
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
611
- datahub/metadata/_internal_schema_classes.py,sha256=_TONNRIlWJ5xKruHRzmgjg73T5BUc8Z1xgw4yBaaZ5Q,1019193
612
- datahub/metadata/schema.avsc,sha256=ICovNj8D1XGTQUGA7x9gbjNTJ9li00a6MSz8_UvZN2s,707166
614
+ datahub/metadata/_internal_schema_classes.py,sha256=UACA9XSSdTueZTUW4v_4OWIsWga3T99I3gnGAPCe2w8,1019257
615
+ datahub/metadata/schema.avsc,sha256=GsQZCPSD3_KKXvGALSzgTgIONL0r3tXme9M4rXQy_q4,707244
613
616
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
614
617
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
615
618
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -711,7 +714,7 @@ datahub/metadata/schemas/ChartQuery.avsc,sha256=6qz8Q5wa5h0GS6QcwfzVZrRi26PoEwXy
711
714
  datahub/metadata/schemas/ChartUsageStatistics.avsc,sha256=FjEEPj_19jUvJcS9cyZtHqByQyPdt2xhmxfFdw9mSM4,5881
712
715
  datahub/metadata/schemas/Container.avsc,sha256=pEpRQgClrJRm59eAiltc4YoP0pg7TG6Bu-ENCh11-mw,813
713
716
  datahub/metadata/schemas/ContainerKey.avsc,sha256=5wUgUbR1zzNcSpeDOHm4XWFf_xDtXFuIikbjKXMFe80,939
714
- datahub/metadata/schemas/ContainerProperties.avsc,sha256=PfWz9Nd6ib3dVzdPwJp62iLyqynsYrWrrLdqk5oWN4g,5038
717
+ datahub/metadata/schemas/ContainerProperties.avsc,sha256=BPhrRf4I7UImQhB-55Qby82MXKuDQL0cwlvdg0aZEHs,5112
715
718
  datahub/metadata/schemas/CorpGroupEditableInfo.avsc,sha256=sItWMAGfQdYwmF5xLzMnYGl_rtXvGlxPbEjmRm1E6u4,1290
716
719
  datahub/metadata/schemas/CorpGroupInfo.avsc,sha256=G83lndR7C6WPfccFg7qFE5Ely5vrdxC8x7zJB3fW3Sw,4740
717
720
  datahub/metadata/schemas/CorpGroupKey.avsc,sha256=B_RMHAFF_nd86qtO2p1slAZnxruCmBM7DUfILAU_UaI,953
@@ -728,7 +731,7 @@ datahub/metadata/schemas/DashboardUsageStatistics.avsc,sha256=pUAKqs49Wy5pAL92g_
728
731
  datahub/metadata/schemas/DataContractKey.avsc,sha256=m0ej_Wu7NcuZQCRwQI3Sidfv9bUy5mvuhlpgax6i1xA,511
729
732
  datahub/metadata/schemas/DataContractProperties.avsc,sha256=RCxuJMlZwqEE0iHTpuXvcH6zRFoOt7ysQFPrJRp3RqE,4763
730
733
  datahub/metadata/schemas/DataContractStatus.avsc,sha256=5yvT43AIB13Dn_h0-4s7fsL7BTuXhkK5pi2KJug4_qg,1029
731
- datahub/metadata/schemas/DataFlowInfo.avsc,sha256=hcceLpSVLSx5N4YEPTDtXMOqLF0bWCQE5WNltQ-PJXo,4884
734
+ datahub/metadata/schemas/DataFlowInfo.avsc,sha256=SOXI26Vgv3O-t9z6_ff0gzVL5-M1XBlCOUCI9vB-jpk,4958
732
735
  datahub/metadata/schemas/DataFlowKey.avsc,sha256=lIXr1oVJIHxOEibTx1YWFhGY2VQyWs9AW65eePPmdXI,1345
733
736
  datahub/metadata/schemas/DataHubAccessTokenInfo.avsc,sha256=WS77M5w7GJFxUAiyXaxUvBqO0XFV2FnKPxXSXYbXHTE,1646
734
737
  datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4NJOB5yODVr9J-wZzkZanEgo,483
@@ -755,7 +758,7 @@ datahub/metadata/schemas/DataHubUpgradeRequest.avsc,sha256=3xYsf3XVHBcclHdRnLVMo
755
758
  datahub/metadata/schemas/DataHubUpgradeResult.avsc,sha256=VydVb4yqjIviR73-T6TooF6OiahwxTHciP97NuF4qvI,1385
756
759
  datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdPQYwvhhv9RQv6gEeg,11642
757
760
  datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
758
- datahub/metadata/schemas/DataJobInfo.avsc,sha256=gU8aGuDp1PDlu-fBTH04CDofx9qpegxGMwG46UaGLSs,7414
761
+ datahub/metadata/schemas/DataJobInfo.avsc,sha256=Bc9qdDcXI0GQdEgNTpgHaBbnrppDKQ-1xR26diOSVIQ,7488
759
762
  datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
760
763
  datahub/metadata/schemas/DataJobKey.avsc,sha256=S7egH8jWjKW52MG6Pg7plDoP15XfTTiMde5V6nR6ycE,1624
761
764
  datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
@@ -770,7 +773,7 @@ datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=xyGBUf3vFHrMLtmZj
770
773
  datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5IVetgD8mW2k--F6CwmYXM3KOE6edU8,3836
771
774
  datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
772
775
  datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
773
- datahub/metadata/schemas/DataProcessKey.avsc,sha256=3N6xt_rxPZMzi7XZQz-4OLnqMQJ88Oxj5OAr4buDVPY,2448
776
+ datahub/metadata/schemas/DataProcessKey.avsc,sha256=ZZE2HN4mwZtm_TJNcdohFS97WXytFq9HAs_-shor6sY,2518
774
777
  datahub/metadata/schemas/DataProductKey.avsc,sha256=SyjmL2ieea1P6uipXst37mD5NdGPTqDvJAL3CVo91wk,661
775
778
  datahub/metadata/schemas/DataProductProperties.avsc,sha256=Lc3duV7YMJLvo_RwckLbW4bbmPrhSS1D-bxVVboNX2c,6930
776
779
  datahub/metadata/schemas/DataTransformLogic.avsc,sha256=nHTH6UzJ2Zz88N2aWa96hawLUR20HP7eSynfPtI1kzg,2111
@@ -779,7 +782,7 @@ datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiF
779
782
  datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
780
783
  datahub/metadata/schemas/DatahubIngestionRunSummary.avsc,sha256=_Ek7NqfJVTLqlM0NR9BRA57N9_ejwDdQvz7B1tVxSEE,9367
781
784
  datahub/metadata/schemas/DatasetDeprecation.avsc,sha256=ucXxaDcAUib9_y0k5qOINMn5VK2X3trHK2dcpNcsR2Q,1256
782
- datahub/metadata/schemas/DatasetKey.avsc,sha256=kaP4yGbUk2kHivbh5H-RnSFcmiZsEfHsKVYV5VNweos,3403
785
+ datahub/metadata/schemas/DatasetKey.avsc,sha256=xNh2Zbg1POuHD3qu42-__zNVhKWx6QkA2LY7Dlk0YSY,3473
783
786
  datahub/metadata/schemas/DatasetProfile.avsc,sha256=3ZCU9JD6l2razACp0AY6LLMgnkMTj6D_5Xk9np6WWRM,9965
784
787
  datahub/metadata/schemas/DatasetProperties.avsc,sha256=DFJn75feqaoQk84zin_o_lqsFFhqkwya5LGC5LLJXbU,4209
785
788
  datahub/metadata/schemas/DatasetUpstreamLineage.avsc,sha256=PjAWPbsqwH7FjX2kFDy0dE6ENYOwRynH9vJerWisr2A,5365
@@ -832,7 +835,7 @@ datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=00paBmYoFIlIUebS0X6BL9y5xqn
832
835
  datahub/metadata/schemas/GlossaryTerms.avsc,sha256=ogOFO6Hr2Xb7s1JHqxsCPY8r_qY_9kwu69k5-E3j2BM,7123
833
836
  datahub/metadata/schemas/GroupMembership.avsc,sha256=wT3Hbpv2Z7V4X_-rIoed0cukAOMyYEL93udK8mMCjn0,557
834
837
  datahub/metadata/schemas/IcebergCatalogInfo.avsc,sha256=X9Ejqzn1DyxNIth7vDhtPjGG4xMPQMhl7f-S7fBFxek,691
835
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=9v2iVfg1GTr9cw1o7tEDXNPqXWDD9I98NMNo0PdJ2ic,2635
838
+ datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=0m7cQm8cCnBWNI5jGGgr5ZdOg66RQGWSf3gf8ay53So,2705
836
839
  datahub/metadata/schemas/IncidentInfo.avsc,sha256=L8xldmWyOW4Ml2Fm9XTRL13lP1CAEP0kgXsd_jLZaEU,12425
837
840
  datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
838
841
  datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
@@ -848,17 +851,17 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=hVxNEqsx4GgG11GVryn9ms16O
848
851
  datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
849
852
  datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
850
853
  datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
851
- datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=9fSHt_AvnRv983oxABSf-EZYz-Uyn5BW7DLpQ61v-8Y,2615
854
+ datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=EcA0z4sQfqe3IJ8PO8cGW34XMxc9Q2BbCBjgkJTaznE,2685
852
855
  datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=7IlGrMmX8nfgezvaZyrXskCTCRlwvRzGOYUOpFV3r6Y,5480
853
856
  datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
854
- datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=Hhk6L63fITV8Qu7h9a7khrfkUrRPcUMFvccrVreUT2Y,2709
857
+ datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=zIyIHI-23i_oQMbc1sigar9sJNJsa6CYfHHy-nH5IXE,2779
855
858
  datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=AZ5Pohk3_pCctQ4hcE1UOURQFYHQne0dw_lRUpOu5WY,6924
856
- datahub/metadata/schemas/MLModelKey.avsc,sha256=zKFzorbGgKGO56ILU2UmwUjRQRoRJ9rhu0yhTdUIfD8,3078
859
+ datahub/metadata/schemas/MLModelKey.avsc,sha256=zwoY9opTL5tMm5aoRHoWcNv5DjERYS-hWR05kVFlcTw,3148
857
860
  datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
858
861
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
859
862
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
860
863
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
861
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=bApzACHQ70FESJuQ-24dfwk3H3R255f1uG9ZSY89RwM,376924
864
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=kwEwLUSPuqdrx_7uOX2XnEZ6Olm4p2ezYt0bTQSgaTk,377034
862
865
  datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=Cf5eECeShCA_XHFr2MRhRQpPE61F6Xv-z1jjoBLJLgc,12239
863
866
  datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=tvO5cGIqZAIvUbMon1RAKgSY4E0jvBqT5VmLWAuNGkY,9770
864
867
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
@@ -1025,7 +1028,7 @@ datahub/utilities/sql_formatter.py,sha256=tYXIsKjKmpKh0JXGxeAPrHkUWYd1SwJNLjUZsf
1025
1028
  datahub/utilities/sqlalchemy_query_combiner.py,sha256=oxW20uXz8hV1Zb4fLXvTQ7c3LjACBsrF58TR2_-RSps,14982
1026
1029
  datahub/utilities/sqlalchemy_type_converter.py,sha256=H4S4xnnyPozDBHFhBh4rjjoXa5novFzYIUBJy2KSrVc,9805
1027
1030
  datahub/utilities/sqllineage_patch.py,sha256=0Buh50bmEqJFg1HFRCknCnePo1cecI4JmGxVhM_jh2g,1976
1028
- datahub/utilities/stats_collections.py,sha256=CxaTcrF7J6am7iX5jPhFKne535UcyDk_oreVwR013fU,1625
1031
+ datahub/utilities/stats_collections.py,sha256=9QDEk40UxhmQwDS6I63Gp6fcIBqmXVinKl7x2xHCD34,1702
1029
1032
  datahub/utilities/str_enum.py,sha256=EsqCLPbrqyQ2YU_wt7QP-a6P5fnpIshXJ3AI8gLBlVA,474
1030
1033
  datahub/utilities/tee_io.py,sha256=jBrsUfTPTk9IICntfGOG0HR-Fjp8BQMde-FPQ4r3kuI,601
1031
1034
  datahub/utilities/threaded_iterator_executor.py,sha256=6BpCE0os3d-uMYxHBilPQC-JvEBkU6JQY4bGs06JKYI,2004
@@ -1075,8 +1078,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1075
1078
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1076
1079
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1077
1080
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1078
- acryl_datahub-1.1.0.4rc2.dist-info/METADATA,sha256=5mGKtVP2MPiCtTWr84Dn6JQYyPYttuv8wAlpbeIfx28,182347
1079
- acryl_datahub-1.1.0.4rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1080
- acryl_datahub-1.1.0.4rc2.dist-info/entry_points.txt,sha256=-N2PGtn1uwKR7-VM9spziE_RNyOdKm_XNpOWL1lnaj4,9790
1081
- acryl_datahub-1.1.0.4rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1082
- acryl_datahub-1.1.0.4rc2.dist-info/RECORD,,
1081
+ acryl_datahub-1.1.0.5rc1.dist-info/METADATA,sha256=sKbnEOEUAOv5hgUfoytH-EIKqUc7QxK96ENS7SZUIPs,182347
1082
+ acryl_datahub-1.1.0.5rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1083
+ acryl_datahub-1.1.0.5rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
1084
+ acryl_datahub-1.1.0.5rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1085
+ acryl_datahub-1.1.0.5rc1.dist-info/RECORD,,
@@ -39,6 +39,7 @@ datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:
39
39
  datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
40
40
  datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
41
41
  datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
42
+ datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
42
43
  dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
43
44
  dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
44
45
  delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.1.0.4rc2"
3
+ __version__ = "1.1.0.5rc1"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -4,6 +4,7 @@ import functools
4
4
  import json
5
5
  import logging
6
6
  import os
7
+ import re
7
8
  import time
8
9
  from collections import defaultdict
9
10
  from dataclasses import dataclass
@@ -104,6 +105,22 @@ BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
104
105
  )
105
106
 
106
107
 
108
def preserve_unicode_escapes(obj: Any) -> Any:
    r"""Recursively replace non-ASCII characters in strings with ``\uXXXX`` escapes.

    Args:
        obj: A value to convert. Dicts have their values converted (keys are
            left untouched), lists have their items converted, and strings
            have every character outside ``\x00``-``\x7F`` replaced by the
            literal text of its backslash-u escape.

    Returns:
        A new dict/list/str with the replacements applied, or ``obj`` itself
        when it is any other type.
    """
    if isinstance(obj, dict):
        return {k: preserve_unicode_escapes(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [preserve_unicode_escapes(item) for item in obj]
    if isinstance(obj, str):

        def escape_unicode(match: "re.Match[str]") -> str:
            code_point = ord(match.group(0))
            if code_point <= 0xFFFF:
                return f"\\u{code_point:04x}"
            # A \u escape holds exactly four hex digits, so a character beyond
            # the Basic Multilingual Plane must be written as a UTF-16
            # surrogate pair; formatting it directly would yield five digits
            # and decode to the wrong character plus a stray digit.
            offset = code_point - 0x10000
            high = 0xD800 + (offset >> 10)
            low = 0xDC00 + (offset & 0x3FF)
            return f"\\u{high:04x}\\u{low:04x}"

        return re.sub(r"[^\x00-\x7F]", escape_unicode, obj)
    return obj
122
+
123
+
107
124
  class EmitMode(ConfigEnum):
108
125
  # Fully synchronous processing that updates both primary storage (SQL) and search storage (Elasticsearch) before returning.
109
126
  # Provides the strongest consistency guarantee but with the highest cost. Best for critical operations where immediate
@@ -611,7 +628,7 @@ class DataHubRestEmitter(Closeable, Emitter):
611
628
  else:
612
629
  url = f"{self._gms_server}/aspects?action=ingestProposal"
613
630
 
614
- mcp_obj = pre_json_transform(mcp.to_obj())
631
+ mcp_obj = preserve_unicode_escapes(pre_json_transform(mcp.to_obj()))
615
632
  payload_dict = {
616
633
  "proposal": mcp_obj,
617
634
  "async": "true"
@@ -147,6 +147,9 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
147
147
  def close(self) -> None:
148
148
  pass
149
149
 
150
def flush(self) -> None:
    """Hook for sinks that buffer writes; the base implementation does nothing."""
    return None
152
+
150
153
  def configured(self) -> str:
151
154
  """Override this method to output a human-readable and scrubbed version of the configured sink"""
152
155
  return ""
@@ -502,7 +502,7 @@ class Pipeline:
502
502
  self._handle_uncaught_pipeline_exception(exc)
503
503
  finally:
504
504
  clear_global_warnings()
505
-
505
+ self.sink.flush()
506
506
  self._notify_reporters_on_ingestion_completion()
507
507
 
508
508
  def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
@@ -5,6 +5,7 @@ import functools
5
5
  import logging
6
6
  import os
7
7
  import threading
8
+ import time
8
9
  import uuid
9
10
  from enum import auto
10
11
  from typing import List, Optional, Tuple, Union
@@ -346,6 +347,17 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
346
347
  RecordEnvelope(item, metadata={}), NoopWriteCallback()
347
348
  )
348
349
 
350
def flush(self) -> None:
    """Block until the report shows no pending requests.

    Polls ``self.report.pending_requests`` every 0.1 seconds and logs the
    outstanding count once per 1000 polls.
    """
    polls = 0
    while True:
        if not self.report.pending_requests > 0:
            return
        time.sleep(0.1)
        polls += 1
        if polls % 1000 != 0:
            continue
        logger.info(
            f"Waiting for {self.report.pending_requests} records to be written"
        )
360
+
349
361
  def close(self):
350
362
  with self.report.main_thread_blocking_timer:
351
363
  self.executor.shutdown()
@@ -4,6 +4,7 @@ import logging
4
4
  import os
5
5
  from typing import Iterable, List, Optional
6
6
 
7
+ from datahub.configuration.common import AllowDenyPattern
7
8
  from datahub.ingestion.api.common import PipelineContext
8
9
  from datahub.ingestion.api.decorators import (
9
10
  SupportStatus,
@@ -242,7 +243,23 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
242
243
  ).workunit_processor,
243
244
  ]
244
245
 
246
+ def _warn_deprecated_configs(self):
247
+ if (
248
+ self.config.match_fully_qualified_names is not None
249
+ and not self.config.match_fully_qualified_names
250
+ and self.config.schema_pattern is not None
251
+ and self.config.schema_pattern != AllowDenyPattern.allow_all()
252
+ ):
253
+ self.report.report_warning(
254
+ message="Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
255
+ "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
256
+ "The config option `match_fully_qualified_names` will be removed in future and the default behavior will be like `match_fully_qualified_names: True`.",
257
+ context="Config option deprecation warning",
258
+ title="Config option deprecation warning",
259
+ )
260
+
245
261
  def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
262
+ self._warn_deprecated_configs()
246
263
  projects = get_projects(
247
264
  self.bq_schema_extractor.schema_api,
248
265
  self.report,
@@ -21,6 +21,7 @@ from datahub.ingestion.source.dremio.dremio_datahub_source_mapping import (
21
21
  )
22
22
  from datahub.ingestion.source.dremio.dremio_reporting import DremioSourceReport
23
23
  from datahub.ingestion.source.dremio.dremio_sql_queries import DremioSQLQueries
24
+ from datahub.utilities.perf_timer import PerfTimer
24
25
 
25
26
  logger = logging.getLogger(__name__)
26
27
 
@@ -54,6 +55,8 @@ class DremioAPIOperations:
54
55
  self.deny_schema_pattern: List[str] = connection_args.schema_pattern.deny
55
56
  self._max_workers: int = connection_args.max_workers
56
57
  self.is_dremio_cloud = connection_args.is_dremio_cloud
58
+ self.start_time = connection_args.start_time
59
+ self.end_time = connection_args.end_time
57
60
  self.report = report
58
61
  self.session = requests.Session()
59
62
  if connection_args.is_dremio_cloud:
@@ -233,47 +236,71 @@ class DremioAPIOperations:
233
236
 
234
237
  def get(self, url: str) -> Dict:
235
238
  """execute a get request on dremio"""
236
- response = self.session.get(
237
- url=(self.base_url + url),
238
- verify=self._verify,
239
- timeout=self._timeout,
240
- )
241
- return response.json()
239
+ logger.debug(f"GET request to {self.base_url + url}")
240
+ self.report.api_calls_total += 1
241
+ self.report.api_calls_by_method_and_path["GET " + url] += 1
242
+
243
+ with PerfTimer() as timer:
244
+ response = self.session.get(
245
+ url=(self.base_url + url),
246
+ verify=self._verify,
247
+ timeout=self._timeout,
248
+ )
249
+ self.report.api_call_secs_by_method_and_path["GET " + url] += (
250
+ timer.elapsed_seconds()
251
+ )
252
+ # response.raise_for_status() # Enabling this line makes the integration tests fail
253
+ return response.json()
242
254
 
243
255
  def post(self, url: str, data: str) -> Dict:
244
256
  """execute a post request on dremio"""
245
- response = self.session.post(
246
- url=(self.base_url + url),
247
- data=data,
248
- verify=self._verify,
249
- timeout=self._timeout,
250
- )
251
- return response.json()
257
+ logger.debug(f"POST request to {self.base_url + url}")
258
+ self.report.api_calls_total += 1
259
+ self.report.api_calls_by_method_and_path["POST " + url] += 1
260
+
261
+ with PerfTimer() as timer:
262
+ response = self.session.post(
263
+ url=(self.base_url + url),
264
+ data=data,
265
+ verify=self._verify,
266
+ timeout=self._timeout,
267
+ )
268
+ self.report.api_call_secs_by_method_and_path["POST " + url] += (
269
+ timer.elapsed_seconds()
270
+ )
271
+ # response.raise_for_status() # Enabling this line makes the integration tests fail
272
+ return response.json()
252
273
 
253
274
  def execute_query(self, query: str, timeout: int = 3600) -> List[Dict[str, Any]]:
254
275
  """Execute SQL query with timeout and error handling"""
255
276
  try:
256
- response = self.post(url="/sql", data=json.dumps({"sql": query}))
277
+ with PerfTimer() as timer:
278
+ logger.info(f"Executing query: {query}")
279
+ response = self.post(url="/sql", data=json.dumps({"sql": query}))
257
280
 
258
- if "errorMessage" in response:
259
- self.report.failure(
260
- message="SQL Error", context=f"{response['errorMessage']}"
261
- )
262
- raise DremioAPIException(f"SQL Error: {response['errorMessage']}")
281
+ if "errorMessage" in response:
282
+ self.report.failure(
283
+ message="SQL Error", context=f"{response['errorMessage']}"
284
+ )
285
+ raise DremioAPIException(f"SQL Error: {response['errorMessage']}")
263
286
 
264
- job_id = response["id"]
287
+ job_id = response["id"]
265
288
 
266
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
267
- future = executor.submit(self.fetch_results, job_id)
268
- try:
269
- return future.result(timeout=timeout)
270
- except concurrent.futures.TimeoutError:
271
- self.cancel_query(job_id)
272
- raise DremioAPIException(
273
- f"Query execution timed out after {timeout} seconds"
274
- ) from None
275
- except RuntimeError as e:
276
- raise DremioAPIException() from e
289
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
290
+ future = executor.submit(self.fetch_results, job_id)
291
+ try:
292
+ result = future.result(timeout=timeout)
293
+ logger.info(
294
+ f"Query executed in {timer.elapsed_seconds()} seconds with {len(result)} results"
295
+ )
296
+ return result
297
+ except concurrent.futures.TimeoutError:
298
+ self.cancel_query(job_id)
299
+ raise DremioAPIException(
300
+ f"Query execution timed out after {timeout} seconds"
301
+ ) from None
302
+ except RuntimeError as e:
303
+ raise DremioAPIException() from e
277
304
 
278
305
  except requests.RequestException as e:
279
306
  raise DremioAPIException("Error executing query") from e
@@ -603,10 +630,25 @@ class DremioAPIOperations:
603
630
  return parents_list
604
631
 
605
632
  def extract_all_queries(self) -> List[Dict[str, Any]]:
633
+ # Convert datetime objects to string format for SQL queries
634
+ start_timestamp_str = None
635
+ end_timestamp_str = None
636
+
637
+ if self.start_time:
638
+ start_timestamp_str = self.start_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
639
+ if self.end_time:
640
+ end_timestamp_str = self.end_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
641
+
606
642
  if self.edition == DremioEdition.CLOUD:
607
- jobs_query = DremioSQLQueries.QUERY_ALL_JOBS_CLOUD
643
+ jobs_query = DremioSQLQueries.get_query_all_jobs_cloud(
644
+ start_timestamp_millis=start_timestamp_str,
645
+ end_timestamp_millis=end_timestamp_str,
646
+ )
608
647
  else:
609
- jobs_query = DremioSQLQueries.QUERY_ALL_JOBS
648
+ jobs_query = DremioSQLQueries.get_query_all_jobs(
649
+ start_timestamp_millis=start_timestamp_str,
650
+ end_timestamp_millis=end_timestamp_str,
651
+ )
610
652
 
611
653
  return self.execute_query(query=jobs_query)
612
654
 
@@ -685,6 +727,27 @@ class DremioAPIOperations:
685
727
 
686
728
  return any(re.match(regex_pattern, path, re.IGNORECASE) for path in paths)
687
729
 
730
+ def _could_match_pattern(self, pattern: str, path_components: List[str]) -> bool:
731
+ """
732
+ Check if a container path could potentially match a schema pattern.
733
+ This handles hierarchical path matching for container filtering.
734
+ """
735
+ if pattern == ".*":
736
+ return True
737
+
738
+ current_path = ".".join(path_components)
739
+
740
+ # Handle simple .* patterns (like "a.b.c.*")
741
+ if pattern.endswith(".*") and not any(c in pattern for c in "^$[](){}+?\\"):
742
+ # Simple dotstar pattern - check prefix matching
743
+ pattern_prefix = pattern[:-2] # Remove ".*"
744
+ return current_path.lower().startswith(
745
+ pattern_prefix.lower()
746
+ ) or pattern_prefix.lower().startswith(current_path.lower())
747
+ else:
748
+ # Complex regex pattern - use existing regex matching logic
749
+ return self._check_pattern_match(pattern, [current_path], allow_prefix=True)
750
+
688
751
  def should_include_container(self, path: List[str], name: str) -> bool:
689
752
  """
690
753
  Helper method to check if a container should be included based on schema patterns.
@@ -711,41 +774,8 @@ class DremioAPIOperations:
711
774
 
712
775
  # Check allow patterns
713
776
  for pattern in self.allow_schema_pattern:
714
- # For patterns with wildcards, check if this path is a parent of the pattern
715
- if "*" in pattern:
716
- pattern_parts = pattern.split(".")
717
- path_parts = path_components
718
-
719
- # If pattern has exact same number of parts, check each component
720
- if len(pattern_parts) == len(path_parts):
721
- matches = True
722
- for p_part, c_part in zip(pattern_parts, path_parts):
723
- if p_part != "*" and p_part.lower() != c_part.lower():
724
- matches = False
725
- break
726
- if matches:
727
- self.report.report_container_scanned(full_path)
728
- return True
729
- # Otherwise check if current path is prefix match
730
- else:
731
- # Remove the trailing wildcard if present
732
- if pattern_parts[-1] == "*":
733
- pattern_parts = pattern_parts[:-1]
734
-
735
- for i in range(len(path_parts)):
736
- current_path = ".".join(path_parts[: i + 1])
737
- pattern_prefix = ".".join(pattern_parts[: i + 1])
738
-
739
- if pattern_prefix.startswith(current_path):
740
- self.report.report_container_scanned(full_path)
741
- return True
742
-
743
- # Direct pattern matching
744
- if self._check_pattern_match(
745
- pattern=pattern,
746
- paths=[full_path],
747
- allow_prefix=True,
748
- ):
777
+ # Check if current path could potentially match this pattern
778
+ if self._could_match_pattern(pattern, path_components):
749
779
  self.report.report_container_scanned(full_path)
750
780
  return True
751
781
 
@@ -9,6 +9,7 @@ from datahub.configuration.source_common import (
9
9
  EnvConfigMixin,
10
10
  PlatformInstanceConfigMixin,
11
11
  )
12
+ from datahub.configuration.time_window_config import BaseTimeWindowConfig
12
13
  from datahub.ingestion.source.ge_profiling_config import GEProfilingBaseConfig
13
14
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
14
15
  StatefulStaleMetadataRemovalConfig,
@@ -118,6 +119,7 @@ class DremioSourceMapping(EnvConfigMixin, PlatformInstanceConfigMixin, ConfigMod
118
119
  class DremioSourceConfig(
119
120
  DremioConnectionConfig,
120
121
  StatefulIngestionConfigBase,
122
+ BaseTimeWindowConfig,
121
123
  EnvConfigMixin,
122
124
  PlatformInstanceConfigMixin,
123
125
  ):