acryl-datahub 1.1.0.4rc2__py3-none-any.whl → 1.1.0.5rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/METADATA +2526 -2526
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/RECORD +38 -35
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/emitter/rest_emitter.py +18 -1
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/run/pipeline.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +12 -0
- datahub/ingestion/source/bigquery_v2/bigquery.py +17 -0
- datahub/ingestion/source/dremio/dremio_api.py +98 -68
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +90 -77
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/ge_data_profiler.py +48 -8
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +384 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/preset.py +1 -1
- datahub/ingestion/source/redshift/redshift.py +17 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -0
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/metadata/_internal_schema_classes.py +3 -0
- datahub/metadata/schema.avsc +2 -0
- datahub/metadata/schemas/ContainerProperties.avsc +2 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +2 -0
- datahub/metadata/schemas/DataJobInfo.avsc +2 -0
- datahub/metadata/schemas/DataProcessKey.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +2 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -0
- datahub/metadata/schemas/MLModelKey.avsc +2 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +2 -0
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.1.0.
|
|
1
|
+
acryl_datahub-1.1.0.5rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=TZdOK9cFifb1yYky21qrv8R4D8Q8htQJWiKxMGKJpS0,323
|
|
5
5
|
datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -132,7 +132,7 @@ datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxga
|
|
|
132
132
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
133
133
|
datahub/emitter/request_helper.py,sha256=2Sij9VJqgA7xZI6I7IuxsA8ioakbz0FJ3gvazxU_z3M,5738
|
|
134
134
|
datahub/emitter/response_helper.py,sha256=qGm45n43CepW7j6kP9wTXuP-U-SZnn7hQdJTdVaoqhQ,7504
|
|
135
|
-
datahub/emitter/rest_emitter.py,sha256=
|
|
135
|
+
datahub/emitter/rest_emitter.py,sha256=WrL-ldOJf2LoKv_5behyffsB6vVXjkT8xTdWMtpExtE,38101
|
|
136
136
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
137
137
|
datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
|
|
138
138
|
datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
|
|
@@ -150,7 +150,7 @@ datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINq
|
|
|
150
150
|
datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
|
|
151
151
|
datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
|
|
152
152
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
153
|
-
datahub/ingestion/api/sink.py,sha256=
|
|
153
|
+
datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
|
|
154
154
|
datahub/ingestion/api/source.py,sha256=hYwh4LHcG5RS6xQ9QAh5Zlijjl6r1JaibKPb75Hne7A,19518
|
|
155
155
|
datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
|
|
156
156
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
@@ -189,7 +189,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
|
|
|
189
189
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
190
190
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
191
191
|
datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
|
|
192
|
-
datahub/ingestion/run/pipeline.py,sha256=
|
|
192
|
+
datahub/ingestion/run/pipeline.py,sha256=TYE1Vm144uHFmqEsrJcbrD0fcg2M-ZvYEGGGbIp1Rmk,29943
|
|
193
193
|
datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
|
|
194
194
|
datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
|
|
195
195
|
datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -197,7 +197,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
|
|
|
197
197
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
198
198
|
datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
|
|
199
199
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
200
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
200
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=DOhtTHqKpmqgI3rUY9ri2QZAyXYDFINWMG6ne7VYUXI,13463
|
|
201
201
|
datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
|
|
202
202
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
203
203
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -207,7 +207,7 @@ datahub/ingestion/source/demo_data.py,sha256=PbtCHlZx3wrKlOPPgkWhDQuPm7ZfIx2neXJ
|
|
|
207
207
|
datahub/ingestion/source/elastic_search.py,sha256=2dwIcSbYMaq_RoSnxLGz4Q_20oJ8AGgMKunVIBIgYM8,23406
|
|
208
208
|
datahub/ingestion/source/feast.py,sha256=rAqT7huVgi4c7iRU9qSbohPbNRrxZVw4PIvnfxNsiUk,18798
|
|
209
209
|
datahub/ingestion/source/file.py,sha256=sHCWbtrQcXMMYPs_LUqofx0mk6IFN0G7Lyk9b0yRZMI,16082
|
|
210
|
-
datahub/ingestion/source/ge_data_profiler.py,sha256=
|
|
210
|
+
datahub/ingestion/source/ge_data_profiler.py,sha256=dvwTLK95xx1vuLPzigredqXiv0nyZVKas1dP7zcy3jU,67807
|
|
211
211
|
datahub/ingestion/source/ge_profiling_config.py,sha256=sG_0BwPDRG3I4PnhfWGHf9AbePLDWG0kKcKEtlXHTuk,11544
|
|
212
212
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
213
213
|
datahub/ingestion/source/ldap.py,sha256=PKoA5pVjuIxFfW1TcbYNIWSm7-C7shK2FDn7Zo5mrVM,18705
|
|
@@ -218,7 +218,7 @@ datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-
|
|
|
218
218
|
datahub/ingestion/source/nifi.py,sha256=2jxSzuHIRQFF7DLoceGbUd_10lkhHRlqA9hhApSt3Yw,56882
|
|
219
219
|
datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
|
|
220
220
|
datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
|
|
221
|
-
datahub/ingestion/source/preset.py,sha256=
|
|
221
|
+
datahub/ingestion/source/preset.py,sha256=1goxuFoLw50dokr2gp1MhUrDJ8CFNX-wBZIvv7laEXA,3966
|
|
222
222
|
datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
|
|
223
223
|
datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
|
|
224
224
|
datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
|
|
@@ -252,7 +252,7 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
|
|
|
252
252
|
datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
|
|
253
253
|
datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
|
|
254
254
|
datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
255
|
-
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=
|
|
255
|
+
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=dslbjihZMg9Utt8V8DYIucqQfychl_MB-gaDTmsMqe0,15005
|
|
256
256
|
datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=kEwWhq3ch6WT4q4hcX8-fvQh28KgrNfspFwIytO3vQA,25103
|
|
257
257
|
datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
|
|
258
258
|
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=bG1soRawCLhJw_15L2fJmFfj1kntTthV6ng4LZOnwko,21916
|
|
@@ -308,15 +308,15 @@ datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWM
|
|
|
308
308
|
datahub/ingestion/source/delta_lake/report.py,sha256=uR4e4QA_jv8lL3CV-wE5t43H8pUqrGmx_ItLqN9flPI,587
|
|
309
309
|
datahub/ingestion/source/delta_lake/source.py,sha256=1OxdbH_KcC6WFbf78XueKphnmCcIGizUepQ-LQK_hbk,13968
|
|
310
310
|
datahub/ingestion/source/dremio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
311
|
-
datahub/ingestion/source/dremio/dremio_api.py,sha256=
|
|
311
|
+
datahub/ingestion/source/dremio/dremio_api.py,sha256=_xtiftEFWfu1uqbh_W8j99oqJc4wah_M_4ho6W_XdzM,35001
|
|
312
312
|
datahub/ingestion/source/dremio/dremio_aspects.py,sha256=oWV2_mSpq3Bh42YJ1QVbAyp-Uihf2WIT6VsHGsGTgzk,18248
|
|
313
|
-
datahub/ingestion/source/dremio/dremio_config.py,sha256=
|
|
313
|
+
datahub/ingestion/source/dremio/dremio_config.py,sha256=xugXSYoqXuMo9q5LTjSWCx2P376fGxIl7Nc2cI-K_OQ,5882
|
|
314
314
|
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=MQk8BAHLufN69CntFfOV8K59A_AvLC-vwMS33Jw8bBg,3069
|
|
315
315
|
datahub/ingestion/source/dremio/dremio_entities.py,sha256=1gZrNqTp3Pm6vqGDQaWt3HkxEuHKxpGYQ4geVoFvxWI,15147
|
|
316
316
|
datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
|
|
317
|
-
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=
|
|
318
|
-
datahub/ingestion/source/dremio/dremio_source.py,sha256=
|
|
319
|
-
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=
|
|
317
|
+
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=YRKM6PvoJYHLBXmOGwkgou_8x8_oA2xaqTWWoVuwFMY,2247
|
|
318
|
+
datahub/ingestion/source/dremio/dremio_source.py,sha256=baUW3f6Y7WWbHXo9GqmBzZqXilMo1MbG3hvDS-bwthI,25164
|
|
319
|
+
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=wA1hqKk9cKMJDyEdZRQcDDLZPGYwuNqrvleUHTkWgrQ,10508
|
|
320
320
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
321
321
|
datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
|
|
322
322
|
datahub/ingestion/source/dynamodb/dynamodb.py,sha256=vM3Ia5rZidqOcdPPigpuo6-7Ipoof8eF3RwxJ3SX2Ck,22771
|
|
@@ -386,6 +386,9 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2
|
|
|
386
386
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
387
387
|
datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
|
|
388
388
|
datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
|
|
389
|
+
datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
390
|
+
datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=i19UFii3wOPG5CVQfV_20MwIRAJjr0TxsIiVjKcBND4,14314
|
|
391
|
+
datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=oIC1vcOx76Vl63O9kcjP_iInBHyS-ATdN3Y932TCCZg,3283
|
|
389
392
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
390
393
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=JqKCwxBJfOrC8SF7CmDG0cseWxHk_7E2v4Diw3Q0-WM,14181
|
|
391
394
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -428,7 +431,7 @@ datahub/ingestion/source/redshift/lineage.py,sha256=IPF8vHy2MFyhK-hu2-lxV2-kcnNA
|
|
|
428
431
|
datahub/ingestion/source/redshift/lineage_v2.py,sha256=dbTvuaJBV5yvCWM_oEAqZIA1JOlGxLJOexbEB47A_xE,17962
|
|
429
432
|
datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwqIql_rpWvlumY_EM,4309
|
|
430
433
|
datahub/ingestion/source/redshift/query.py,sha256=vVIuNUaU4a7AfMFJZlgLuqi0cGVl0gVz8xZUSnPhWvs,47845
|
|
431
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
434
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=p6rOOCjxNnPpTn-vFjgISMMjtUTzu6K-OrfWOIaIuJI,44683
|
|
432
435
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
433
436
|
datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
|
|
434
437
|
datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
|
|
@@ -548,8 +551,8 @@ datahub/ingestion/source/unity/source.py,sha256=uJBjgZ7qhJpn25t0ZOcLuZ0vn2Uz4n9A
|
|
|
548
551
|
datahub/ingestion/source/unity/tag_entities.py,sha256=iWl6nRAWSye1hoFDx_Xh4aT53PN0sGzlX7n1-oTVUv8,11568
|
|
549
552
|
datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
|
|
550
553
|
datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
551
|
-
datahub/ingestion/source/usage/clickhouse_usage.py,sha256=
|
|
552
|
-
datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=
|
|
554
|
+
datahub/ingestion/source/usage/clickhouse_usage.py,sha256=M6YVQqwJoFqJPxlTr62lFwxfDeX2-_9Diw6qtcq2XWM,10244
|
|
555
|
+
datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=EnxKQ6IMt0o3VLvqfFJAE-mYMnLponnKGZEsVeGet1c,10802
|
|
553
556
|
datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
|
|
554
557
|
datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
555
558
|
datahub/ingestion/source/vertexai/vertexai.py,sha256=RuHda0mbc1DElYZIZ_W_hvkN7Eg4LIvI1fRFMvpHPB0,56012
|
|
@@ -608,8 +611,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
608
611
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
609
612
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
610
613
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
611
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
612
|
-
datahub/metadata/schema.avsc,sha256=
|
|
614
|
+
datahub/metadata/_internal_schema_classes.py,sha256=UACA9XSSdTueZTUW4v_4OWIsWga3T99I3gnGAPCe2w8,1019257
|
|
615
|
+
datahub/metadata/schema.avsc,sha256=GsQZCPSD3_KKXvGALSzgTgIONL0r3tXme9M4rXQy_q4,707244
|
|
613
616
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
614
617
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
615
618
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -711,7 +714,7 @@ datahub/metadata/schemas/ChartQuery.avsc,sha256=6qz8Q5wa5h0GS6QcwfzVZrRi26PoEwXy
|
|
|
711
714
|
datahub/metadata/schemas/ChartUsageStatistics.avsc,sha256=FjEEPj_19jUvJcS9cyZtHqByQyPdt2xhmxfFdw9mSM4,5881
|
|
712
715
|
datahub/metadata/schemas/Container.avsc,sha256=pEpRQgClrJRm59eAiltc4YoP0pg7TG6Bu-ENCh11-mw,813
|
|
713
716
|
datahub/metadata/schemas/ContainerKey.avsc,sha256=5wUgUbR1zzNcSpeDOHm4XWFf_xDtXFuIikbjKXMFe80,939
|
|
714
|
-
datahub/metadata/schemas/ContainerProperties.avsc,sha256=
|
|
717
|
+
datahub/metadata/schemas/ContainerProperties.avsc,sha256=BPhrRf4I7UImQhB-55Qby82MXKuDQL0cwlvdg0aZEHs,5112
|
|
715
718
|
datahub/metadata/schemas/CorpGroupEditableInfo.avsc,sha256=sItWMAGfQdYwmF5xLzMnYGl_rtXvGlxPbEjmRm1E6u4,1290
|
|
716
719
|
datahub/metadata/schemas/CorpGroupInfo.avsc,sha256=G83lndR7C6WPfccFg7qFE5Ely5vrdxC8x7zJB3fW3Sw,4740
|
|
717
720
|
datahub/metadata/schemas/CorpGroupKey.avsc,sha256=B_RMHAFF_nd86qtO2p1slAZnxruCmBM7DUfILAU_UaI,953
|
|
@@ -728,7 +731,7 @@ datahub/metadata/schemas/DashboardUsageStatistics.avsc,sha256=pUAKqs49Wy5pAL92g_
|
|
|
728
731
|
datahub/metadata/schemas/DataContractKey.avsc,sha256=m0ej_Wu7NcuZQCRwQI3Sidfv9bUy5mvuhlpgax6i1xA,511
|
|
729
732
|
datahub/metadata/schemas/DataContractProperties.avsc,sha256=RCxuJMlZwqEE0iHTpuXvcH6zRFoOt7ysQFPrJRp3RqE,4763
|
|
730
733
|
datahub/metadata/schemas/DataContractStatus.avsc,sha256=5yvT43AIB13Dn_h0-4s7fsL7BTuXhkK5pi2KJug4_qg,1029
|
|
731
|
-
datahub/metadata/schemas/DataFlowInfo.avsc,sha256=
|
|
734
|
+
datahub/metadata/schemas/DataFlowInfo.avsc,sha256=SOXI26Vgv3O-t9z6_ff0gzVL5-M1XBlCOUCI9vB-jpk,4958
|
|
732
735
|
datahub/metadata/schemas/DataFlowKey.avsc,sha256=lIXr1oVJIHxOEibTx1YWFhGY2VQyWs9AW65eePPmdXI,1345
|
|
733
736
|
datahub/metadata/schemas/DataHubAccessTokenInfo.avsc,sha256=WS77M5w7GJFxUAiyXaxUvBqO0XFV2FnKPxXSXYbXHTE,1646
|
|
734
737
|
datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4NJOB5yODVr9J-wZzkZanEgo,483
|
|
@@ -755,7 +758,7 @@ datahub/metadata/schemas/DataHubUpgradeRequest.avsc,sha256=3xYsf3XVHBcclHdRnLVMo
|
|
|
755
758
|
datahub/metadata/schemas/DataHubUpgradeResult.avsc,sha256=VydVb4yqjIviR73-T6TooF6OiahwxTHciP97NuF4qvI,1385
|
|
756
759
|
datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdPQYwvhhv9RQv6gEeg,11642
|
|
757
760
|
datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
|
|
758
|
-
datahub/metadata/schemas/DataJobInfo.avsc,sha256=
|
|
761
|
+
datahub/metadata/schemas/DataJobInfo.avsc,sha256=Bc9qdDcXI0GQdEgNTpgHaBbnrppDKQ-1xR26diOSVIQ,7488
|
|
759
762
|
datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
|
|
760
763
|
datahub/metadata/schemas/DataJobKey.avsc,sha256=S7egH8jWjKW52MG6Pg7plDoP15XfTTiMde5V6nR6ycE,1624
|
|
761
764
|
datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
|
|
@@ -770,7 +773,7 @@ datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=xyGBUf3vFHrMLtmZj
|
|
|
770
773
|
datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5IVetgD8mW2k--F6CwmYXM3KOE6edU8,3836
|
|
771
774
|
datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
|
|
772
775
|
datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
|
|
773
|
-
datahub/metadata/schemas/DataProcessKey.avsc,sha256=
|
|
776
|
+
datahub/metadata/schemas/DataProcessKey.avsc,sha256=ZZE2HN4mwZtm_TJNcdohFS97WXytFq9HAs_-shor6sY,2518
|
|
774
777
|
datahub/metadata/schemas/DataProductKey.avsc,sha256=SyjmL2ieea1P6uipXst37mD5NdGPTqDvJAL3CVo91wk,661
|
|
775
778
|
datahub/metadata/schemas/DataProductProperties.avsc,sha256=Lc3duV7YMJLvo_RwckLbW4bbmPrhSS1D-bxVVboNX2c,6930
|
|
776
779
|
datahub/metadata/schemas/DataTransformLogic.avsc,sha256=nHTH6UzJ2Zz88N2aWa96hawLUR20HP7eSynfPtI1kzg,2111
|
|
@@ -779,7 +782,7 @@ datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiF
|
|
|
779
782
|
datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
|
|
780
783
|
datahub/metadata/schemas/DatahubIngestionRunSummary.avsc,sha256=_Ek7NqfJVTLqlM0NR9BRA57N9_ejwDdQvz7B1tVxSEE,9367
|
|
781
784
|
datahub/metadata/schemas/DatasetDeprecation.avsc,sha256=ucXxaDcAUib9_y0k5qOINMn5VK2X3trHK2dcpNcsR2Q,1256
|
|
782
|
-
datahub/metadata/schemas/DatasetKey.avsc,sha256=
|
|
785
|
+
datahub/metadata/schemas/DatasetKey.avsc,sha256=xNh2Zbg1POuHD3qu42-__zNVhKWx6QkA2LY7Dlk0YSY,3473
|
|
783
786
|
datahub/metadata/schemas/DatasetProfile.avsc,sha256=3ZCU9JD6l2razACp0AY6LLMgnkMTj6D_5Xk9np6WWRM,9965
|
|
784
787
|
datahub/metadata/schemas/DatasetProperties.avsc,sha256=DFJn75feqaoQk84zin_o_lqsFFhqkwya5LGC5LLJXbU,4209
|
|
785
788
|
datahub/metadata/schemas/DatasetUpstreamLineage.avsc,sha256=PjAWPbsqwH7FjX2kFDy0dE6ENYOwRynH9vJerWisr2A,5365
|
|
@@ -832,7 +835,7 @@ datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=00paBmYoFIlIUebS0X6BL9y5xqn
|
|
|
832
835
|
datahub/metadata/schemas/GlossaryTerms.avsc,sha256=ogOFO6Hr2Xb7s1JHqxsCPY8r_qY_9kwu69k5-E3j2BM,7123
|
|
833
836
|
datahub/metadata/schemas/GroupMembership.avsc,sha256=wT3Hbpv2Z7V4X_-rIoed0cukAOMyYEL93udK8mMCjn0,557
|
|
834
837
|
datahub/metadata/schemas/IcebergCatalogInfo.avsc,sha256=X9Ejqzn1DyxNIth7vDhtPjGG4xMPQMhl7f-S7fBFxek,691
|
|
835
|
-
datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=
|
|
838
|
+
datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=0m7cQm8cCnBWNI5jGGgr5ZdOg66RQGWSf3gf8ay53So,2705
|
|
836
839
|
datahub/metadata/schemas/IncidentInfo.avsc,sha256=L8xldmWyOW4Ml2Fm9XTRL13lP1CAEP0kgXsd_jLZaEU,12425
|
|
837
840
|
datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
|
|
838
841
|
datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
|
|
@@ -848,17 +851,17 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=hVxNEqsx4GgG11GVryn9ms16O
|
|
|
848
851
|
datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
|
|
849
852
|
datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
|
|
850
853
|
datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
|
|
851
|
-
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=
|
|
854
|
+
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=EcA0z4sQfqe3IJ8PO8cGW34XMxc9Q2BbCBjgkJTaznE,2685
|
|
852
855
|
datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=7IlGrMmX8nfgezvaZyrXskCTCRlwvRzGOYUOpFV3r6Y,5480
|
|
853
856
|
datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
|
|
854
|
-
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=
|
|
857
|
+
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=zIyIHI-23i_oQMbc1sigar9sJNJsa6CYfHHy-nH5IXE,2779
|
|
855
858
|
datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=AZ5Pohk3_pCctQ4hcE1UOURQFYHQne0dw_lRUpOu5WY,6924
|
|
856
|
-
datahub/metadata/schemas/MLModelKey.avsc,sha256=
|
|
859
|
+
datahub/metadata/schemas/MLModelKey.avsc,sha256=zwoY9opTL5tMm5aoRHoWcNv5DjERYS-hWR05kVFlcTw,3148
|
|
857
860
|
datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
|
|
858
861
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
|
|
859
862
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
860
863
|
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
861
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=
|
|
864
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=kwEwLUSPuqdrx_7uOX2XnEZ6Olm4p2ezYt0bTQSgaTk,377034
|
|
862
865
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=Cf5eECeShCA_XHFr2MRhRQpPE61F6Xv-z1jjoBLJLgc,12239
|
|
863
866
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=tvO5cGIqZAIvUbMon1RAKgSY4E0jvBqT5VmLWAuNGkY,9770
|
|
864
867
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -1025,7 +1028,7 @@ datahub/utilities/sql_formatter.py,sha256=tYXIsKjKmpKh0JXGxeAPrHkUWYd1SwJNLjUZsf
|
|
|
1025
1028
|
datahub/utilities/sqlalchemy_query_combiner.py,sha256=oxW20uXz8hV1Zb4fLXvTQ7c3LjACBsrF58TR2_-RSps,14982
|
|
1026
1029
|
datahub/utilities/sqlalchemy_type_converter.py,sha256=H4S4xnnyPozDBHFhBh4rjjoXa5novFzYIUBJy2KSrVc,9805
|
|
1027
1030
|
datahub/utilities/sqllineage_patch.py,sha256=0Buh50bmEqJFg1HFRCknCnePo1cecI4JmGxVhM_jh2g,1976
|
|
1028
|
-
datahub/utilities/stats_collections.py,sha256=
|
|
1031
|
+
datahub/utilities/stats_collections.py,sha256=9QDEk40UxhmQwDS6I63Gp6fcIBqmXVinKl7x2xHCD34,1702
|
|
1029
1032
|
datahub/utilities/str_enum.py,sha256=EsqCLPbrqyQ2YU_wt7QP-a6P5fnpIshXJ3AI8gLBlVA,474
|
|
1030
1033
|
datahub/utilities/tee_io.py,sha256=jBrsUfTPTk9IICntfGOG0HR-Fjp8BQMde-FPQ4r3kuI,601
|
|
1031
1034
|
datahub/utilities/threaded_iterator_executor.py,sha256=6BpCE0os3d-uMYxHBilPQC-JvEBkU6JQY4bGs06JKYI,2004
|
|
@@ -1075,8 +1078,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1075
1078
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1076
1079
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1077
1080
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1078
|
-
acryl_datahub-1.1.0.
|
|
1079
|
-
acryl_datahub-1.1.0.
|
|
1080
|
-
acryl_datahub-1.1.0.
|
|
1081
|
-
acryl_datahub-1.1.0.
|
|
1082
|
-
acryl_datahub-1.1.0.
|
|
1081
|
+
acryl_datahub-1.1.0.5rc1.dist-info/METADATA,sha256=sKbnEOEUAOv5hgUfoytH-EIKqUc7QxK96ENS7SZUIPs,182347
|
|
1082
|
+
acryl_datahub-1.1.0.5rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1083
|
+
acryl_datahub-1.1.0.5rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
|
|
1084
|
+
acryl_datahub-1.1.0.5rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1085
|
+
acryl_datahub-1.1.0.5rc1.dist-info/RECORD,,
|
|
@@ -39,6 +39,7 @@ datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:
|
|
|
39
39
|
datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
|
|
40
40
|
datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
|
|
41
41
|
datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
|
|
42
|
+
datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
|
|
42
43
|
dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
|
|
43
44
|
dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
|
|
44
45
|
delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
|
datahub/_version.py
CHANGED
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -4,6 +4,7 @@ import functools
|
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
|
+
import re
|
|
7
8
|
import time
|
|
8
9
|
from collections import defaultdict
|
|
9
10
|
from dataclasses import dataclass
|
|
@@ -104,6 +105,22 @@ BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
|
|
|
104
105
|
)
|
|
105
106
|
|
|
106
107
|
|
|
108
|
+
def preserve_unicode_escapes(obj: Any) -> Any:
    """Recursively replace non-ASCII characters in strings with ``\\uXXXX`` escapes.

    Dict values and list items are processed recursively (dict keys are left
    untouched). Any other type is returned unchanged.

    Characters outside the Basic Multilingual Plane (code point > U+FFFF) are
    written as a UTF-16 surrogate pair, because a ``\\u`` escape carries exactly
    four hex digits; formatting the raw code point would yield five or more
    digits and decode to a different character.

    Args:
        obj: A dict, list, str, or any other value.

    Returns:
        A new dict/list/str with non-ASCII characters escaped, or ``obj``
        itself when it is none of those types.
    """

    def escape_unicode(match: Any) -> str:
        code_point = ord(match.group(0))
        if code_point > 0xFFFF:
            # Split into high/low surrogates so each escape stays 4 hex digits.
            offset = code_point - 0x10000
            high = 0xD800 + (offset >> 10)
            low = 0xDC00 + (offset & 0x3FF)
            return f"\\u{high:04x}\\u{low:04x}"
        return f"\\u{code_point:04x}"

    if isinstance(obj, dict):
        return {k: preserve_unicode_escapes(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [preserve_unicode_escapes(item) for item in obj]
    elif isinstance(obj, str):
        # Convert non-ASCII characters back to \u escapes
        return re.sub(r"[^\x00-\x7F]", escape_unicode, obj)
    else:
        return obj
|
|
122
|
+
|
|
123
|
+
|
|
107
124
|
class EmitMode(ConfigEnum):
|
|
108
125
|
# Fully synchronous processing that updates both primary storage (SQL) and search storage (Elasticsearch) before returning.
|
|
109
126
|
# Provides the strongest consistency guarantee but with the highest cost. Best for critical operations where immediate
|
|
@@ -611,7 +628,7 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
611
628
|
else:
|
|
612
629
|
url = f"{self._gms_server}/aspects?action=ingestProposal"
|
|
613
630
|
|
|
614
|
-
mcp_obj = pre_json_transform(mcp.to_obj())
|
|
631
|
+
mcp_obj = preserve_unicode_escapes(pre_json_transform(mcp.to_obj()))
|
|
615
632
|
payload_dict = {
|
|
616
633
|
"proposal": mcp_obj,
|
|
617
634
|
"async": "true"
|
datahub/ingestion/api/sink.py
CHANGED
|
@@ -147,6 +147,9 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
147
147
|
def close(self) -> None:
|
|
148
148
|
pass
|
|
149
149
|
|
|
150
|
+
def flush(self) -> None:
    """No-op flush hook: this implementation does nothing and returns None."""
    return None
|
|
152
|
+
|
|
150
153
|
def configured(self) -> str:
|
|
151
154
|
"""Override this method to output a human-readable and scrubbed version of the configured sink"""
|
|
152
155
|
return ""
|
|
@@ -502,7 +502,7 @@ class Pipeline:
|
|
|
502
502
|
self._handle_uncaught_pipeline_exception(exc)
|
|
503
503
|
finally:
|
|
504
504
|
clear_global_warnings()
|
|
505
|
-
|
|
505
|
+
self.sink.flush()
|
|
506
506
|
self._notify_reporters_on_ingestion_completion()
|
|
507
507
|
|
|
508
508
|
def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
|
|
@@ -5,6 +5,7 @@ import functools
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
7
|
import threading
|
|
8
|
+
import time
|
|
8
9
|
import uuid
|
|
9
10
|
from enum import auto
|
|
10
11
|
from typing import List, Optional, Tuple, Union
|
|
@@ -346,6 +347,17 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
|
|
|
346
347
|
RecordEnvelope(item, metadata={}), NoopWriteCallback()
|
|
347
348
|
)
|
|
348
349
|
|
|
350
|
+
def flush(self) -> None:
    """Block until ``self.report.pending_requests`` is no longer positive.

    The counter is polled with a 0.1 second sleep between checks, and the
    remaining count is logged on every 1000th poll.
    """
    polls = 0
    while True:
        if not self.report.pending_requests > 0:
            return
        time.sleep(0.1)
        polls += 1
        if polls % 1000 != 0:
            continue
        # Periodic progress line so a long wait is visible in the logs.
        remaining = self.report.pending_requests
        logger.info(f"Waiting for {remaining} records to be written")
|
|
360
|
+
|
|
349
361
|
def close(self):
|
|
350
362
|
with self.report.main_thread_blocking_timer:
|
|
351
363
|
self.executor.shutdown()
|
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
from typing import Iterable, List, Optional
|
|
6
6
|
|
|
7
|
+
from datahub.configuration.common import AllowDenyPattern
|
|
7
8
|
from datahub.ingestion.api.common import PipelineContext
|
|
8
9
|
from datahub.ingestion.api.decorators import (
|
|
9
10
|
SupportStatus,
|
|
@@ -242,7 +243,23 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
242
243
|
).workunit_processor,
|
|
243
244
|
]
|
|
244
245
|
|
|
246
|
+
def _warn_deprecated_configs(self):
|
|
247
|
+
if (
|
|
248
|
+
self.config.match_fully_qualified_names is not None
|
|
249
|
+
and not self.config.match_fully_qualified_names
|
|
250
|
+
and self.config.schema_pattern is not None
|
|
251
|
+
and self.config.schema_pattern != AllowDenyPattern.allow_all()
|
|
252
|
+
):
|
|
253
|
+
self.report.report_warning(
|
|
254
|
+
message="Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
|
|
255
|
+
"Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
|
|
256
|
+
"The config option `match_fully_qualified_names` will be removed in future and the default behavior will be like `match_fully_qualified_names: True`.",
|
|
257
|
+
context="Config option deprecation warning",
|
|
258
|
+
title="Config option deprecation warning",
|
|
259
|
+
)
|
|
260
|
+
|
|
245
261
|
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
262
|
+
self._warn_deprecated_configs()
|
|
246
263
|
projects = get_projects(
|
|
247
264
|
self.bq_schema_extractor.schema_api,
|
|
248
265
|
self.report,
|
|
@@ -21,6 +21,7 @@ from datahub.ingestion.source.dremio.dremio_datahub_source_mapping import (
|
|
|
21
21
|
)
|
|
22
22
|
from datahub.ingestion.source.dremio.dremio_reporting import DremioSourceReport
|
|
23
23
|
from datahub.ingestion.source.dremio.dremio_sql_queries import DremioSQLQueries
|
|
24
|
+
from datahub.utilities.perf_timer import PerfTimer
|
|
24
25
|
|
|
25
26
|
logger = logging.getLogger(__name__)
|
|
26
27
|
|
|
@@ -54,6 +55,8 @@ class DremioAPIOperations:
|
|
|
54
55
|
self.deny_schema_pattern: List[str] = connection_args.schema_pattern.deny
|
|
55
56
|
self._max_workers: int = connection_args.max_workers
|
|
56
57
|
self.is_dremio_cloud = connection_args.is_dremio_cloud
|
|
58
|
+
self.start_time = connection_args.start_time
|
|
59
|
+
self.end_time = connection_args.end_time
|
|
57
60
|
self.report = report
|
|
58
61
|
self.session = requests.Session()
|
|
59
62
|
if connection_args.is_dremio_cloud:
|
|
@@ -233,47 +236,71 @@ class DremioAPIOperations:
|
|
|
233
236
|
|
|
234
237
|
def get(self, url: str) -> Dict:
    """Issue a GET request against the Dremio API and return the JSON body.

    Args:
        url: Path appended to ``self.base_url``. Prefixed with ``"GET "`` it
            is also the key under which the call count and elapsed seconds
            are accumulated on ``self.report``.

    Returns:
        Whatever ``response.json()`` yields for the response.
    """
    metric_key = "GET " + url
    logger.debug(f"GET request to {self.base_url + url}")

    stats = self.report
    stats.api_calls_total += 1
    stats.api_calls_by_method_and_path[metric_key] += 1

    with PerfTimer() as timer:
        response = self.session.get(
            url=(self.base_url + url),
            verify=self._verify,
            timeout=self._timeout,
        )
        stats.api_call_secs_by_method_and_path[metric_key] += (
            timer.elapsed_seconds()
        )

    # The HTTP status is intentionally not checked: per the original note,
    # enabling response.raise_for_status() makes the integration tests fail.
    return response.json()
|
|
242
254
|
|
|
243
255
|
def post(self, url: str, data: str) -> Dict:
    """Execute a POST request on Dremio and return the decoded JSON body.

    Args:
        url: Path appended to ``self.base_url``. Prefixed with ``"POST "`` it
            is also the key under which the call count and elapsed seconds
            are accumulated on ``self.report``.
        data: Request body, passed through unchanged to the session.

    Returns:
        Whatever ``response.json()`` yields for the response.
    """
    logger.debug(f"POST request to {self.base_url + url}")
    self.report.api_calls_total += 1
    self.report.api_calls_by_method_and_path["POST " + url] += 1

    with PerfTimer() as timer:
        response = self.session.post(
            url=(self.base_url + url),
            data=data,
            verify=self._verify,
            timeout=self._timeout,
        )
        self.report.api_call_secs_by_method_and_path["POST " + url] += (
            timer.elapsed_seconds()
        )

    # The HTTP status is intentionally not checked: per the original note,
    # enabling response.raise_for_status() makes the integration tests fail.
    return response.json()
|
|
252
273
|
|
|
253
274
|
def execute_query(self, query: str, timeout: int = 3600) -> List[Dict[str, Any]]:
|
|
254
275
|
"""Execute SQL query with timeout and error handling"""
|
|
255
276
|
try:
|
|
256
|
-
|
|
277
|
+
with PerfTimer() as timer:
|
|
278
|
+
logger.info(f"Executing query: {query}")
|
|
279
|
+
response = self.post(url="/sql", data=json.dumps({"sql": query}))
|
|
257
280
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
281
|
+
if "errorMessage" in response:
|
|
282
|
+
self.report.failure(
|
|
283
|
+
message="SQL Error", context=f"{response['errorMessage']}"
|
|
284
|
+
)
|
|
285
|
+
raise DremioAPIException(f"SQL Error: {response['errorMessage']}")
|
|
263
286
|
|
|
264
|
-
|
|
287
|
+
job_id = response["id"]
|
|
265
288
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
289
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
|
290
|
+
future = executor.submit(self.fetch_results, job_id)
|
|
291
|
+
try:
|
|
292
|
+
result = future.result(timeout=timeout)
|
|
293
|
+
logger.info(
|
|
294
|
+
f"Query executed in {timer.elapsed_seconds()} seconds with {len(result)} results"
|
|
295
|
+
)
|
|
296
|
+
return result
|
|
297
|
+
except concurrent.futures.TimeoutError:
|
|
298
|
+
self.cancel_query(job_id)
|
|
299
|
+
raise DremioAPIException(
|
|
300
|
+
f"Query execution timed out after {timeout} seconds"
|
|
301
|
+
) from None
|
|
302
|
+
except RuntimeError as e:
|
|
303
|
+
raise DremioAPIException() from e
|
|
277
304
|
|
|
278
305
|
except requests.RequestException as e:
|
|
279
306
|
raise DremioAPIException("Error executing query") from e
|
|
@@ -603,10 +630,25 @@ class DremioAPIOperations:
|
|
|
603
630
|
return parents_list
|
|
604
631
|
|
|
605
632
|
def extract_all_queries(self) -> List[Dict[str, Any]]:
    """Build the jobs query for the current edition and execute it.

    ``self.start_time`` and ``self.end_time`` are rendered as
    ``YYYY-MM-DD HH:MM:SS.mmm`` strings (microseconds cut down to
    milliseconds); an unset bound is passed on as None. The cloud edition
    uses ``get_query_all_jobs_cloud``, every other edition
    ``get_query_all_jobs``.

    Returns:
        The rows returned by ``self.execute_query`` for the jobs query.
    """

    def _as_sql_timestamp(moment):
        # Drop the last three digits of %f to keep millisecond precision.
        if not moment:
            return None
        return moment.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

    window_start = _as_sql_timestamp(self.start_time)
    window_end = _as_sql_timestamp(self.end_time)

    if self.edition == DremioEdition.CLOUD:
        build_jobs_query = DremioSQLQueries.get_query_all_jobs_cloud
    else:
        build_jobs_query = DremioSQLQueries.get_query_all_jobs

    # NOTE: the keyword names end in "_millis" but receive formatted strings.
    jobs_query = build_jobs_query(
        start_timestamp_millis=window_start,
        end_timestamp_millis=window_end,
    )
    return self.execute_query(query=jobs_query)
|
|
612
654
|
|
|
@@ -685,6 +727,27 @@ class DremioAPIOperations:
|
|
|
685
727
|
|
|
686
728
|
return any(re.match(regex_pattern, path, re.IGNORECASE) for path in paths)
|
|
687
729
|
|
|
730
|
+
def _could_match_pattern(self, pattern: str, path_components: List[str]) -> bool:
|
|
731
|
+
"""
|
|
732
|
+
Check if a container path could potentially match a schema pattern.
|
|
733
|
+
This handles hierarchical path matching for container filtering.
|
|
734
|
+
"""
|
|
735
|
+
if pattern == ".*":
|
|
736
|
+
return True
|
|
737
|
+
|
|
738
|
+
current_path = ".".join(path_components)
|
|
739
|
+
|
|
740
|
+
# Handle simple .* patterns (like "a.b.c.*")
|
|
741
|
+
if pattern.endswith(".*") and not any(c in pattern for c in "^$[](){}+?\\"):
|
|
742
|
+
# Simple dotstar pattern - check prefix matching
|
|
743
|
+
pattern_prefix = pattern[:-2] # Remove ".*"
|
|
744
|
+
return current_path.lower().startswith(
|
|
745
|
+
pattern_prefix.lower()
|
|
746
|
+
) or pattern_prefix.lower().startswith(current_path.lower())
|
|
747
|
+
else:
|
|
748
|
+
# Complex regex pattern - use existing regex matching logic
|
|
749
|
+
return self._check_pattern_match(pattern, [current_path], allow_prefix=True)
|
|
750
|
+
|
|
688
751
|
def should_include_container(self, path: List[str], name: str) -> bool:
|
|
689
752
|
"""
|
|
690
753
|
Helper method to check if a container should be included based on schema patterns.
|
|
@@ -711,41 +774,8 @@ class DremioAPIOperations:
|
|
|
711
774
|
|
|
712
775
|
# Check allow patterns
|
|
713
776
|
for pattern in self.allow_schema_pattern:
|
|
714
|
-
#
|
|
715
|
-
if
|
|
716
|
-
pattern_parts = pattern.split(".")
|
|
717
|
-
path_parts = path_components
|
|
718
|
-
|
|
719
|
-
# If pattern has exact same number of parts, check each component
|
|
720
|
-
if len(pattern_parts) == len(path_parts):
|
|
721
|
-
matches = True
|
|
722
|
-
for p_part, c_part in zip(pattern_parts, path_parts):
|
|
723
|
-
if p_part != "*" and p_part.lower() != c_part.lower():
|
|
724
|
-
matches = False
|
|
725
|
-
break
|
|
726
|
-
if matches:
|
|
727
|
-
self.report.report_container_scanned(full_path)
|
|
728
|
-
return True
|
|
729
|
-
# Otherwise check if current path is prefix match
|
|
730
|
-
else:
|
|
731
|
-
# Remove the trailing wildcard if present
|
|
732
|
-
if pattern_parts[-1] == "*":
|
|
733
|
-
pattern_parts = pattern_parts[:-1]
|
|
734
|
-
|
|
735
|
-
for i in range(len(path_parts)):
|
|
736
|
-
current_path = ".".join(path_parts[: i + 1])
|
|
737
|
-
pattern_prefix = ".".join(pattern_parts[: i + 1])
|
|
738
|
-
|
|
739
|
-
if pattern_prefix.startswith(current_path):
|
|
740
|
-
self.report.report_container_scanned(full_path)
|
|
741
|
-
return True
|
|
742
|
-
|
|
743
|
-
# Direct pattern matching
|
|
744
|
-
if self._check_pattern_match(
|
|
745
|
-
pattern=pattern,
|
|
746
|
-
paths=[full_path],
|
|
747
|
-
allow_prefix=True,
|
|
748
|
-
):
|
|
777
|
+
# Check if current path could potentially match this pattern
|
|
778
|
+
if self._could_match_pattern(pattern, path_components):
|
|
749
779
|
self.report.report_container_scanned(full_path)
|
|
750
780
|
return True
|
|
751
781
|
|
|
@@ -9,6 +9,7 @@ from datahub.configuration.source_common import (
|
|
|
9
9
|
EnvConfigMixin,
|
|
10
10
|
PlatformInstanceConfigMixin,
|
|
11
11
|
)
|
|
12
|
+
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
12
13
|
from datahub.ingestion.source.ge_profiling_config import GEProfilingBaseConfig
|
|
13
14
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
14
15
|
StatefulStaleMetadataRemovalConfig,
|
|
@@ -118,6 +119,7 @@ class DremioSourceMapping(EnvConfigMixin, PlatformInstanceConfigMixin, ConfigMod
|
|
|
118
119
|
class DremioSourceConfig(
|
|
119
120
|
DremioConnectionConfig,
|
|
120
121
|
StatefulIngestionConfigBase,
|
|
122
|
+
BaseTimeWindowConfig,
|
|
121
123
|
EnvConfigMixin,
|
|
122
124
|
PlatformInstanceConfigMixin,
|
|
123
125
|
):
|