acryl-datahub 1.0.0.2rc5__py3-none-any.whl → 1.0.0.3rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/METADATA +2516 -2516
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/RECORD +24 -23
- datahub/_version.py +1 -1
- datahub/emitter/mcp.py +5 -1
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
- datahub/ingestion/source/hex/api.py +1 -20
- datahub/ingestion/source/mlflow.py +19 -6
- datahub/ingestion/source/powerbi/config.py +12 -0
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
- datahub/ingestion/source/sigma/config.py +75 -6
- datahub/ingestion/source/sigma/sigma.py +16 -1
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
- datahub/ingestion/source/snowflake/snowflake_query.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/sql_parsing/sqlglot_utils.py +16 -8
- datahub/testing/mcp_diff.py +15 -2
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.0.0.
|
|
1
|
+
acryl_datahub-1.0.0.3rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=wKoNLhdfRXZqqQqju-C7yvPFz3YKQceonahT8wrZq6Y,323
|
|
5
5
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
6
6
|
datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -121,7 +121,7 @@ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1K
|
|
|
121
121
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
122
122
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
123
123
|
datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
|
|
124
|
-
datahub/emitter/mcp.py,sha256=
|
|
124
|
+
datahub/emitter/mcp.py,sha256=v7tKlIFX4s7f77KQYeFww8QbOQu6-qU609VeQiUkcsY,9796
|
|
125
125
|
datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
|
|
126
126
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
127
127
|
datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
|
|
@@ -151,7 +151,7 @@ datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188m
|
|
|
151
151
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
152
152
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
153
153
|
datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
|
|
154
|
-
datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=
|
|
154
|
+
datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=fMjPnyWEofIZV52E2AFYU3IgBJwyZvbygXxCJyEtcWI,4442
|
|
155
155
|
datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
156
156
|
datahub/ingestion/extractor/extractor_registry.py,sha256=f7CLfW3pr29QZkXSHbp7HjUrsdw7ejQJmot-tiSPcqc,342
|
|
157
157
|
datahub/ingestion/extractor/json_ref_patch.py,sha256=4g3ZWHn7rwS74jUvSXJiGpi-UKHhiSYKKgBeU4E5ukE,1448
|
|
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
|
|
|
205
205
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
206
206
|
datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
|
|
207
207
|
datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
|
|
208
|
-
datahub/ingestion/source/mlflow.py,sha256=
|
|
208
|
+
datahub/ingestion/source/mlflow.py,sha256=fh7izN9jlSwbpGIrEyJktlmwFZR5vNG9z9L5VQ31k_4,33141
|
|
209
209
|
datahub/ingestion/source/mode.py,sha256=_FKZutF-59w0pYhko6HSVL3yjjYNd329-2DJmyfDqF8,64492
|
|
210
210
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
211
211
|
datahub/ingestion/source/nifi.py,sha256=D1gBXxdpLuUQ0eurwofIR_SGg1rHGhwk3qxsWI1PT9c,56882
|
|
@@ -327,7 +327,7 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
|
|
|
327
327
|
datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
328
328
|
datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
|
|
329
329
|
datahub/ingestion/source/hex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
330
|
-
datahub/ingestion/source/hex/api.py,sha256=
|
|
330
|
+
datahub/ingestion/source/hex/api.py,sha256=OVQNI_11NJJcNCT6OzSDEtVjNcom0vmes_KkjgzWCcI,11806
|
|
331
331
|
datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJX1atiiDZyKtg,271
|
|
332
332
|
datahub/ingestion/source/hex/hex.py,sha256=PIRl8fPkKtlHV7cqR4H8RKVYdTLgEFXHFzc3QAqJLhE,12733
|
|
333
333
|
datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
|
|
@@ -376,15 +376,16 @@ datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH5
|
|
|
376
376
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
377
377
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
|
|
378
378
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
379
|
-
datahub/ingestion/source/powerbi/config.py,sha256
|
|
379
|
+
datahub/ingestion/source/powerbi/config.py,sha256=-gof-85gqS_cft2blp5Uw5TVypii4T_bl8XhTZUVlgc,24707
|
|
380
380
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
381
381
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
|
|
382
382
|
datahub/ingestion/source/powerbi/powerbi.py,sha256=b9zNeT9aS7v2GWUL1SROnIMwQwAFX0YTO2UNQMLWItc,56450
|
|
383
383
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
384
|
-
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=
|
|
384
|
+
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=l_L6DzOWMShOWGtVclcf4JtNWzSINuwJka59LjwRLCk,2091
|
|
385
385
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
386
|
+
datahub/ingestion/source/powerbi/m_query/odbc.py,sha256=fZgl8-M5s3Y-3U9OVQs7ttc8FTDbzodIM2HJtFmPNI8,5405
|
|
386
387
|
datahub/ingestion/source/powerbi/m_query/parser.py,sha256=5KqhUwj9H9yL9ZMPP9oSeVGiZjvXjw6Iu_HrGr95E5M,5876
|
|
387
|
-
datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=
|
|
388
|
+
datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=MqZj7VBf9ppKYrA-dRaOVGFpotLFqZditwOD-6ynkFg,41635
|
|
388
389
|
datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=ISH8Xjx51q2S81fn2v5RhCCU-kRAW3juxM0rMFs4TDo,17413
|
|
389
390
|
datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=NIKNNHAE4kTJefTM1WR-StJi9NuingaRYn_mS_kV6A8,6180
|
|
390
391
|
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
@@ -439,10 +440,10 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
|
|
|
439
440
|
datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
|
|
440
441
|
datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
|
|
441
442
|
datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
442
|
-
datahub/ingestion/source/sigma/config.py,sha256=
|
|
443
|
+
datahub/ingestion/source/sigma/config.py,sha256=xpZXt4f05-sroWFv9SbzVhU1-iBeVfU1ocJKb-fy3aM,6333
|
|
443
444
|
datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
|
|
444
|
-
datahub/ingestion/source/sigma/sigma.py,sha256=
|
|
445
|
-
datahub/ingestion/source/sigma/sigma_api.py,sha256=
|
|
445
|
+
datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
|
|
446
|
+
datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
|
|
446
447
|
datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
447
448
|
datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
|
|
448
449
|
datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -450,19 +451,19 @@ datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMP
|
|
|
450
451
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
451
452
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
452
453
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
453
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
454
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
|
|
454
455
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=pEw2O9xoTSIWDiROlkF8k4oj5zBjkqTnynLvut08yhc,17796
|
|
455
456
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
456
457
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
|
|
457
458
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
458
|
-
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=
|
|
459
|
-
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=
|
|
459
|
+
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=PY4Wy6i89nqRl92ARwXNqWwm-ifagkKbKKtxYWeswkk,29209
|
|
460
|
+
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=JtTrfzGqM9mk2Fr-F1X0KXzc_8ot7rD3dD2vPEuzd0E,40411
|
|
460
461
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP7i6cgK6Q1jXJPjDA1j9C8klus,6762
|
|
461
462
|
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
|
|
462
463
|
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
|
|
463
464
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
464
465
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
465
|
-
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=
|
|
466
|
+
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
|
|
466
467
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
|
|
467
468
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=2lmvAeZELTjAzg4Y5E0oY41r1IzVEvg6OHAvVJftSFk,14081
|
|
468
469
|
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=nAbudDVh9A0kqao3jnIdgBlFNhNk1WIxoU1cofeXkFQ,33905
|
|
@@ -940,7 +941,7 @@ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=A3_0wSxBJSRowEaslptDpBoKO42
|
|
|
940
941
|
datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
|
|
941
942
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
942
943
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
|
|
943
|
-
datahub/sql_parsing/sqlglot_utils.py,sha256=
|
|
944
|
+
datahub/sql_parsing/sqlglot_utils.py,sha256=5cUiEWLWfVTI7uIxolAfOfNVo50qnklzhj86gxSFWqg,14943
|
|
944
945
|
datahub/sql_parsing/tool_meta_extractor.py,sha256=EV_g7sOchTSUm2p6wluNJqND7-rDYokVTqqFCM7hQ6c,7599
|
|
945
946
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
946
947
|
datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
|
|
@@ -952,7 +953,7 @@ datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4N
|
|
|
952
953
|
datahub/testing/compare_metadata_json.py,sha256=mTU5evu7KLS3cx8OLOC1fFxj0eY1J1CGV2PEQZmapos,5361
|
|
953
954
|
datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
|
|
954
955
|
datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
|
|
955
|
-
datahub/testing/mcp_diff.py,sha256=
|
|
956
|
+
datahub/testing/mcp_diff.py,sha256=1BpQ3hST46cOQi1SmKdsto3j6x6Sk6yHm0vG1w9IDL0,10749
|
|
956
957
|
datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
|
|
957
958
|
datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
958
959
|
datahub/upgrade/upgrade.py,sha256=lf60_dCu51twObAL5E8NqdrW3_2lsnUJUaB9MSEVXwI,16638
|
|
@@ -1045,8 +1046,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1045
1046
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1046
1047
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1047
1048
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1048
|
-
acryl_datahub-1.0.0.
|
|
1049
|
-
acryl_datahub-1.0.0.
|
|
1050
|
-
acryl_datahub-1.0.0.
|
|
1051
|
-
acryl_datahub-1.0.0.
|
|
1052
|
-
acryl_datahub-1.0.0.
|
|
1049
|
+
acryl_datahub-1.0.0.3rc2.dist-info/METADATA,sha256=Iez_7GLl0EEt7MEDlMXlVb-A_-YB-RO4IZJRWSwuLjI,176855
|
|
1050
|
+
acryl_datahub-1.0.0.3rc2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
1051
|
+
acryl_datahub-1.0.0.3rc2.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1052
|
+
acryl_datahub-1.0.0.3rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1053
|
+
acryl_datahub-1.0.0.3rc2.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/emitter/mcp.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import json
|
|
3
|
-
from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
|
|
3
|
+
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
|
|
6
6
|
from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
|
|
@@ -69,6 +69,7 @@ class MetadataChangeProposalWrapper:
|
|
|
69
69
|
aspectName: Union[None, str] = None
|
|
70
70
|
aspect: Union[None, _Aspect] = None
|
|
71
71
|
systemMetadata: Union[None, SystemMetadataClass] = None
|
|
72
|
+
headers: Union[None, Dict[str, str]] = None
|
|
72
73
|
|
|
73
74
|
def __post_init__(self) -> None:
|
|
74
75
|
if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
|
|
@@ -112,6 +113,7 @@ class MetadataChangeProposalWrapper:
|
|
|
112
113
|
auditHeader=self.auditHeader,
|
|
113
114
|
aspectName=self.aspectName,
|
|
114
115
|
systemMetadata=self.systemMetadata,
|
|
116
|
+
headers=self.headers,
|
|
115
117
|
)
|
|
116
118
|
|
|
117
119
|
def make_mcp(self) -> MetadataChangeProposalClass:
|
|
@@ -211,6 +213,7 @@ class MetadataChangeProposalWrapper:
|
|
|
211
213
|
aspectName=mcpc.aspectName,
|
|
212
214
|
aspect=aspect,
|
|
213
215
|
systemMetadata=mcpc.systemMetadata,
|
|
216
|
+
headers=mcpc.headers,
|
|
214
217
|
)
|
|
215
218
|
else:
|
|
216
219
|
return None
|
|
@@ -228,6 +231,7 @@ class MetadataChangeProposalWrapper:
|
|
|
228
231
|
changeType=mcl.changeType,
|
|
229
232
|
auditHeader=mcl.auditHeader,
|
|
230
233
|
systemMetadata=mcl.systemMetadata,
|
|
234
|
+
headers=mcl.headers,
|
|
231
235
|
)
|
|
232
236
|
return cls.try_from_mcpc(mcpc) or mcpc
|
|
233
237
|
|
|
@@ -23,6 +23,7 @@ class EnsureAspectSizeProcessor:
|
|
|
23
23
|
):
|
|
24
24
|
self.report = report
|
|
25
25
|
self.payload_constraint = payload_constraint
|
|
26
|
+
self.schema_size_constraint = int(self.payload_constraint * 0.985)
|
|
26
27
|
|
|
27
28
|
def ensure_dataset_profile_size(
|
|
28
29
|
self, dataset_urn: str, profile: DatasetProfileClass
|
|
@@ -68,7 +69,7 @@ class EnsureAspectSizeProcessor:
|
|
|
68
69
|
for field in schema.fields:
|
|
69
70
|
field_size = len(json.dumps(pre_json_transform(field.to_obj())))
|
|
70
71
|
logger.debug(f"Field {field.fieldPath} takes total {field_size}")
|
|
71
|
-
if total_fields_size + field_size < self.
|
|
72
|
+
if total_fields_size + field_size < self.schema_size_constraint:
|
|
72
73
|
accepted_fields.append(field)
|
|
73
74
|
total_fields_size += field_size
|
|
74
75
|
else:
|
|
@@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
|
|
|
27
27
|
|
|
28
28
|
# The following models were Claude-generated from Hex API OpenAPI definition https://static.hex.site/openapi.json
|
|
29
29
|
# To be exclusively used internally for the deserialization of the API response
|
|
30
|
+
# Model is incomplete and fields may have not been mapped if not used in the ingestion
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
class HexApiAppViewStats(BaseModel):
|
|
@@ -83,20 +84,10 @@ class HexApiUser(BaseModel):
|
|
|
83
84
|
email: str
|
|
84
85
|
|
|
85
86
|
|
|
86
|
-
class HexApiAccessType(StrEnum):
|
|
87
|
-
"""Access type enum."""
|
|
88
|
-
|
|
89
|
-
NONE = "NONE"
|
|
90
|
-
VIEW = "VIEW"
|
|
91
|
-
EDIT = "EDIT"
|
|
92
|
-
FULL_ACCESS = "FULL_ACCESS"
|
|
93
|
-
|
|
94
|
-
|
|
95
87
|
class HexApiUserAccess(BaseModel):
|
|
96
88
|
"""User access model."""
|
|
97
89
|
|
|
98
90
|
user: HexApiUser
|
|
99
|
-
access: Optional[HexApiAccessType] = None
|
|
100
91
|
|
|
101
92
|
|
|
102
93
|
class HexApiCollectionData(BaseModel):
|
|
@@ -109,13 +100,6 @@ class HexApiCollectionAccess(BaseModel):
|
|
|
109
100
|
"""Collection access model."""
|
|
110
101
|
|
|
111
102
|
collection: HexApiCollectionData
|
|
112
|
-
access: Optional[HexApiAccessType] = None
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
class HexApiAccessSettings(BaseModel):
|
|
116
|
-
"""Access settings model."""
|
|
117
|
-
|
|
118
|
-
access: Optional[HexApiAccessType] = None
|
|
119
103
|
|
|
120
104
|
|
|
121
105
|
class HexApiWeeklySchedule(BaseModel):
|
|
@@ -145,9 +129,6 @@ class HexApiSharing(BaseModel):
|
|
|
145
129
|
users: Optional[List[HexApiUserAccess]] = []
|
|
146
130
|
collections: Optional[List[HexApiCollectionAccess]] = []
|
|
147
131
|
groups: Optional[List[Any]] = []
|
|
148
|
-
workspace: Optional[HexApiAccessSettings] = None
|
|
149
|
-
public_web: Optional[HexApiAccessSettings] = Field(default=None, alias="publicWeb")
|
|
150
|
-
support: Optional[HexApiAccessSettings] = None
|
|
151
132
|
|
|
152
133
|
class Config:
|
|
153
134
|
extra = "ignore" # Allow extra fields in the JSON
|
|
@@ -7,6 +7,7 @@ from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Unio
|
|
|
7
7
|
from mlflow import MlflowClient
|
|
8
8
|
from mlflow.entities import Dataset as MlflowDataset, Experiment, Run
|
|
9
9
|
from mlflow.entities.model_registry import ModelVersion, RegisteredModel
|
|
10
|
+
from mlflow.exceptions import MlflowException
|
|
10
11
|
from mlflow.store.entities import PagedList
|
|
11
12
|
from pydantic.fields import Field
|
|
12
13
|
|
|
@@ -589,8 +590,8 @@ class MLflowSource(StatefulIngestionSourceBase):
|
|
|
589
590
|
)
|
|
590
591
|
return runs
|
|
591
592
|
|
|
592
|
-
@staticmethod
|
|
593
593
|
def _traverse_mlflow_search_func(
|
|
594
|
+
self,
|
|
594
595
|
search_func: Callable[..., PagedList[T]],
|
|
595
596
|
**kwargs: Any,
|
|
596
597
|
) -> Iterable[T]:
|
|
@@ -598,12 +599,24 @@ class MLflowSource(StatefulIngestionSourceBase):
|
|
|
598
599
|
Utility to traverse an MLflow search_* functions which return PagedList.
|
|
599
600
|
"""
|
|
600
601
|
next_page_token = None
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
602
|
+
try:
|
|
603
|
+
while True:
|
|
604
|
+
paged_list = search_func(page_token=next_page_token, **kwargs)
|
|
605
|
+
yield from paged_list.to_list()
|
|
606
|
+
next_page_token = paged_list.token
|
|
607
|
+
if not next_page_token:
|
|
608
|
+
return
|
|
609
|
+
except MlflowException as e:
|
|
610
|
+
if e.error_code == "ENDPOINT_NOT_FOUND":
|
|
611
|
+
self.report.warning(
|
|
612
|
+
title="MLflow API Endpoint Not Found for Experiments.",
|
|
613
|
+
message="Please upgrade to version 1.28.0 or higher to ensure compatibility. Skipping ingestion for experiments and runs.",
|
|
614
|
+
context=None,
|
|
615
|
+
exc=e,
|
|
616
|
+
)
|
|
606
617
|
return
|
|
618
|
+
else:
|
|
619
|
+
raise # Only re-raise other exceptions
|
|
607
620
|
|
|
608
621
|
def _get_latest_version(self, registered_model: RegisteredModel) -> Optional[str]:
|
|
609
622
|
return (
|
|
@@ -192,6 +192,11 @@ class SupportedDataPlatform(Enum):
|
|
|
192
192
|
datahub_data_platform_name="mysql",
|
|
193
193
|
)
|
|
194
194
|
|
|
195
|
+
ODBC = DataPlatformPair(
|
|
196
|
+
powerbi_data_platform_name="Odbc",
|
|
197
|
+
datahub_data_platform_name="odbc",
|
|
198
|
+
)
|
|
199
|
+
|
|
195
200
|
|
|
196
201
|
@dataclass
|
|
197
202
|
class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
|
|
@@ -341,6 +346,13 @@ class PowerBiDashboardSourceConfig(
|
|
|
341
346
|
"For Google BigQuery the datasource's server is google bigquery project name. "
|
|
342
347
|
"For Databricks Unity Catalog the datasource's server is workspace FQDN.",
|
|
343
348
|
)
|
|
349
|
+
# ODBC DSN to platform mapping
|
|
350
|
+
dsn_to_platform_name: Dict[str, str] = pydantic.Field(
|
|
351
|
+
default={},
|
|
352
|
+
description="A mapping of ODBC DSN to DataHub data platform name. "
|
|
353
|
+
"For example with an ODBC connection string 'DSN=database' where the database type "
|
|
354
|
+
"is 'PostgreSQL' you would configure the mapping as 'database: postgres'.",
|
|
355
|
+
)
|
|
344
356
|
# deprecated warning
|
|
345
357
|
_dataset_type_mapping = pydantic_field_deprecated(
|
|
346
358
|
"dataset_type_mapping",
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Optional, Tuple, Union
|
|
3
|
+
|
|
4
|
+
# Regexes tried, in order, to find the server/host portion of an ODBC
# connection string.  Each one starts with the keyword it looks for and
# captures the value in group 1.  They are matched case-insensitively, so the
# mixed-case entries near the end are redundant with the upper-case ones; they
# are kept so the list contents stay stable for any external user.
server_patterns = [
    r"Server=([^:]+)[:][0-9]+/.*",
    r"SERVER=\{([^}]*)\}",
    r"SERVER=([^;]*)",
    r"HOST=\{([^}]*)\}",
    r"HOST=([^;]*)",
    r"DATA SOURCE=\{([^}]*)\}",
    r"DATA SOURCE=([^;]*)",
    r"DSN=\{([^}]*)\}",
    r"DSN=([^;]*)",
    r"Server=([^;]*)",
    r"S3OutputLocation=([^;]*)",
    r"HTTPPath=([^;]*)",
    r"Host=([^;]*)",
]

# Regexes tried, in order, to find the DSN value: double-quoted,
# single-quoted, then bare (terminated by ";").
dsn_patterns = [
    r"DSN\s*=\s*\"([^\"]+)\"",
    r"DSN\s*=\s*\'([^\']+)\'",
    r"DSN\s*=\s*([^;]+)",
]

# Normalized platform name -> regex searched for in a lower-cased driver or
# platform name.  Iteration order is the match priority.
platform_patterns = {
    "mysql": r"mysql",
    "postgres": r"post(gre(s|sql)?|gres)",
    "mssql": r"(sql\s*server|mssql|sqlncli)",
    "oracle": r"oracle",
    "db2": r"db2",
    "sqlite": r"sqlite",
    "access": r"(access|\.mdb|\.accdb)",
    "excel": r"(excel|\.xls)",
    "firebird": r"firebird",
    "informix": r"informix",
    "sybase": r"sybase",
    "teradata": r"teradata",
    "hadoop": r"(hadoop|hive)",
    "snowflake": r"snowflake",
    "redshift": r"redshift",
    "bigquery": r"bigquery",
    "athena": r"(athena|aws\s*athena)",
    "databricks": r"(databricks|spark)",
}

# Normalized platform name -> Power BI display name for that platform.
powerbi_platform_names = {
    "mysql": "MySQL",
    "postgres": "PostgreSQL",
    "mssql": "SQL Server",
    "oracle": "Oracle",
    "db2": "IBM DB2",
    "sqlite": "SQLite",
    "access": "Microsoft Access",
    "excel": "Microsoft Excel",
    "firebird": "Firebird",
    "informix": "IBM Informix",
    "sybase": "SAP Sybase",
    "teradata": "Teradata",
    "hadoop": "Hadoop",
    "snowflake": "Snowflake",
    "redshift": "Amazon Redshift",
    "bigquery": "Google BigQuery",
    "athena": "Amazon Athena",
    "databricks": "Databricks",
}

# A keyword only counts when it is not the tail of a longer keyword.  Without
# this guard "DSN=" is found inside "FILEDSN=" and "HOST=" inside a key such as
# "ProxyHost=", which yields the wrong value.  A lookbehind (rather than
# anchoring on ";") still accepts a keyword that follows a quote or a space.
_KEY_BOUNDARY = r"(?<![A-Za-z0-9_])"


def _search_key(pattern: str, connection_string: str) -> Optional["re.Match[str]"]:
    """Case-insensitively search for a keyword pattern at a keyword boundary."""
    return re.search(_KEY_BOUNDARY + pattern, connection_string, re.IGNORECASE)


def _first_value(patterns, connection_string: str) -> Optional[str]:
    """Return the stripped group 1 of the first pattern that matches, else None."""
    for pattern in patterns:
        match = _search_key(pattern, connection_string)
        if match:
            return match.group(1).strip()
    return None


def _match_platform(name_lower: str) -> Tuple[Optional[str], Optional[str]]:
    """Map a lower-cased name to (normalized platform, Power BI platform name)."""
    for candidate, pattern in platform_patterns.items():
        if re.search(pattern, name_lower):
            return candidate, powerbi_platform_names.get(candidate)
    return None, None


def extract_driver(connection_string: str) -> Union[str, None]:
    """
    Parse an ODBC connection string and extract the driver name.
    Handles whitespace in driver names and both the braced and bare forms.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str: The extracted driver name, or None if not found
    """
    # The braced form DRIVER={driver name} takes precedence over DRIVER=driver.
    return _first_value(
        (r"DRIVER=\{([^}]*)}", r"DRIVER=([^;]*)"),
        connection_string,
    )


def extract_dsn(connection_string: str) -> Union[str, None]:
    """
    Extract the DSN value from an ODBC connection string.

    Only the DSN keyword itself is considered; a longer keyword that merely
    ends in "DSN" (for example FILEDSN) is not treated as a DSN.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str or None: The extracted DSN value, or None if not found
    """
    return _first_value(dsn_patterns, connection_string)


def extract_server(connection_string: str) -> Union[str, None]:
    """
    Parse an ODBC connection string and extract the server name.
    Handles various parameter names for server (SERVER, Host, Data Source, etc.)

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str: The extracted server name, or None if not found
    """
    server = _first_value(server_patterns, connection_string)
    if server is not None:
        return server

    # Special case for Athena: extract from AwsRegion if no server found
    region_match = _search_key(r"AwsRegion=([^;]*)", connection_string)
    if region_match:
        return f"aws-athena-{region_match.group(1).strip()}"

    # Special case for Databricks: try to extract hostname from JDBC URL.
    # This is a URL prefix rather than a keyword, so no boundary is applied.
    jdbc_match = re.search(r"jdbc:spark://([^:;/]+)", connection_string, re.IGNORECASE)
    if jdbc_match:
        return jdbc_match.group(1).strip()

    return None


def extract_platform(connection_string: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Extract the database platform name from the ODBC driver name.
    Returns the lowercase platform name.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        tuple: A tuple containing the normalized platform name and the corresponding
               Power BI platform name, or (None, None) if there is no driver
               or it is not recognized.
    """
    driver_name = extract_driver(connection_string)
    if not driver_name:
        return None, None

    return _match_platform(driver_name.lower())


def normalize_platform_name(platform: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Normalizes the platform name by matching it with predefined patterns and maps it to
    a corresponding Power BI platform name.

    Args:
        platform (str): The platform name to normalize

    Returns:
        tuple: A tuple containing the normalized platform name and the corresponding
               Power BI platform name, or (None, None) if not recognized.
    """
    return _match_platform(platform.lower())
|