acryl-datahub 1.0.0.2rc5__py3-none-any.whl → 1.0.0.3rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub has been flagged as potentially problematic; see the package registry's advisory page for details.

Files changed (24)
  1. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/METADATA +2516 -2516
  2. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/RECORD +24 -23
  3. datahub/_version.py +1 -1
  4. datahub/emitter/mcp.py +5 -1
  5. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  6. datahub/ingestion/source/hex/api.py +1 -20
  7. datahub/ingestion/source/mlflow.py +19 -6
  8. datahub/ingestion/source/powerbi/config.py +12 -0
  9. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  10. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  11. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
  12. datahub/ingestion/source/sigma/config.py +75 -6
  13. datahub/ingestion/source/sigma/sigma.py +16 -1
  14. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  15. datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
  16. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  17. datahub/ingestion/source/snowflake/snowflake_query.py +1 -1
  18. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  19. datahub/sql_parsing/sqlglot_utils.py +16 -8
  20. datahub/testing/mcp_diff.py +15 -2
  21. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/WHEEL +0 -0
  22. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/entry_points.txt +0 -0
  23. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/licenses/LICENSE +0 -0
  24. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.0.0.2rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.0.0.3rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=TEFaI0SngUMeKPXQwQz9bnZDzmSywu7Y6e6m6k--k00,323
4
+ datahub/_version.py,sha256=wKoNLhdfRXZqqQqju-C7yvPFz3YKQceonahT8wrZq6Y,323
5
5
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
6
6
  datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -121,7 +121,7 @@ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1K
121
121
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
122
122
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
123
123
  datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
124
- datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
124
+ datahub/emitter/mcp.py,sha256=v7tKlIFX4s7f77KQYeFww8QbOQu6-qU609VeQiUkcsY,9796
125
125
  datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
126
126
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
127
127
  datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
@@ -151,7 +151,7 @@ datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188m
151
151
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
152
152
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
153
153
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
154
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=5jrl7cEyonce-YdWe1Iw6y3Okw5smJosqwOm5e-nvqM,4363
154
+ datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=fMjPnyWEofIZV52E2AFYU3IgBJwyZvbygXxCJyEtcWI,4442
155
155
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
156
  datahub/ingestion/extractor/extractor_registry.py,sha256=f7CLfW3pr29QZkXSHbp7HjUrsdw7ejQJmot-tiSPcqc,342
157
157
  datahub/ingestion/extractor/json_ref_patch.py,sha256=4g3ZWHn7rwS74jUvSXJiGpi-UKHhiSYKKgBeU4E5ukE,1448
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
205
205
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
206
206
  datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
207
207
  datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
208
- datahub/ingestion/source/mlflow.py,sha256=2K5D95HLUhnx8jP54hK8aKNB0nPaCgXaUgO2PdL6Gto,32539
208
+ datahub/ingestion/source/mlflow.py,sha256=fh7izN9jlSwbpGIrEyJktlmwFZR5vNG9z9L5VQ31k_4,33141
209
209
  datahub/ingestion/source/mode.py,sha256=_FKZutF-59w0pYhko6HSVL3yjjYNd329-2DJmyfDqF8,64492
210
210
  datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
211
211
  datahub/ingestion/source/nifi.py,sha256=D1gBXxdpLuUQ0eurwofIR_SGg1rHGhwk3qxsWI1PT9c,56882
@@ -327,7 +327,7 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
327
327
  datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
328
328
  datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
329
329
  datahub/ingestion/source/hex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
- datahub/ingestion/source/hex/api.py,sha256=JfFPD8O4z16fwZE_BdX5aCQztEq-tbzxJJ7aofH4DE4,12274
330
+ datahub/ingestion/source/hex/api.py,sha256=OVQNI_11NJJcNCT6OzSDEtVjNcom0vmes_KkjgzWCcI,11806
331
331
  datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJX1atiiDZyKtg,271
332
332
  datahub/ingestion/source/hex/hex.py,sha256=PIRl8fPkKtlHV7cqR4H8RKVYdTLgEFXHFzc3QAqJLhE,12733
333
333
  datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
@@ -376,15 +376,16 @@ datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH5
376
376
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
377
377
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
378
378
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
379
- datahub/ingestion/source/powerbi/config.py,sha256=bflLIq6rpZeJ7ULvN2gaAVcSHO5jTJ6vdNPvwo1LH7M,24212
379
+ datahub/ingestion/source/powerbi/config.py,sha256=-gof-85gqS_cft2blp5Uw5TVypii4T_bl8XhTZUVlgc,24707
380
380
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
381
381
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
382
382
  datahub/ingestion/source/powerbi/powerbi.py,sha256=b9zNeT9aS7v2GWUL1SROnIMwQwAFX0YTO2UNQMLWItc,56450
383
383
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
384
- datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
384
+ datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=l_L6DzOWMShOWGtVclcf4JtNWzSINuwJka59LjwRLCk,2091
385
385
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
386
+ datahub/ingestion/source/powerbi/m_query/odbc.py,sha256=fZgl8-M5s3Y-3U9OVQs7ttc8FTDbzodIM2HJtFmPNI8,5405
386
387
  datahub/ingestion/source/powerbi/m_query/parser.py,sha256=5KqhUwj9H9yL9ZMPP9oSeVGiZjvXjw6Iu_HrGr95E5M,5876
387
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=aOhAb8U4OEZnO4ufnb-Cm3KMpdy-JF6r9YMK3RNZs5A,35906
388
+ datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=MqZj7VBf9ppKYrA-dRaOVGFpotLFqZditwOD-6ynkFg,41635
388
389
  datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=ISH8Xjx51q2S81fn2v5RhCCU-kRAW3juxM0rMFs4TDo,17413
389
390
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=NIKNNHAE4kTJefTM1WR-StJi9NuingaRYn_mS_kV6A8,6180
390
391
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
@@ -439,10 +440,10 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
439
440
  datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
440
441
  datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
441
442
  datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
442
- datahub/ingestion/source/sigma/config.py,sha256=yfdKQYvI5hKVl8gNAKIcJe-VW3klvdDqYbUP76gJQDI,3812
443
+ datahub/ingestion/source/sigma/config.py,sha256=xpZXt4f05-sroWFv9SbzVhU1-iBeVfU1ocJKb-fy3aM,6333
443
444
  datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
444
- datahub/ingestion/source/sigma/sigma.py,sha256=ucODIa5KUGr3WSoo7VgCt8uFaKRbSDlwsdVMAcjPLpQ,24378
445
- datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
445
+ datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
446
+ datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
446
447
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
447
448
  datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
448
449
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -450,19 +451,19 @@ datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMP
450
451
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
451
452
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
452
453
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
453
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=Jm3TW7ed9LYNOZ9egUwkHs2bQv_WlCD6D2QoVxIzxsI,20729
454
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
454
455
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=pEw2O9xoTSIWDiROlkF8k4oj5zBjkqTnynLvut08yhc,17796
455
456
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
456
457
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
457
458
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
458
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=gX9E1Z_CemAZsuTDmtvqrxY7vBL2da75j7X8Xwhaf8Y,28441
459
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=0AMPQ_L7sgQtBizBNEe69-BUM8_wk1m8ystWivwKEMI,40409
459
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=PY4Wy6i89nqRl92ARwXNqWwm-ifagkKbKKtxYWeswkk,29209
460
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=JtTrfzGqM9mk2Fr-F1X0KXzc_8ot7rD3dD2vPEuzd0E,40411
460
461
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP7i6cgK6Q1jXJPjDA1j9C8klus,6762
461
462
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
462
463
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
463
464
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
464
465
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
465
- datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
466
+ datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
466
467
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
467
468
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=2lmvAeZELTjAzg4Y5E0oY41r1IzVEvg6OHAvVJftSFk,14081
468
469
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=nAbudDVh9A0kqao3jnIdgBlFNhNk1WIxoU1cofeXkFQ,33905
@@ -940,7 +941,7 @@ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=A3_0wSxBJSRowEaslptDpBoKO42
940
941
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
941
942
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
942
943
  datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
943
- datahub/sql_parsing/sqlglot_utils.py,sha256=HP6awSU4ijmwjmTvGA_d0X_RO9O3rbGdkbVAWEhAcck,14667
944
+ datahub/sql_parsing/sqlglot_utils.py,sha256=5cUiEWLWfVTI7uIxolAfOfNVo50qnklzhj86gxSFWqg,14943
944
945
  datahub/sql_parsing/tool_meta_extractor.py,sha256=EV_g7sOchTSUm2p6wluNJqND7-rDYokVTqqFCM7hQ6c,7599
945
946
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
946
947
  datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
@@ -952,7 +953,7 @@ datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4N
952
953
  datahub/testing/compare_metadata_json.py,sha256=mTU5evu7KLS3cx8OLOC1fFxj0eY1J1CGV2PEQZmapos,5361
953
954
  datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
954
955
  datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
955
- datahub/testing/mcp_diff.py,sha256=Jk1NluXkKWEMLOE11mHE98lfYE_Gn2GeFuu5TNB3YSs,10198
956
+ datahub/testing/mcp_diff.py,sha256=1BpQ3hST46cOQi1SmKdsto3j6x6Sk6yHm0vG1w9IDL0,10749
956
957
  datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
957
958
  datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
958
959
  datahub/upgrade/upgrade.py,sha256=lf60_dCu51twObAL5E8NqdrW3_2lsnUJUaB9MSEVXwI,16638
@@ -1045,8 +1046,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1045
1046
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1046
1047
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1047
1048
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1048
- acryl_datahub-1.0.0.2rc5.dist-info/METADATA,sha256=urp7GO85YeQHY_-wuzs6YWZ6xzfGkunfiD-r-e7CvfY,176853
1049
- acryl_datahub-1.0.0.2rc5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1050
- acryl_datahub-1.0.0.2rc5.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1051
- acryl_datahub-1.0.0.2rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1052
- acryl_datahub-1.0.0.2rc5.dist-info/RECORD,,
1049
+ acryl_datahub-1.0.0.3rc2.dist-info/METADATA,sha256=Iez_7GLl0EEt7MEDlMXlVb-A_-YB-RO4IZJRWSwuLjI,176855
1050
+ acryl_datahub-1.0.0.3rc2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1051
+ acryl_datahub-1.0.0.3rc2.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1052
+ acryl_datahub-1.0.0.3rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1053
+ acryl_datahub-1.0.0.3rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0.2rc5"
3
+ __version__ = "1.0.0.3rc2"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/emitter/mcp.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import dataclasses
2
2
  import json
3
- from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
3
+ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
4
4
 
5
5
  from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
6
6
  from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
@@ -69,6 +69,7 @@ class MetadataChangeProposalWrapper:
69
69
  aspectName: Union[None, str] = None
70
70
  aspect: Union[None, _Aspect] = None
71
71
  systemMetadata: Union[None, SystemMetadataClass] = None
72
+ headers: Union[None, Dict[str, str]] = None
72
73
 
73
74
  def __post_init__(self) -> None:
74
75
  if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
@@ -112,6 +113,7 @@ class MetadataChangeProposalWrapper:
112
113
  auditHeader=self.auditHeader,
113
114
  aspectName=self.aspectName,
114
115
  systemMetadata=self.systemMetadata,
116
+ headers=self.headers,
115
117
  )
116
118
 
117
119
  def make_mcp(self) -> MetadataChangeProposalClass:
@@ -211,6 +213,7 @@ class MetadataChangeProposalWrapper:
211
213
  aspectName=mcpc.aspectName,
212
214
  aspect=aspect,
213
215
  systemMetadata=mcpc.systemMetadata,
216
+ headers=mcpc.headers,
214
217
  )
215
218
  else:
216
219
  return None
@@ -228,6 +231,7 @@ class MetadataChangeProposalWrapper:
228
231
  changeType=mcl.changeType,
229
232
  auditHeader=mcl.auditHeader,
230
233
  systemMetadata=mcl.systemMetadata,
234
+ headers=mcl.headers,
231
235
  )
232
236
  return cls.try_from_mcpc(mcpc) or mcpc
233
237
 
@@ -23,6 +23,7 @@ class EnsureAspectSizeProcessor:
23
23
  ):
24
24
  self.report = report
25
25
  self.payload_constraint = payload_constraint
26
+ self.schema_size_constraint = int(self.payload_constraint * 0.985)
26
27
 
27
28
  def ensure_dataset_profile_size(
28
29
  self, dataset_urn: str, profile: DatasetProfileClass
@@ -68,7 +69,7 @@ class EnsureAspectSizeProcessor:
68
69
  for field in schema.fields:
69
70
  field_size = len(json.dumps(pre_json_transform(field.to_obj())))
70
71
  logger.debug(f"Field {field.fieldPath} takes total {field_size}")
71
- if total_fields_size + field_size < self.payload_constraint:
72
+ if total_fields_size + field_size < self.schema_size_constraint:
72
73
  accepted_fields.append(field)
73
74
  total_fields_size += field_size
74
75
  else:
@@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
27
27
 
28
28
  # The following models were Claude-generated from Hex API OpenAPI definition https://static.hex.site/openapi.json
29
29
  # To be exclusively used internally for the deserialization of the API response
30
+ # Model is incomplete and fields may have not been mapped if not used in the ingestion
30
31
 
31
32
 
32
33
  class HexApiAppViewStats(BaseModel):
@@ -83,20 +84,10 @@ class HexApiUser(BaseModel):
83
84
  email: str
84
85
 
85
86
 
86
- class HexApiAccessType(StrEnum):
87
- """Access type enum."""
88
-
89
- NONE = "NONE"
90
- VIEW = "VIEW"
91
- EDIT = "EDIT"
92
- FULL_ACCESS = "FULL_ACCESS"
93
-
94
-
95
87
  class HexApiUserAccess(BaseModel):
96
88
  """User access model."""
97
89
 
98
90
  user: HexApiUser
99
- access: Optional[HexApiAccessType] = None
100
91
 
101
92
 
102
93
  class HexApiCollectionData(BaseModel):
@@ -109,13 +100,6 @@ class HexApiCollectionAccess(BaseModel):
109
100
  """Collection access model."""
110
101
 
111
102
  collection: HexApiCollectionData
112
- access: Optional[HexApiAccessType] = None
113
-
114
-
115
- class HexApiAccessSettings(BaseModel):
116
- """Access settings model."""
117
-
118
- access: Optional[HexApiAccessType] = None
119
103
 
120
104
 
121
105
  class HexApiWeeklySchedule(BaseModel):
@@ -145,9 +129,6 @@ class HexApiSharing(BaseModel):
145
129
  users: Optional[List[HexApiUserAccess]] = []
146
130
  collections: Optional[List[HexApiCollectionAccess]] = []
147
131
  groups: Optional[List[Any]] = []
148
- workspace: Optional[HexApiAccessSettings] = None
149
- public_web: Optional[HexApiAccessSettings] = Field(default=None, alias="publicWeb")
150
- support: Optional[HexApiAccessSettings] = None
151
132
 
152
133
  class Config:
153
134
  extra = "ignore" # Allow extra fields in the JSON
@@ -7,6 +7,7 @@ from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Unio
7
7
  from mlflow import MlflowClient
8
8
  from mlflow.entities import Dataset as MlflowDataset, Experiment, Run
9
9
  from mlflow.entities.model_registry import ModelVersion, RegisteredModel
10
+ from mlflow.exceptions import MlflowException
10
11
  from mlflow.store.entities import PagedList
11
12
  from pydantic.fields import Field
12
13
 
@@ -589,8 +590,8 @@ class MLflowSource(StatefulIngestionSourceBase):
589
590
  )
590
591
  return runs
591
592
 
592
- @staticmethod
593
593
  def _traverse_mlflow_search_func(
594
+ self,
594
595
  search_func: Callable[..., PagedList[T]],
595
596
  **kwargs: Any,
596
597
  ) -> Iterable[T]:
@@ -598,12 +599,24 @@ class MLflowSource(StatefulIngestionSourceBase):
598
599
  Utility to traverse an MLflow search_* functions which return PagedList.
599
600
  """
600
601
  next_page_token = None
601
- while True:
602
- paged_list = search_func(page_token=next_page_token, **kwargs)
603
- yield from paged_list.to_list()
604
- next_page_token = paged_list.token
605
- if not next_page_token:
602
+ try:
603
+ while True:
604
+ paged_list = search_func(page_token=next_page_token, **kwargs)
605
+ yield from paged_list.to_list()
606
+ next_page_token = paged_list.token
607
+ if not next_page_token:
608
+ return
609
+ except MlflowException as e:
610
+ if e.error_code == "ENDPOINT_NOT_FOUND":
611
+ self.report.warning(
612
+ title="MLflow API Endpoint Not Found for Experiments.",
613
+ message="Please upgrade to version 1.28.0 or higher to ensure compatibility. Skipping ingestion for experiments and runs.",
614
+ context=None,
615
+ exc=e,
616
+ )
606
617
  return
618
+ else:
619
+ raise # Only re-raise other exceptions
607
620
 
608
621
  def _get_latest_version(self, registered_model: RegisteredModel) -> Optional[str]:
609
622
  return (
@@ -192,6 +192,11 @@ class SupportedDataPlatform(Enum):
192
192
  datahub_data_platform_name="mysql",
193
193
  )
194
194
 
195
+ ODBC = DataPlatformPair(
196
+ powerbi_data_platform_name="Odbc",
197
+ datahub_data_platform_name="odbc",
198
+ )
199
+
195
200
 
196
201
  @dataclass
197
202
  class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
@@ -341,6 +346,13 @@ class PowerBiDashboardSourceConfig(
341
346
  "For Google BigQuery the datasource's server is google bigquery project name. "
342
347
  "For Databricks Unity Catalog the datasource's server is workspace FQDN.",
343
348
  )
349
+ # ODBC DSN to platform mapping
350
+ dsn_to_platform_name: Dict[str, str] = pydantic.Field(
351
+ default={},
352
+ description="A mapping of ODBC DSN to DataHub data platform name. "
353
+ "For example with an ODBC connection string 'DSN=database' where the database type "
354
+ "is 'PostgreSQL' you would configure the mapping as 'database: postgres'.",
355
+ )
344
356
  # deprecated warning
345
357
  _dataset_type_mapping = pydantic_field_deprecated(
346
358
  "dataset_type_mapping",
@@ -75,3 +75,4 @@ class FunctionName(Enum):
75
75
  AMAZON_REDSHIFT_DATA_ACCESS = "AmazonRedshift.Database"
76
76
  DATABRICK_MULTI_CLOUD_DATA_ACCESS = "DatabricksMultiCloud.Catalogs"
77
77
  MYSQL_DATA_ACCESS = "MySQL.Database"
78
+ ODBC_DATA_ACCESS = "Odbc.DataSource"
@@ -0,0 +1,185 @@
1
+ import re
2
+ from typing import Optional, Tuple, Union
3
+
4
+ server_patterns = [
5
+ r"Server=([^:]+)[:][0-9]+/.*",
6
+ r"SERVER=\{([^}]*)\}",
7
+ r"SERVER=([^;]*)",
8
+ r"HOST=\{([^}]*)\}",
9
+ r"HOST=([^;]*)",
10
+ r"DATA SOURCE=\{([^}]*)\}",
11
+ r"DATA SOURCE=([^;]*)",
12
+ r"DSN=\{([^}]*)\}",
13
+ r"DSN=([^;]*)",
14
+ r"Server=([^;]*)",
15
+ r"S3OutputLocation=([^;]*)",
16
+ r"HTTPPath=([^;]*)",
17
+ r"Host=([^;]*)",
18
+ ]
19
+
20
+ dsn_patterns = [
21
+ r"DSN\s*=\s*\"([^\"]+)\"",
22
+ r"DSN\s*=\s*\'([^\']+)\'",
23
+ r"DSN\s*=\s*([^;]+)",
24
+ ]
25
+
26
+ platform_patterns = {
27
+ "mysql": r"mysql",
28
+ "postgres": r"post(gre(s|sql)?|gres)",
29
+ "mssql": r"(sql\s*server|mssql|sqlncli)",
30
+ "oracle": r"oracle",
31
+ "db2": r"db2",
32
+ "sqlite": r"sqlite",
33
+ "access": r"(access|\.mdb|\.accdb)",
34
+ "excel": r"(excel|\.xls)",
35
+ "firebird": r"firebird",
36
+ "informix": r"informix",
37
+ "sybase": r"sybase",
38
+ "teradata": r"teradata",
39
+ "hadoop": r"(hadoop|hive)",
40
+ "snowflake": r"snowflake",
41
+ "redshift": r"redshift",
42
+ "bigquery": r"bigquery",
43
+ "athena": r"(athena|aws\s*athena)",
44
+ "databricks": r"(databricks|spark)",
45
+ }
46
+
47
+ powerbi_platform_names = {
48
+ "mysql": "MySQL",
49
+ "postgres": "PostgreSQL",
50
+ "mssql": "SQL Server",
51
+ "oracle": "Oracle",
52
+ "db2": "IBM DB2",
53
+ "sqlite": "SQLite",
54
+ "access": "Microsoft Access",
55
+ "excel": "Microsoft Excel",
56
+ "firebird": "Firebird",
57
+ "informix": "IBM Informix",
58
+ "sybase": "SAP Sybase",
59
+ "teradata": "Teradata",
60
+ "hadoop": "Hadoop",
61
+ "snowflake": "Snowflake",
62
+ "redshift": "Amazon Redshift",
63
+ "bigquery": "Google BigQuery",
64
+ "athena": "Amazon Athena",
65
+ "databricks": "Databricks",
66
+ }
67
+
68
+
69
+ def extract_driver(connection_string: str) -> Union[str, None]:
70
+ """
71
+ Parse an ODBC connection string and extract the driver name.
72
+ Handles whitespace in driver names and various connection string formats.
73
+
74
+ Args:
75
+ connection_string (str): The ODBC connection string
76
+
77
+ Returns:
78
+ str: The extracted driver name, or None if not found
79
+ """
80
+ # Match DRIVER={driver name} pattern
81
+ driver_match = re.search(r"DRIVER=\{([^}]*)}", connection_string, re.IGNORECASE)
82
+
83
+ if driver_match:
84
+ return driver_match.group(1).strip()
85
+
86
+ # Alternative pattern for DRIVER=driver
87
+ driver_match = re.search(r"DRIVER=([^;]*)", connection_string, re.IGNORECASE)
88
+
89
+ if driver_match:
90
+ return driver_match.group(1).strip()
91
+
92
+ return None
93
+
94
+
95
+ def extract_dsn(connection_string: str) -> Union[str, None]:
96
+ """
97
+ Extract the DSN value from an ODBC connection string.
98
+
99
+ Args:
100
+ connection_string (str): The ODBC connection string
101
+
102
+ Returns:
103
+ str or None: The extracted DSN value, or None if not found
104
+ """
105
+ for pattern in dsn_patterns:
106
+ match = re.search(pattern, connection_string, re.IGNORECASE)
107
+ if match:
108
+ return match.group(1).strip()
109
+
110
+ return None
111
+
112
+
113
+ def extract_server(connection_string: str) -> Union[str, None]:
114
+ """
115
+ Parse an ODBC connection string and extract the server name.
116
+ Handles various parameter names for server (SERVER, Host, Data Source, etc.)
117
+
118
+ Args:
119
+ connection_string (str): The ODBC connection string
120
+
121
+ Returns:
122
+ str: The extracted server name, or None if not found
123
+ """
124
+ for pattern in server_patterns:
125
+ server_match = re.search(pattern, connection_string, re.IGNORECASE)
126
+ if server_match:
127
+ return server_match.group(1).strip()
128
+
129
+ # Special case for Athena: extract from AwsRegion if no server found
130
+ region_match = re.search(r"AwsRegion=([^;]*)", connection_string, re.IGNORECASE)
131
+ if region_match:
132
+ return f"aws-athena-{region_match.group(1).strip()}"
133
+
134
+ # Special case for Databricks: try to extract hostname from JDBC URL
135
+ jdbc_match = re.search(r"jdbc:spark://([^:;/]+)", connection_string, re.IGNORECASE)
136
+ if jdbc_match:
137
+ return jdbc_match.group(1).strip()
138
+
139
+ return None
140
+
141
+
142
+ def extract_platform(connection_string: str) -> Tuple[Optional[str], Optional[str]]:
143
+ """
144
+ Extract the database platform name from the ODBC driver name.
145
+ Returns the lowercase platform name.
146
+
147
+ Args:
148
+ connection_string (str): The ODBC connection string
149
+
150
+ Returns:
151
+ tuple: A tuple containing the normalized platform name and the corresponding
152
+ Power BI platform name, or None if not recognized.
153
+ """
154
+ driver_name = extract_driver(connection_string)
155
+ if not driver_name:
156
+ return None, None
157
+
158
+ driver_lower = driver_name.lower()
159
+
160
+ for platform, pattern in platform_patterns.items():
161
+ if re.search(pattern, driver_lower):
162
+ return platform, powerbi_platform_names.get(platform)
163
+
164
+ return None, None
165
+
166
+
167
+ def normalize_platform_name(platform: str) -> Tuple[Optional[str], Optional[str]]:
168
+ """
169
+ Normalizes the platform name by matching it with predefined patterns and maps it to
170
+ a corresponding Power BI platform name.
171
+
172
+ Args:
173
+ platform (str): The platform name to normalize
174
+
175
+ Returns:
176
+ tuple: A tuple containing the normalized platform name and the corresponding
177
+ Power BI platform name, or None if not recognized.
178
+ """
179
+ platform_lower = platform.lower()
180
+
181
+ for platform, pattern in platform_patterns.items():
182
+ if re.search(pattern, platform_lower):
183
+ return platform, powerbi_platform_names.get(platform)
184
+
185
+ return None, None