acryl-datahub 1.0.0.3rc1__py3-none-any.whl → 1.0.0.3rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (27) hide show
  1. {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/METADATA +2285 -2283
  2. {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/RECORD +27 -26
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/datacontract/datacontract.py +35 -3
  5. datahub/api/entities/datajob/datajob.py +7 -4
  6. datahub/emitter/mcp.py +5 -1
  7. datahub/ingestion/run/pipeline.py +6 -4
  8. datahub/ingestion/source/common/subtypes.py +3 -0
  9. datahub/ingestion/source/mlflow.py +19 -6
  10. datahub/ingestion/source/powerbi/config.py +12 -0
  11. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  12. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  13. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
  14. datahub/ingestion/source/sigma/config.py +2 -3
  15. datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
  16. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  17. datahub/ingestion/source/tableau/tableau.py +31 -6
  18. datahub/ingestion/source/vertexai/vertexai.py +316 -4
  19. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
  20. datahub/metadata/_schema_classes.py +1 -1
  21. datahub/metadata/schema.avsc +2 -1
  22. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  23. datahub/testing/mcp_diff.py +15 -2
  24. {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/WHEEL +0 -0
  25. {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/entry_points.txt +0 -0
  26. {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/licenses/LICENSE +0 -0
  27. {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.0.0.3rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.0.0.3rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=R-5q2sde87sdyofKBpzMGjN_yrh8SbPAoOTVYlH3CuU,323
4
+ datahub/_version.py,sha256=911KmAC1s4iZRnTeHRY8SXqgCBef_UX6nB2GAa1FqkE,323
5
5
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
6
6
  datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -37,12 +37,12 @@ datahub/api/entities/datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
37
37
  datahub/api/entities/datacontract/assertion.py,sha256=3tkX2c1g6u3sZatuzrac9RNuPlAdxSSe5QhyVjt90SU,182
38
38
  datahub/api/entities/datacontract/assertion_operator.py,sha256=N10PWIwhAPLAdcXIp_ZJoiQMS5FfFi4vH9sYmt2vUsA,4526
39
39
  datahub/api/entities/datacontract/data_quality_assertion.py,sha256=ioYOz-oNkAfHdpTyG6wVhGpY6YpH1L2cHiyClpxoNvg,3918
40
- datahub/api/entities/datacontract/datacontract.py,sha256=Vi6u9snSiacDlq2iofHiUGlsVl9_uqEJBACrVfT3Zts,7698
40
+ datahub/api/entities/datacontract/datacontract.py,sha256=V1P5i8_9IuKf3k-mvJphZo9y28a_wnAwfipwCklPGGM,9272
41
41
  datahub/api/entities/datacontract/freshness_assertion.py,sha256=TpKqNeUzeGH9wrYqelYVuZpolEm7iYXj1nymwWu8-_c,2700
42
42
  datahub/api/entities/datacontract/schema_assertion.py,sha256=24VQ0lm4LxWbpIrho2R6Atc80yqo76Vu7AN2LZiMOzc,2418
43
43
  datahub/api/entities/datajob/__init__.py,sha256=suzCs4cLphQ64oDG7meXsJVp6ya9_Glzqamoq_ti5GA,222
44
44
  datahub/api/entities/datajob/dataflow.py,sha256=VBaBoO9it2cqoJarXqGQhSGJrecHVqGhdCPbpzqeSz0,7505
45
- datahub/api/entities/datajob/datajob.py,sha256=tAxdxy8djDKD2THKIYEAsNi1Ly_eRpmBjT6fASCnUqg,7944
45
+ datahub/api/entities/datajob/datajob.py,sha256=gAIdTSlAY3iV3R3EUAcOtuYam8aR2jTGQ833iVsQNt4,8033
46
46
  datahub/api/entities/dataprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW1_sJA79GxQyg9lhILBR66IrnLkY,19120
48
48
  datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -121,7 +121,7 @@ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1K
121
121
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
122
122
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
123
123
  datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
124
- datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
124
+ datahub/emitter/mcp.py,sha256=v7tKlIFX4s7f77KQYeFww8QbOQu6-qU609VeQiUkcsY,9796
125
125
  datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
126
126
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
127
127
  datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
@@ -182,7 +182,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
182
182
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
183
183
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
184
  datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
185
- datahub/ingestion/run/pipeline.py,sha256=-74g3vIuecTuXsNBgspOSEfnpO-NqT-yg-ZqkL6OgP0,29692
185
+ datahub/ingestion/run/pipeline.py,sha256=4CJ3fUAPI1AzIjjg_lyv0FP9K0kfu9dPtNDwvVDojXs,29758
186
186
  datahub/ingestion/run/pipeline_config.py,sha256=EDwqlid4h_qyqyeTRCEqb1RiFA4py_T-Poz1eIKmzT4,4101
187
187
  datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
188
188
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
205
205
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
206
206
  datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
207
207
  datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
208
- datahub/ingestion/source/mlflow.py,sha256=2K5D95HLUhnx8jP54hK8aKNB0nPaCgXaUgO2PdL6Gto,32539
208
+ datahub/ingestion/source/mlflow.py,sha256=fh7izN9jlSwbpGIrEyJktlmwFZR5vNG9z9L5VQ31k_4,33141
209
209
  datahub/ingestion/source/mode.py,sha256=_FKZutF-59w0pYhko6HSVL3yjjYNd329-2DJmyfDqF8,64492
210
210
  datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
211
211
  datahub/ingestion/source/nifi.py,sha256=D1gBXxdpLuUQ0eurwofIR_SGg1rHGhwk3qxsWI1PT9c,56882
@@ -272,7 +272,7 @@ datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
272
272
  datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
273
273
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
274
274
  datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNClLlmOfANS7U6rChhdthRX9s9iUv9k,2411
275
- datahub/ingestion/source/common/subtypes.py,sha256=UZca0ZQUQdoXr5Z-3AIUT9gIlPt-XwbMNjj7WEEiR_4,3107
275
+ datahub/ingestion/source/common/subtypes.py,sha256=iJ9IfuiLK_T6yJ9ovY1HA83ujaVWichAgR-rUlBXemk,3238
276
276
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
277
277
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
278
278
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
@@ -376,15 +376,16 @@ datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH5
376
376
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
377
377
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
378
378
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
379
- datahub/ingestion/source/powerbi/config.py,sha256=bflLIq6rpZeJ7ULvN2gaAVcSHO5jTJ6vdNPvwo1LH7M,24212
379
+ datahub/ingestion/source/powerbi/config.py,sha256=-gof-85gqS_cft2blp5Uw5TVypii4T_bl8XhTZUVlgc,24707
380
380
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
381
381
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
382
382
  datahub/ingestion/source/powerbi/powerbi.py,sha256=b9zNeT9aS7v2GWUL1SROnIMwQwAFX0YTO2UNQMLWItc,56450
383
383
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
384
- datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
384
+ datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=l_L6DzOWMShOWGtVclcf4JtNWzSINuwJka59LjwRLCk,2091
385
385
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
386
+ datahub/ingestion/source/powerbi/m_query/odbc.py,sha256=fZgl8-M5s3Y-3U9OVQs7ttc8FTDbzodIM2HJtFmPNI8,5405
386
387
  datahub/ingestion/source/powerbi/m_query/parser.py,sha256=5KqhUwj9H9yL9ZMPP9oSeVGiZjvXjw6Iu_HrGr95E5M,5876
387
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=aOhAb8U4OEZnO4ufnb-Cm3KMpdy-JF6r9YMK3RNZs5A,35906
388
+ datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=MqZj7VBf9ppKYrA-dRaOVGFpotLFqZditwOD-6ynkFg,41635
388
389
  datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=ISH8Xjx51q2S81fn2v5RhCCU-kRAW3juxM0rMFs4TDo,17413
389
390
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=NIKNNHAE4kTJefTM1WR-StJi9NuingaRYn_mS_kV6A8,6180
390
391
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
@@ -439,7 +440,7 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
439
440
  datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
440
441
  datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
441
442
  datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
442
- datahub/ingestion/source/sigma/config.py,sha256=xpZXt4f05-sroWFv9SbzVhU1-iBeVfU1ocJKb-fy3aM,6333
443
+ datahub/ingestion/source/sigma/config.py,sha256=ztZf0YisGSXKgKeqP9ipDlRKLXU-Y-XABqm7HCJ8pvA,6265
443
444
  datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
444
445
  datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
445
446
  datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
@@ -450,7 +451,7 @@ datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMP
450
451
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
451
452
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
452
453
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
453
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=Jm3TW7ed9LYNOZ9egUwkHs2bQv_WlCD6D2QoVxIzxsI,20729
454
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
454
455
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=pEw2O9xoTSIWDiROlkF8k4oj5zBjkqTnynLvut08yhc,17796
455
456
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
456
457
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
@@ -462,7 +463,7 @@ datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeT
462
463
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
463
464
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
464
465
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
465
- datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
466
+ datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
466
467
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
467
468
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=2lmvAeZELTjAzg4Y5E0oY41r1IzVEvg6OHAvVJftSFk,14081
468
469
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=nAbudDVh9A0kqao3jnIdgBlFNhNk1WIxoU1cofeXkFQ,33905
@@ -514,7 +515,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
514
515
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
515
516
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
516
517
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
517
- datahub/ingestion/source/tableau/tableau.py,sha256=tBDyDAAFnFS1pag_cK1hjE73qpFRcZK-BVphBJ5r_Gs,154109
518
+ datahub/ingestion/source/tableau/tableau.py,sha256=WLOcP9VJgJPlCTLHuUWlglrHueoXvGCzXvctasJiIHE,155034
518
519
  datahub/ingestion/source/tableau/tableau_common.py,sha256=MIkHKZg_v2IVCRk-YdPlLZl3m0LcWZm5Indwb3IV2ZQ,26931
519
520
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
520
521
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
@@ -536,9 +537,9 @@ datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e
536
537
  datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
537
538
  datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
538
539
  datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
539
- datahub/ingestion/source/vertexai/vertexai.py,sha256=ipq2Zb2lXTZkUg9r78kvuIIhX7mc-5hr-o83395IWpo,43589
540
+ datahub/ingestion/source/vertexai/vertexai.py,sha256=RuHda0mbc1DElYZIZ_W_hvkN7Eg4LIvI1fRFMvpHPB0,56012
540
541
  datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
541
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=fE2l_xXvKONqb4jabl4LtKRBZDnP3koMLJV520wEAMg,2555
542
+ datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=dJwRxuDA3flmTKjV5EUbmDFfxE0S8K1CEPB_EYUfNfI,3578
542
543
  datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
543
544
  datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1pZSi33tzW9sQb7ZEgac,1733
544
545
  datahub/ingestion/source_config/operation_config.py,sha256=hxF2RM0jk0HUPXYiliMniXBC-wz-ZPcs90ZGLfHT8rE,3924
@@ -592,8 +593,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
592
593
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
593
594
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
594
595
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
595
- datahub/metadata/_schema_classes.py,sha256=n8NwTeBKsnQdtNCMG85LDc6mPgDT5JGUbsgg9KcNj18,1012635
596
- datahub/metadata/schema.avsc,sha256=ckv1TFKtvz0eGTSqL2qijL4eqfTuQYMIj91pO0vbc2M,753114
596
+ datahub/metadata/_schema_classes.py,sha256=sdjR8XblvlYPYxYeLDQwVzTcYPAR9ycCa2sNOylOews,1012659
597
+ datahub/metadata/schema.avsc,sha256=w2P9zIqAZQmcJdw3pJxAM05m97KssRHBMGbEn_VnjKU,753146
597
598
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
598
599
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
599
600
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -705,7 +706,7 @@ datahub/metadata/schemas/Cost.avsc,sha256=o4kYZSss2uEwJ6gCA9fhBUoyD5xUqcSxz78vkI
705
706
  datahub/metadata/schemas/DashboardInfo.avsc,sha256=li2lSV6R4V-nz6foOi-NYxt_8ShHWfoKRw6M2BG5530,12907
706
707
  datahub/metadata/schemas/DashboardKey.avsc,sha256=yKlusgebWTvZhVeGgRNLZW6Qu6Fg_K0e2EbV8zr3jvA,1360
707
708
  datahub/metadata/schemas/DashboardUsageStatistics.avsc,sha256=pUAKqs49Wy5pAL92g_6QcFtJeoYeMWRGiHWS68IJN2A,7693
708
- datahub/metadata/schemas/DataContractKey.avsc,sha256=Oceu7P26--E0812IFrX3RiEY0Ktam869iiYN30zBudc,481
709
+ datahub/metadata/schemas/DataContractKey.avsc,sha256=m0ej_Wu7NcuZQCRwQI3Sidfv9bUy5mvuhlpgax6i1xA,511
709
710
  datahub/metadata/schemas/DataContractProperties.avsc,sha256=RCxuJMlZwqEE0iHTpuXvcH6zRFoOt7ysQFPrJRp3RqE,4763
710
711
  datahub/metadata/schemas/DataContractStatus.avsc,sha256=5yvT43AIB13Dn_h0-4s7fsL7BTuXhkK5pi2KJug4_qg,1029
711
712
  datahub/metadata/schemas/DataFlowInfo.avsc,sha256=tDRTd1rA3v_7kwUVbQbb-cuo6D-t3pcuE4fiRz4D8f0,4682
@@ -952,7 +953,7 @@ datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4N
952
953
  datahub/testing/compare_metadata_json.py,sha256=mTU5evu7KLS3cx8OLOC1fFxj0eY1J1CGV2PEQZmapos,5361
953
954
  datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
954
955
  datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
955
- datahub/testing/mcp_diff.py,sha256=Jk1NluXkKWEMLOE11mHE98lfYE_Gn2GeFuu5TNB3YSs,10198
956
+ datahub/testing/mcp_diff.py,sha256=1BpQ3hST46cOQi1SmKdsto3j6x6Sk6yHm0vG1w9IDL0,10749
956
957
  datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
957
958
  datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
958
959
  datahub/upgrade/upgrade.py,sha256=lf60_dCu51twObAL5E8NqdrW3_2lsnUJUaB9MSEVXwI,16638
@@ -1045,8 +1046,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1045
1046
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1046
1047
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1047
1048
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1048
- acryl_datahub-1.0.0.3rc1.dist-info/METADATA,sha256=43mPIcmD4ByKfyR6rn8PPgaKNUBSmDmVJnGm1KhBZuo,176855
1049
- acryl_datahub-1.0.0.3rc1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1050
- acryl_datahub-1.0.0.3rc1.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1051
- acryl_datahub-1.0.0.3rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1052
- acryl_datahub-1.0.0.3rc1.dist-info/RECORD,,
1049
+ acryl_datahub-1.0.0.3rc4.dist-info/METADATA,sha256=66uBY8gH_YmKeBmwbECGBpJhT7JiyrJXfFJv5wcFgQQ,176965
1050
+ acryl_datahub-1.0.0.3rc4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1051
+ acryl_datahub-1.0.0.3rc4.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1052
+ acryl_datahub-1.0.0.3rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1053
+ acryl_datahub-1.0.0.3rc4.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0.3rc1"
3
+ __version__ = "1.0.0.3rc4"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -1,5 +1,5 @@
1
1
  import collections
2
- from typing import Iterable, List, Optional, Tuple
2
+ from typing import Dict, Iterable, List, Optional, Tuple, Union
3
3
 
4
4
  from ruamel.yaml import YAML
5
5
  from typing_extensions import Literal
@@ -25,6 +25,8 @@ from datahub.metadata.schema_classes import (
25
25
  FreshnessContractClass,
26
26
  SchemaContractClass,
27
27
  StatusClass,
28
+ StructuredPropertiesClass,
29
+ StructuredPropertyValueAssignmentClass,
28
30
  )
29
31
  from datahub.utilities.urns.urn import guess_entity_type
30
32
 
@@ -47,8 +49,12 @@ class DataContract(v1_ConfigModel):
47
49
  entity: str = v1_Field(
48
50
  description="The entity urn that the Data Contract is associated with"
49
51
  )
50
- # TODO: add support for properties
51
- # properties: Optional[Dict[str, str]] = None
52
+ properties: Optional[Dict[str, Union[str, float, List[Union[str, float]]]]] = (
53
+ v1_Field(
54
+ default=None,
55
+ description="Structured properties associated with the data contract.",
56
+ )
57
+ )
52
58
 
53
59
  schema_field: Optional[SchemaAssertion] = v1_Field(default=None, alias="schema")
54
60
 
@@ -172,6 +178,30 @@ class DataContract(v1_ConfigModel):
172
178
  )
173
179
  yield from dq_assertion_mcps
174
180
 
181
+ # Construct the structured properties aspect if properties are defined
182
+ structured_properties_aspect: Optional[StructuredPropertiesClass] = None
183
+ if self.properties:
184
+ property_assignments: List[StructuredPropertyValueAssignmentClass] = []
185
+ for key, value in self.properties.items():
186
+ # Use f-string formatting for the property URN, like in dataset.py
187
+ prop_urn = f"urn:li:structuredProperty:{key}"
188
+ # Ensure value is a list for StructuredPropertyValueAssignmentClass
189
+ values_list = value if isinstance(value, list) else [value]
190
+ property_assignments.append(
191
+ StructuredPropertyValueAssignmentClass(
192
+ propertyUrn=prop_urn,
193
+ values=[
194
+ str(v) for v in values_list
195
+ ], # Ensure all values are strings
196
+ )
197
+ )
198
+ if (
199
+ property_assignments
200
+ ): # Only create aspect if there are valid assignments
201
+ structured_properties_aspect = StructuredPropertiesClass(
202
+ properties=property_assignments
203
+ )
204
+
175
205
  # Now that we've generated the assertions, we can generate
176
206
  # the actual data contract.
177
207
  yield from MetadataChangeProposalWrapper.construct_many(
@@ -202,6 +232,8 @@ class DataContract(v1_ConfigModel):
202
232
  if True
203
233
  else None
204
234
  ),
235
+ # Add structured properties aspect if defined
236
+ structured_properties_aspect,
205
237
  ],
206
238
  )
207
239
 
@@ -108,7 +108,9 @@ class DataJob:
108
108
  return [tags]
109
109
 
110
110
  def generate_mcp(
111
- self, materialize_iolets: bool = True
111
+ self,
112
+ generate_lineage: bool = True,
113
+ materialize_iolets: bool = True,
112
114
  ) -> Iterable[MetadataChangeProposalWrapper]:
113
115
  env: Optional[str] = None
114
116
  if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
@@ -152,9 +154,10 @@ class DataJob:
152
154
  )
153
155
  yield mcp
154
156
 
155
- yield from self.generate_data_input_output_mcp(
156
- materialize_iolets=materialize_iolets
157
- )
157
+ if generate_lineage:
158
+ yield from self.generate_data_input_output_mcp(
159
+ materialize_iolets=materialize_iolets
160
+ )
158
161
 
159
162
  for owner in self.generate_ownership_aspect():
160
163
  mcp = MetadataChangeProposalWrapper(
datahub/emitter/mcp.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import dataclasses
2
2
  import json
3
- from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
3
+ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
4
4
 
5
5
  from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
6
6
  from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
@@ -69,6 +69,7 @@ class MetadataChangeProposalWrapper:
69
69
  aspectName: Union[None, str] = None
70
70
  aspect: Union[None, _Aspect] = None
71
71
  systemMetadata: Union[None, SystemMetadataClass] = None
72
+ headers: Union[None, Dict[str, str]] = None
72
73
 
73
74
  def __post_init__(self) -> None:
74
75
  if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
@@ -112,6 +113,7 @@ class MetadataChangeProposalWrapper:
112
113
  auditHeader=self.auditHeader,
113
114
  aspectName=self.aspectName,
114
115
  systemMetadata=self.systemMetadata,
116
+ headers=self.headers,
115
117
  )
116
118
 
117
119
  def make_mcp(self) -> MetadataChangeProposalClass:
@@ -211,6 +213,7 @@ class MetadataChangeProposalWrapper:
211
213
  aspectName=mcpc.aspectName,
212
214
  aspect=aspect,
213
215
  systemMetadata=mcpc.systemMetadata,
216
+ headers=mcpc.headers,
214
217
  )
215
218
  else:
216
219
  return None
@@ -228,6 +231,7 @@ class MetadataChangeProposalWrapper:
228
231
  changeType=mcl.changeType,
229
232
  auditHeader=mcl.auditHeader,
230
233
  systemMetadata=mcl.systemMetadata,
234
+ headers=mcl.headers,
231
235
  )
232
236
  return cls.try_from_mcpc(mcpc) or mcpc
233
237
 
@@ -555,18 +555,20 @@ class Pipeline:
555
555
  def raise_from_status(self, raise_warnings: bool = False) -> None:
556
556
  if self.source.get_report().failures:
557
557
  raise PipelineExecutionError(
558
- "Source reported errors", self.source.get_report()
558
+ "Source reported errors", self.source.get_report().failures
559
559
  )
560
560
  if self.sink.get_report().failures:
561
- raise PipelineExecutionError("Sink reported errors", self.sink.get_report())
561
+ raise PipelineExecutionError(
562
+ "Sink reported errors", self.sink.get_report().failures
563
+ )
562
564
  if raise_warnings:
563
565
  if self.source.get_report().warnings:
564
566
  raise PipelineExecutionError(
565
- "Source reported warnings", self.source.get_report()
567
+ "Source reported warnings", self.source.get_report().warnings
566
568
  )
567
569
  if self.sink.get_report().warnings:
568
570
  raise PipelineExecutionError(
569
- "Sink reported warnings", self.sink.get_report()
571
+ "Sink reported warnings", self.sink.get_report().warnings
570
572
  )
571
573
 
572
574
  def log_ingestion_stats(self) -> None:
@@ -113,3 +113,6 @@ class MLAssetSubTypes(StrEnum):
113
113
  VERTEX_ENDPOINT = "Endpoint"
114
114
  VERTEX_DATASET = "Dataset"
115
115
  VERTEX_PROJECT = "Project"
116
+ VERTEX_PIPELINE = "Pipeline Job"
117
+ VERTEX_PIPELINE_TASK = "Pipeline Task"
118
+ VERTEX_PIPELINE_TASK_RUN = "Pipeline Task Run"
@@ -7,6 +7,7 @@ from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Unio
7
7
  from mlflow import MlflowClient
8
8
  from mlflow.entities import Dataset as MlflowDataset, Experiment, Run
9
9
  from mlflow.entities.model_registry import ModelVersion, RegisteredModel
10
+ from mlflow.exceptions import MlflowException
10
11
  from mlflow.store.entities import PagedList
11
12
  from pydantic.fields import Field
12
13
 
@@ -589,8 +590,8 @@ class MLflowSource(StatefulIngestionSourceBase):
589
590
  )
590
591
  return runs
591
592
 
592
- @staticmethod
593
593
  def _traverse_mlflow_search_func(
594
+ self,
594
595
  search_func: Callable[..., PagedList[T]],
595
596
  **kwargs: Any,
596
597
  ) -> Iterable[T]:
@@ -598,12 +599,24 @@ class MLflowSource(StatefulIngestionSourceBase):
598
599
  Utility to traverse an MLflow search_* functions which return PagedList.
599
600
  """
600
601
  next_page_token = None
601
- while True:
602
- paged_list = search_func(page_token=next_page_token, **kwargs)
603
- yield from paged_list.to_list()
604
- next_page_token = paged_list.token
605
- if not next_page_token:
602
+ try:
603
+ while True:
604
+ paged_list = search_func(page_token=next_page_token, **kwargs)
605
+ yield from paged_list.to_list()
606
+ next_page_token = paged_list.token
607
+ if not next_page_token:
608
+ return
609
+ except MlflowException as e:
610
+ if e.error_code == "ENDPOINT_NOT_FOUND":
611
+ self.report.warning(
612
+ title="MLflow API Endpoint Not Found for Experiments.",
613
+ message="Please upgrade to version 1.28.0 or higher to ensure compatibility. Skipping ingestion for experiments and runs.",
614
+ context=None,
615
+ exc=e,
616
+ )
606
617
  return
618
+ else:
619
+ raise # Only re-raise other exceptions
607
620
 
608
621
  def _get_latest_version(self, registered_model: RegisteredModel) -> Optional[str]:
609
622
  return (
@@ -192,6 +192,11 @@ class SupportedDataPlatform(Enum):
192
192
  datahub_data_platform_name="mysql",
193
193
  )
194
194
 
195
+ ODBC = DataPlatformPair(
196
+ powerbi_data_platform_name="Odbc",
197
+ datahub_data_platform_name="odbc",
198
+ )
199
+
195
200
 
196
201
  @dataclass
197
202
  class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
@@ -341,6 +346,13 @@ class PowerBiDashboardSourceConfig(
341
346
  "For Google BigQuery the datasource's server is google bigquery project name. "
342
347
  "For Databricks Unity Catalog the datasource's server is workspace FQDN.",
343
348
  )
349
+ # ODBC DSN to platform mapping
350
+ dsn_to_platform_name: Dict[str, str] = pydantic.Field(
351
+ default={},
352
+ description="A mapping of ODBC DSN to DataHub data platform name. "
353
+ "For example with an ODBC connection string 'DSN=database' where the database type "
354
+ "is 'PostgreSQL' you would configure the mapping as 'database: postgres'.",
355
+ )
344
356
  # deprecated warning
345
357
  _dataset_type_mapping = pydantic_field_deprecated(
346
358
  "dataset_type_mapping",
@@ -75,3 +75,4 @@ class FunctionName(Enum):
75
75
  AMAZON_REDSHIFT_DATA_ACCESS = "AmazonRedshift.Database"
76
76
  DATABRICK_MULTI_CLOUD_DATA_ACCESS = "DatabricksMultiCloud.Catalogs"
77
77
  MYSQL_DATA_ACCESS = "MySQL.Database"
78
+ ODBC_DATA_ACCESS = "Odbc.DataSource"
@@ -0,0 +1,185 @@
1
+ import re
2
+ from typing import Optional, Tuple, Union
3
+
4
# Regex patterns for pulling a server/host value out of an ODBC connection
# string.  extract_server() tries these in order with re.IGNORECASE, so
# more specific forms (host:port, brace-quoted values) must come first.
server_patterns = [
    r"Server=([^:]+)[:][0-9]+/.*",
    r"SERVER=\{([^}]*)\}",
    r"SERVER=([^;]*)",
    r"HOST=\{([^}]*)\}",
    r"HOST=([^;]*)",
    r"DATA SOURCE=\{([^}]*)\}",
    r"DATA SOURCE=([^;]*)",
    r"DSN=\{([^}]*)\}",
    r"DSN=([^;]*)",
    # NOTE(review): unreachable — SERVER=([^;]*) above already matches
    # this case-insensitively.
    r"Server=([^;]*)",
    r"S3OutputLocation=([^;]*)",
    r"HTTPPath=([^;]*)",
    # NOTE(review): unreachable — HOST=([^;]*) above already matches
    # this case-insensitively.
    r"Host=([^;]*)",
]

# Regex patterns for extracting the DSN value; quoted forms (double- then
# single-quoted) are tried before the bare DSN=value form.  Unlike
# server_patterns, these tolerate whitespace around '='.
dsn_patterns = [
    r"DSN\s*=\s*\"([^\"]+)\"",
    r"DSN\s*=\s*\'([^\']+)\'",
    r"DSN\s*=\s*([^;]+)",
]

# Maps a normalized DataHub platform key to a regex that is searched
# against the *lowercased* driver or platform name.  Insertion order
# matters: the first matching entry wins in extract_platform() /
# normalize_platform_name().
platform_patterns = {
    "mysql": r"mysql",
    "postgres": r"post(gre(s|sql)?|gres)",
    "mssql": r"(sql\s*server|mssql|sqlncli)",
    "oracle": r"oracle",
    "db2": r"db2",
    "sqlite": r"sqlite",
    "access": r"(access|\.mdb|\.accdb)",
    "excel": r"(excel|\.xls)",
    "firebird": r"firebird",
    "informix": r"informix",
    "sybase": r"sybase",
    "teradata": r"teradata",
    "hadoop": r"(hadoop|hive)",
    "snowflake": r"snowflake",
    "redshift": r"redshift",
    "bigquery": r"bigquery",
    "athena": r"(athena|aws\s*athena)",
    "databricks": r"(databricks|spark)",
}

# Human-readable Power BI display name for each normalized platform key
# from platform_patterns.
powerbi_platform_names = {
    "mysql": "MySQL",
    "postgres": "PostgreSQL",
    "mssql": "SQL Server",
    "oracle": "Oracle",
    "db2": "IBM DB2",
    "sqlite": "SQLite",
    "access": "Microsoft Access",
    "excel": "Microsoft Excel",
    "firebird": "Firebird",
    "informix": "IBM Informix",
    "sybase": "SAP Sybase",
    "teradata": "Teradata",
    "hadoop": "Hadoop",
    "snowflake": "Snowflake",
    "redshift": "Amazon Redshift",
    "bigquery": "Google BigQuery",
    "athena": "Amazon Athena",
    "databricks": "Databricks",
}
67
+
68
+
69
def extract_driver(connection_string: str) -> Union[str, None]:
    """
    Parse an ODBC connection string and extract the driver name.
    Handles whitespace in driver names and various connection string formats.

    The brace-quoted form DRIVER={name} takes precedence over the bare
    DRIVER=name form; both are matched case-insensitively.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str: The extracted driver name, or None if not found
    """
    for candidate in (r"DRIVER=\{([^}]*)}", r"DRIVER=([^;]*)"):
        if found := re.search(candidate, connection_string, re.IGNORECASE):
            return found.group(1).strip()
    return None
93
+
94
+
95
def extract_dsn(connection_string: str) -> Union[str, None]:
    """
    Extract the DSN value from an ODBC connection string.

    Patterns from ``dsn_patterns`` are tried in order (quoted forms before
    the bare form), case-insensitively; the first hit wins.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str or None: The extracted DSN value, or None if not found
    """
    hit = next(
        (
            m
            for p in dsn_patterns
            if (m := re.search(p, connection_string, re.IGNORECASE))
        ),
        None,
    )
    return hit.group(1).strip() if hit else None
111
+
112
+
113
def extract_server(connection_string: str) -> Union[str, None]:
    """
    Parse an ODBC connection string and extract the server name.
    Handles various parameter names for server (SERVER, Host, Data Source, etc.)

    Falls back to Athena- and Databricks-specific heuristics when none of
    the generic ``server_patterns`` match.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str: The extracted server name, or None if not found
    """
    for candidate in server_patterns:
        if hit := re.search(candidate, connection_string, re.IGNORECASE):
            return hit.group(1).strip()

    # Special case for Athena: derive a pseudo-server from the AWS region.
    if region := re.search(r"AwsRegion=([^;]*)", connection_string, re.IGNORECASE):
        return f"aws-athena-{region.group(1).strip()}"

    # Special case for Databricks: take the hostname out of a JDBC URL.
    if jdbc := re.search(r"jdbc:spark://([^:;/]+)", connection_string, re.IGNORECASE):
        return jdbc.group(1).strip()

    return None
140
+
141
+
142
def extract_platform(connection_string: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Extract the database platform name from the ODBC driver name.
    Returns the lowercase platform name.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        tuple: A tuple containing the normalized platform name and the corresponding
            Power BI platform name, or None if not recognized.
    """
    driver = extract_driver(connection_string)
    if not driver:
        return None, None

    needle = driver.lower()

    # First platform whose pattern appears in the lowercased driver name wins.
    matched = next(
        (
            key
            for key, pattern in platform_patterns.items()
            if re.search(pattern, needle)
        ),
        None,
    )
    if matched is None:
        return None, None
    return matched, powerbi_platform_names.get(matched)
165
+
166
+
167
def normalize_platform_name(platform: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Normalizes the platform name by matching it with predefined patterns and maps it to
    a corresponding Power BI platform name.

    Args:
        platform (str): The platform name to normalize

    Returns:
        tuple: A tuple containing the normalized platform name and the corresponding
            Power BI platform name, or (None, None) if not recognized.
    """
    platform_lower = platform.lower()

    # Use a distinct loop variable: the original shadowed the `platform`
    # parameter here, which linters flag and which would silently break if
    # the lowercasing were ever moved into the loop.
    for canonical, pattern in platform_patterns.items():
        if re.search(pattern, platform_lower):
            return canonical, powerbi_platform_names.get(canonical)

    return None, None