acryl-datahub 1.0.0.3rc1__py3-none-any.whl → 1.0.0.3rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/METADATA +2285 -2283
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/RECORD +27 -26
- datahub/_version.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/datajob.py +7 -4
- datahub/emitter/mcp.py +5 -1
- datahub/ingestion/run/pipeline.py +6 -4
- datahub/ingestion/source/common/subtypes.py +3 -0
- datahub/ingestion/source/mlflow.py +19 -6
- datahub/ingestion/source/powerbi/config.py +12 -0
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
- datahub/ingestion/source/sigma/config.py +2 -3
- datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/tableau/tableau.py +31 -6
- datahub/ingestion/source/vertexai/vertexai.py +316 -4
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/testing/mcp_diff.py +15 -2
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc1.dist-info → acryl_datahub-1.0.0.3rc4.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.0.0.
|
|
1
|
+
acryl_datahub-1.0.0.3rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=911KmAC1s4iZRnTeHRY8SXqgCBef_UX6nB2GAa1FqkE,323
|
|
5
5
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
6
6
|
datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -37,12 +37,12 @@ datahub/api/entities/datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
37
37
|
datahub/api/entities/datacontract/assertion.py,sha256=3tkX2c1g6u3sZatuzrac9RNuPlAdxSSe5QhyVjt90SU,182
|
|
38
38
|
datahub/api/entities/datacontract/assertion_operator.py,sha256=N10PWIwhAPLAdcXIp_ZJoiQMS5FfFi4vH9sYmt2vUsA,4526
|
|
39
39
|
datahub/api/entities/datacontract/data_quality_assertion.py,sha256=ioYOz-oNkAfHdpTyG6wVhGpY6YpH1L2cHiyClpxoNvg,3918
|
|
40
|
-
datahub/api/entities/datacontract/datacontract.py,sha256=
|
|
40
|
+
datahub/api/entities/datacontract/datacontract.py,sha256=V1P5i8_9IuKf3k-mvJphZo9y28a_wnAwfipwCklPGGM,9272
|
|
41
41
|
datahub/api/entities/datacontract/freshness_assertion.py,sha256=TpKqNeUzeGH9wrYqelYVuZpolEm7iYXj1nymwWu8-_c,2700
|
|
42
42
|
datahub/api/entities/datacontract/schema_assertion.py,sha256=24VQ0lm4LxWbpIrho2R6Atc80yqo76Vu7AN2LZiMOzc,2418
|
|
43
43
|
datahub/api/entities/datajob/__init__.py,sha256=suzCs4cLphQ64oDG7meXsJVp6ya9_Glzqamoq_ti5GA,222
|
|
44
44
|
datahub/api/entities/datajob/dataflow.py,sha256=VBaBoO9it2cqoJarXqGQhSGJrecHVqGhdCPbpzqeSz0,7505
|
|
45
|
-
datahub/api/entities/datajob/datajob.py,sha256=
|
|
45
|
+
datahub/api/entities/datajob/datajob.py,sha256=gAIdTSlAY3iV3R3EUAcOtuYam8aR2jTGQ833iVsQNt4,8033
|
|
46
46
|
datahub/api/entities/dataprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
47
|
datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW1_sJA79GxQyg9lhILBR66IrnLkY,19120
|
|
48
48
|
datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -121,7 +121,7 @@ datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1K
|
|
|
121
121
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
122
122
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
123
123
|
datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
|
|
124
|
-
datahub/emitter/mcp.py,sha256=
|
|
124
|
+
datahub/emitter/mcp.py,sha256=v7tKlIFX4s7f77KQYeFww8QbOQu6-qU609VeQiUkcsY,9796
|
|
125
125
|
datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
|
|
126
126
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
127
127
|
datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
|
|
@@ -182,7 +182,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
|
|
|
182
182
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
183
183
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
184
184
|
datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
|
|
185
|
-
datahub/ingestion/run/pipeline.py,sha256
|
|
185
|
+
datahub/ingestion/run/pipeline.py,sha256=4CJ3fUAPI1AzIjjg_lyv0FP9K0kfu9dPtNDwvVDojXs,29758
|
|
186
186
|
datahub/ingestion/run/pipeline_config.py,sha256=EDwqlid4h_qyqyeTRCEqb1RiFA4py_T-Poz1eIKmzT4,4101
|
|
187
187
|
datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
|
|
188
188
|
datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
|
|
|
205
205
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
206
206
|
datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
|
|
207
207
|
datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
|
|
208
|
-
datahub/ingestion/source/mlflow.py,sha256=
|
|
208
|
+
datahub/ingestion/source/mlflow.py,sha256=fh7izN9jlSwbpGIrEyJktlmwFZR5vNG9z9L5VQ31k_4,33141
|
|
209
209
|
datahub/ingestion/source/mode.py,sha256=_FKZutF-59w0pYhko6HSVL3yjjYNd329-2DJmyfDqF8,64492
|
|
210
210
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
211
211
|
datahub/ingestion/source/nifi.py,sha256=D1gBXxdpLuUQ0eurwofIR_SGg1rHGhwk3qxsWI1PT9c,56882
|
|
@@ -272,7 +272,7 @@ datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
272
272
|
datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
|
|
273
273
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
274
274
|
datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNClLlmOfANS7U6rChhdthRX9s9iUv9k,2411
|
|
275
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
275
|
+
datahub/ingestion/source/common/subtypes.py,sha256=iJ9IfuiLK_T6yJ9ovY1HA83ujaVWichAgR-rUlBXemk,3238
|
|
276
276
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
277
277
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
278
278
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
@@ -376,15 +376,16 @@ datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH5
|
|
|
376
376
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
377
377
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
|
|
378
378
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
379
|
-
datahub/ingestion/source/powerbi/config.py,sha256
|
|
379
|
+
datahub/ingestion/source/powerbi/config.py,sha256=-gof-85gqS_cft2blp5Uw5TVypii4T_bl8XhTZUVlgc,24707
|
|
380
380
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
381
381
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
|
|
382
382
|
datahub/ingestion/source/powerbi/powerbi.py,sha256=b9zNeT9aS7v2GWUL1SROnIMwQwAFX0YTO2UNQMLWItc,56450
|
|
383
383
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
384
|
-
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=
|
|
384
|
+
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=l_L6DzOWMShOWGtVclcf4JtNWzSINuwJka59LjwRLCk,2091
|
|
385
385
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
386
|
+
datahub/ingestion/source/powerbi/m_query/odbc.py,sha256=fZgl8-M5s3Y-3U9OVQs7ttc8FTDbzodIM2HJtFmPNI8,5405
|
|
386
387
|
datahub/ingestion/source/powerbi/m_query/parser.py,sha256=5KqhUwj9H9yL9ZMPP9oSeVGiZjvXjw6Iu_HrGr95E5M,5876
|
|
387
|
-
datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=
|
|
388
|
+
datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=MqZj7VBf9ppKYrA-dRaOVGFpotLFqZditwOD-6ynkFg,41635
|
|
388
389
|
datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=ISH8Xjx51q2S81fn2v5RhCCU-kRAW3juxM0rMFs4TDo,17413
|
|
389
390
|
datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=NIKNNHAE4kTJefTM1WR-StJi9NuingaRYn_mS_kV6A8,6180
|
|
390
391
|
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
@@ -439,7 +440,7 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
|
|
|
439
440
|
datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
|
|
440
441
|
datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
|
|
441
442
|
datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
442
|
-
datahub/ingestion/source/sigma/config.py,sha256=
|
|
443
|
+
datahub/ingestion/source/sigma/config.py,sha256=ztZf0YisGSXKgKeqP9ipDlRKLXU-Y-XABqm7HCJ8pvA,6265
|
|
443
444
|
datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
|
|
444
445
|
datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
|
|
445
446
|
datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
|
|
@@ -450,7 +451,7 @@ datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMP
|
|
|
450
451
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
451
452
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
452
453
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
453
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
454
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
|
|
454
455
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=pEw2O9xoTSIWDiROlkF8k4oj5zBjkqTnynLvut08yhc,17796
|
|
455
456
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
456
457
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
|
|
@@ -462,7 +463,7 @@ datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeT
|
|
|
462
463
|
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
|
|
463
464
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
464
465
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
465
|
-
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=
|
|
466
|
+
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
|
|
466
467
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
|
|
467
468
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=2lmvAeZELTjAzg4Y5E0oY41r1IzVEvg6OHAvVJftSFk,14081
|
|
468
469
|
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=nAbudDVh9A0kqao3jnIdgBlFNhNk1WIxoU1cofeXkFQ,33905
|
|
@@ -514,7 +515,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
514
515
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
|
|
515
516
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
516
517
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
517
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
518
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=WLOcP9VJgJPlCTLHuUWlglrHueoXvGCzXvctasJiIHE,155034
|
|
518
519
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=MIkHKZg_v2IVCRk-YdPlLZl3m0LcWZm5Indwb3IV2ZQ,26931
|
|
519
520
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
520
521
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
@@ -536,9 +537,9 @@ datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e
|
|
|
536
537
|
datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
|
|
537
538
|
datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
|
|
538
539
|
datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
539
|
-
datahub/ingestion/source/vertexai/vertexai.py,sha256=
|
|
540
|
+
datahub/ingestion/source/vertexai/vertexai.py,sha256=RuHda0mbc1DElYZIZ_W_hvkN7Eg4LIvI1fRFMvpHPB0,56012
|
|
540
541
|
datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
|
|
541
|
-
datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=
|
|
542
|
+
datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=dJwRxuDA3flmTKjV5EUbmDFfxE0S8K1CEPB_EYUfNfI,3578
|
|
542
543
|
datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
543
544
|
datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1pZSi33tzW9sQb7ZEgac,1733
|
|
544
545
|
datahub/ingestion/source_config/operation_config.py,sha256=hxF2RM0jk0HUPXYiliMniXBC-wz-ZPcs90ZGLfHT8rE,3924
|
|
@@ -592,8 +593,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
592
593
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
593
594
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
594
595
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
595
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
596
|
-
datahub/metadata/schema.avsc,sha256=
|
|
596
|
+
datahub/metadata/_schema_classes.py,sha256=sdjR8XblvlYPYxYeLDQwVzTcYPAR9ycCa2sNOylOews,1012659
|
|
597
|
+
datahub/metadata/schema.avsc,sha256=w2P9zIqAZQmcJdw3pJxAM05m97KssRHBMGbEn_VnjKU,753146
|
|
597
598
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
598
599
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
599
600
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -705,7 +706,7 @@ datahub/metadata/schemas/Cost.avsc,sha256=o4kYZSss2uEwJ6gCA9fhBUoyD5xUqcSxz78vkI
|
|
|
705
706
|
datahub/metadata/schemas/DashboardInfo.avsc,sha256=li2lSV6R4V-nz6foOi-NYxt_8ShHWfoKRw6M2BG5530,12907
|
|
706
707
|
datahub/metadata/schemas/DashboardKey.avsc,sha256=yKlusgebWTvZhVeGgRNLZW6Qu6Fg_K0e2EbV8zr3jvA,1360
|
|
707
708
|
datahub/metadata/schemas/DashboardUsageStatistics.avsc,sha256=pUAKqs49Wy5pAL92g_6QcFtJeoYeMWRGiHWS68IJN2A,7693
|
|
708
|
-
datahub/metadata/schemas/DataContractKey.avsc,sha256=
|
|
709
|
+
datahub/metadata/schemas/DataContractKey.avsc,sha256=m0ej_Wu7NcuZQCRwQI3Sidfv9bUy5mvuhlpgax6i1xA,511
|
|
709
710
|
datahub/metadata/schemas/DataContractProperties.avsc,sha256=RCxuJMlZwqEE0iHTpuXvcH6zRFoOt7ysQFPrJRp3RqE,4763
|
|
710
711
|
datahub/metadata/schemas/DataContractStatus.avsc,sha256=5yvT43AIB13Dn_h0-4s7fsL7BTuXhkK5pi2KJug4_qg,1029
|
|
711
712
|
datahub/metadata/schemas/DataFlowInfo.avsc,sha256=tDRTd1rA3v_7kwUVbQbb-cuo6D-t3pcuE4fiRz4D8f0,4682
|
|
@@ -952,7 +953,7 @@ datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4N
|
|
|
952
953
|
datahub/testing/compare_metadata_json.py,sha256=mTU5evu7KLS3cx8OLOC1fFxj0eY1J1CGV2PEQZmapos,5361
|
|
953
954
|
datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
|
|
954
955
|
datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
|
|
955
|
-
datahub/testing/mcp_diff.py,sha256=
|
|
956
|
+
datahub/testing/mcp_diff.py,sha256=1BpQ3hST46cOQi1SmKdsto3j6x6Sk6yHm0vG1w9IDL0,10749
|
|
956
957
|
datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
|
|
957
958
|
datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
958
959
|
datahub/upgrade/upgrade.py,sha256=lf60_dCu51twObAL5E8NqdrW3_2lsnUJUaB9MSEVXwI,16638
|
|
@@ -1045,8 +1046,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1045
1046
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1046
1047
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1047
1048
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1048
|
-
acryl_datahub-1.0.0.
|
|
1049
|
-
acryl_datahub-1.0.0.
|
|
1050
|
-
acryl_datahub-1.0.0.
|
|
1051
|
-
acryl_datahub-1.0.0.
|
|
1052
|
-
acryl_datahub-1.0.0.
|
|
1049
|
+
acryl_datahub-1.0.0.3rc4.dist-info/METADATA,sha256=66uBY8gH_YmKeBmwbECGBpJhT7JiyrJXfFJv5wcFgQQ,176965
|
|
1050
|
+
acryl_datahub-1.0.0.3rc4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
1051
|
+
acryl_datahub-1.0.0.3rc4.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1052
|
+
acryl_datahub-1.0.0.3rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1053
|
+
acryl_datahub-1.0.0.3rc4.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import collections
|
|
2
|
-
from typing import Iterable, List, Optional, Tuple
|
|
2
|
+
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
|
3
3
|
|
|
4
4
|
from ruamel.yaml import YAML
|
|
5
5
|
from typing_extensions import Literal
|
|
@@ -25,6 +25,8 @@ from datahub.metadata.schema_classes import (
|
|
|
25
25
|
FreshnessContractClass,
|
|
26
26
|
SchemaContractClass,
|
|
27
27
|
StatusClass,
|
|
28
|
+
StructuredPropertiesClass,
|
|
29
|
+
StructuredPropertyValueAssignmentClass,
|
|
28
30
|
)
|
|
29
31
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
30
32
|
|
|
@@ -47,8 +49,12 @@ class DataContract(v1_ConfigModel):
|
|
|
47
49
|
entity: str = v1_Field(
|
|
48
50
|
description="The entity urn that the Data Contract is associated with"
|
|
49
51
|
)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
+
properties: Optional[Dict[str, Union[str, float, List[Union[str, float]]]]] = (
|
|
53
|
+
v1_Field(
|
|
54
|
+
default=None,
|
|
55
|
+
description="Structured properties associated with the data contract.",
|
|
56
|
+
)
|
|
57
|
+
)
|
|
52
58
|
|
|
53
59
|
schema_field: Optional[SchemaAssertion] = v1_Field(default=None, alias="schema")
|
|
54
60
|
|
|
@@ -172,6 +178,30 @@ class DataContract(v1_ConfigModel):
|
|
|
172
178
|
)
|
|
173
179
|
yield from dq_assertion_mcps
|
|
174
180
|
|
|
181
|
+
# Construct the structured properties aspect if properties are defined
|
|
182
|
+
structured_properties_aspect: Optional[StructuredPropertiesClass] = None
|
|
183
|
+
if self.properties:
|
|
184
|
+
property_assignments: List[StructuredPropertyValueAssignmentClass] = []
|
|
185
|
+
for key, value in self.properties.items():
|
|
186
|
+
# Use f-string formatting for the property URN, like in dataset.py
|
|
187
|
+
prop_urn = f"urn:li:structuredProperty:{key}"
|
|
188
|
+
# Ensure value is a list for StructuredPropertyValueAssignmentClass
|
|
189
|
+
values_list = value if isinstance(value, list) else [value]
|
|
190
|
+
property_assignments.append(
|
|
191
|
+
StructuredPropertyValueAssignmentClass(
|
|
192
|
+
propertyUrn=prop_urn,
|
|
193
|
+
values=[
|
|
194
|
+
str(v) for v in values_list
|
|
195
|
+
], # Ensure all values are strings
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
if (
|
|
199
|
+
property_assignments
|
|
200
|
+
): # Only create aspect if there are valid assignments
|
|
201
|
+
structured_properties_aspect = StructuredPropertiesClass(
|
|
202
|
+
properties=property_assignments
|
|
203
|
+
)
|
|
204
|
+
|
|
175
205
|
# Now that we've generated the assertions, we can generate
|
|
176
206
|
# the actual data contract.
|
|
177
207
|
yield from MetadataChangeProposalWrapper.construct_many(
|
|
@@ -202,6 +232,8 @@ class DataContract(v1_ConfigModel):
|
|
|
202
232
|
if True
|
|
203
233
|
else None
|
|
204
234
|
),
|
|
235
|
+
# Add structured properties aspect if defined
|
|
236
|
+
structured_properties_aspect,
|
|
205
237
|
],
|
|
206
238
|
)
|
|
207
239
|
|
|
@@ -108,7 +108,9 @@ class DataJob:
|
|
|
108
108
|
return [tags]
|
|
109
109
|
|
|
110
110
|
def generate_mcp(
|
|
111
|
-
self,
|
|
111
|
+
self,
|
|
112
|
+
generate_lineage: bool = True,
|
|
113
|
+
materialize_iolets: bool = True,
|
|
112
114
|
) -> Iterable[MetadataChangeProposalWrapper]:
|
|
113
115
|
env: Optional[str] = None
|
|
114
116
|
if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
|
|
@@ -152,9 +154,10 @@ class DataJob:
|
|
|
152
154
|
)
|
|
153
155
|
yield mcp
|
|
154
156
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
157
|
+
if generate_lineage:
|
|
158
|
+
yield from self.generate_data_input_output_mcp(
|
|
159
|
+
materialize_iolets=materialize_iolets
|
|
160
|
+
)
|
|
158
161
|
|
|
159
162
|
for owner in self.generate_ownership_aspect():
|
|
160
163
|
mcp = MetadataChangeProposalWrapper(
|
datahub/emitter/mcp.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import json
|
|
3
|
-
from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
|
|
3
|
+
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
|
|
6
6
|
from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
|
|
@@ -69,6 +69,7 @@ class MetadataChangeProposalWrapper:
|
|
|
69
69
|
aspectName: Union[None, str] = None
|
|
70
70
|
aspect: Union[None, _Aspect] = None
|
|
71
71
|
systemMetadata: Union[None, SystemMetadataClass] = None
|
|
72
|
+
headers: Union[None, Dict[str, str]] = None
|
|
72
73
|
|
|
73
74
|
def __post_init__(self) -> None:
|
|
74
75
|
if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
|
|
@@ -112,6 +113,7 @@ class MetadataChangeProposalWrapper:
|
|
|
112
113
|
auditHeader=self.auditHeader,
|
|
113
114
|
aspectName=self.aspectName,
|
|
114
115
|
systemMetadata=self.systemMetadata,
|
|
116
|
+
headers=self.headers,
|
|
115
117
|
)
|
|
116
118
|
|
|
117
119
|
def make_mcp(self) -> MetadataChangeProposalClass:
|
|
@@ -211,6 +213,7 @@ class MetadataChangeProposalWrapper:
|
|
|
211
213
|
aspectName=mcpc.aspectName,
|
|
212
214
|
aspect=aspect,
|
|
213
215
|
systemMetadata=mcpc.systemMetadata,
|
|
216
|
+
headers=mcpc.headers,
|
|
214
217
|
)
|
|
215
218
|
else:
|
|
216
219
|
return None
|
|
@@ -228,6 +231,7 @@ class MetadataChangeProposalWrapper:
|
|
|
228
231
|
changeType=mcl.changeType,
|
|
229
232
|
auditHeader=mcl.auditHeader,
|
|
230
233
|
systemMetadata=mcl.systemMetadata,
|
|
234
|
+
headers=mcl.headers,
|
|
231
235
|
)
|
|
232
236
|
return cls.try_from_mcpc(mcpc) or mcpc
|
|
233
237
|
|
|
@@ -555,18 +555,20 @@ class Pipeline:
|
|
|
555
555
|
def raise_from_status(self, raise_warnings: bool = False) -> None:
|
|
556
556
|
if self.source.get_report().failures:
|
|
557
557
|
raise PipelineExecutionError(
|
|
558
|
-
"Source reported errors", self.source.get_report()
|
|
558
|
+
"Source reported errors", self.source.get_report().failures
|
|
559
559
|
)
|
|
560
560
|
if self.sink.get_report().failures:
|
|
561
|
-
raise PipelineExecutionError(
|
|
561
|
+
raise PipelineExecutionError(
|
|
562
|
+
"Sink reported errors", self.sink.get_report().failures
|
|
563
|
+
)
|
|
562
564
|
if raise_warnings:
|
|
563
565
|
if self.source.get_report().warnings:
|
|
564
566
|
raise PipelineExecutionError(
|
|
565
|
-
"Source reported warnings", self.source.get_report()
|
|
567
|
+
"Source reported warnings", self.source.get_report().warnings
|
|
566
568
|
)
|
|
567
569
|
if self.sink.get_report().warnings:
|
|
568
570
|
raise PipelineExecutionError(
|
|
569
|
-
"Sink reported warnings", self.sink.get_report()
|
|
571
|
+
"Sink reported warnings", self.sink.get_report().warnings
|
|
570
572
|
)
|
|
571
573
|
|
|
572
574
|
def log_ingestion_stats(self) -> None:
|
|
@@ -7,6 +7,7 @@ from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Unio
|
|
|
7
7
|
from mlflow import MlflowClient
|
|
8
8
|
from mlflow.entities import Dataset as MlflowDataset, Experiment, Run
|
|
9
9
|
from mlflow.entities.model_registry import ModelVersion, RegisteredModel
|
|
10
|
+
from mlflow.exceptions import MlflowException
|
|
10
11
|
from mlflow.store.entities import PagedList
|
|
11
12
|
from pydantic.fields import Field
|
|
12
13
|
|
|
@@ -589,8 +590,8 @@ class MLflowSource(StatefulIngestionSourceBase):
|
|
|
589
590
|
)
|
|
590
591
|
return runs
|
|
591
592
|
|
|
592
|
-
@staticmethod
|
|
593
593
|
def _traverse_mlflow_search_func(
|
|
594
|
+
self,
|
|
594
595
|
search_func: Callable[..., PagedList[T]],
|
|
595
596
|
**kwargs: Any,
|
|
596
597
|
) -> Iterable[T]:
|
|
@@ -598,12 +599,24 @@ class MLflowSource(StatefulIngestionSourceBase):
|
|
|
598
599
|
Utility to traverse an MLflow search_* functions which return PagedList.
|
|
599
600
|
"""
|
|
600
601
|
next_page_token = None
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
602
|
+
try:
|
|
603
|
+
while True:
|
|
604
|
+
paged_list = search_func(page_token=next_page_token, **kwargs)
|
|
605
|
+
yield from paged_list.to_list()
|
|
606
|
+
next_page_token = paged_list.token
|
|
607
|
+
if not next_page_token:
|
|
608
|
+
return
|
|
609
|
+
except MlflowException as e:
|
|
610
|
+
if e.error_code == "ENDPOINT_NOT_FOUND":
|
|
611
|
+
self.report.warning(
|
|
612
|
+
title="MLflow API Endpoint Not Found for Experiments.",
|
|
613
|
+
message="Please upgrade to version 1.28.0 or higher to ensure compatibility. Skipping ingestion for experiments and runs.",
|
|
614
|
+
context=None,
|
|
615
|
+
exc=e,
|
|
616
|
+
)
|
|
606
617
|
return
|
|
618
|
+
else:
|
|
619
|
+
raise # Only re-raise other exceptions
|
|
607
620
|
|
|
608
621
|
def _get_latest_version(self, registered_model: RegisteredModel) -> Optional[str]:
|
|
609
622
|
return (
|
|
@@ -192,6 +192,11 @@ class SupportedDataPlatform(Enum):
|
|
|
192
192
|
datahub_data_platform_name="mysql",
|
|
193
193
|
)
|
|
194
194
|
|
|
195
|
+
ODBC = DataPlatformPair(
|
|
196
|
+
powerbi_data_platform_name="Odbc",
|
|
197
|
+
datahub_data_platform_name="odbc",
|
|
198
|
+
)
|
|
199
|
+
|
|
195
200
|
|
|
196
201
|
@dataclass
|
|
197
202
|
class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
|
|
@@ -341,6 +346,13 @@ class PowerBiDashboardSourceConfig(
|
|
|
341
346
|
"For Google BigQuery the datasource's server is google bigquery project name. "
|
|
342
347
|
"For Databricks Unity Catalog the datasource's server is workspace FQDN.",
|
|
343
348
|
)
|
|
349
|
+
# ODBC DSN to platform mapping
|
|
350
|
+
dsn_to_platform_name: Dict[str, str] = pydantic.Field(
|
|
351
|
+
default={},
|
|
352
|
+
description="A mapping of ODBC DSN to DataHub data platform name. "
|
|
353
|
+
"For example with an ODBC connection string 'DSN=database' where the database type "
|
|
354
|
+
"is 'PostgreSQL' you would configure the mapping as 'database: postgres'.",
|
|
355
|
+
)
|
|
344
356
|
# deprecated warning
|
|
345
357
|
_dataset_type_mapping = pydantic_field_deprecated(
|
|
346
358
|
"dataset_type_mapping",
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Optional, Tuple, Union
|
|
3
|
+
|
|
4
|
+
server_patterns = [
|
|
5
|
+
r"Server=([^:]+)[:][0-9]+/.*",
|
|
6
|
+
r"SERVER=\{([^}]*)\}",
|
|
7
|
+
r"SERVER=([^;]*)",
|
|
8
|
+
r"HOST=\{([^}]*)\}",
|
|
9
|
+
r"HOST=([^;]*)",
|
|
10
|
+
r"DATA SOURCE=\{([^}]*)\}",
|
|
11
|
+
r"DATA SOURCE=([^;]*)",
|
|
12
|
+
r"DSN=\{([^}]*)\}",
|
|
13
|
+
r"DSN=([^;]*)",
|
|
14
|
+
r"Server=([^;]*)",
|
|
15
|
+
r"S3OutputLocation=([^;]*)",
|
|
16
|
+
r"HTTPPath=([^;]*)",
|
|
17
|
+
r"Host=([^;]*)",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
dsn_patterns = [
|
|
21
|
+
r"DSN\s*=\s*\"([^\"]+)\"",
|
|
22
|
+
r"DSN\s*=\s*\'([^\']+)\'",
|
|
23
|
+
r"DSN\s*=\s*([^;]+)",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
platform_patterns = {
|
|
27
|
+
"mysql": r"mysql",
|
|
28
|
+
"postgres": r"post(gre(s|sql)?|gres)",
|
|
29
|
+
"mssql": r"(sql\s*server|mssql|sqlncli)",
|
|
30
|
+
"oracle": r"oracle",
|
|
31
|
+
"db2": r"db2",
|
|
32
|
+
"sqlite": r"sqlite",
|
|
33
|
+
"access": r"(access|\.mdb|\.accdb)",
|
|
34
|
+
"excel": r"(excel|\.xls)",
|
|
35
|
+
"firebird": r"firebird",
|
|
36
|
+
"informix": r"informix",
|
|
37
|
+
"sybase": r"sybase",
|
|
38
|
+
"teradata": r"teradata",
|
|
39
|
+
"hadoop": r"(hadoop|hive)",
|
|
40
|
+
"snowflake": r"snowflake",
|
|
41
|
+
"redshift": r"redshift",
|
|
42
|
+
"bigquery": r"bigquery",
|
|
43
|
+
"athena": r"(athena|aws\s*athena)",
|
|
44
|
+
"databricks": r"(databricks|spark)",
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
powerbi_platform_names = {
|
|
48
|
+
"mysql": "MySQL",
|
|
49
|
+
"postgres": "PostgreSQL",
|
|
50
|
+
"mssql": "SQL Server",
|
|
51
|
+
"oracle": "Oracle",
|
|
52
|
+
"db2": "IBM DB2",
|
|
53
|
+
"sqlite": "SQLite",
|
|
54
|
+
"access": "Microsoft Access",
|
|
55
|
+
"excel": "Microsoft Excel",
|
|
56
|
+
"firebird": "Firebird",
|
|
57
|
+
"informix": "IBM Informix",
|
|
58
|
+
"sybase": "SAP Sybase",
|
|
59
|
+
"teradata": "Teradata",
|
|
60
|
+
"hadoop": "Hadoop",
|
|
61
|
+
"snowflake": "Snowflake",
|
|
62
|
+
"redshift": "Amazon Redshift",
|
|
63
|
+
"bigquery": "Google BigQuery",
|
|
64
|
+
"athena": "Amazon Athena",
|
|
65
|
+
"databricks": "Databricks",
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def extract_driver(connection_string: str) -> Union[str, None]:
    """
    Parse an ODBC connection string and extract the driver name.

    Two spellings are recognised, both case-insensitively and with optional
    whitespace around the ``=`` sign:

    * ``DRIVER={driver name}`` - the value is everything between the braces,
      so it may contain spaces and semicolons.
    * ``DRIVER=driver name`` - the value runs up to the next ``;`` or the end
      of the string.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str: The extracted driver name with surrounding whitespace removed,
        or None if no DRIVER keyword is found. An empty value (``DRIVER=;``)
        yields an empty string.
    """
    driver_patterns = (
        # Braced form first, otherwise the bare form below would capture the
        # braces as part of the name.
        r"DRIVER\s*=\s*\{([^}]*)\}",
        # Bare form: everything up to the next ';'.
        r"DRIVER\s*=\s*([^;]*)",
    )

    for pattern in driver_patterns:
        driver_match = re.search(pattern, connection_string, re.IGNORECASE)
        if driver_match:
            return driver_match.group(1).strip()

    return None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def extract_dsn(connection_string: str) -> Union[str, None]:
    """
    Look up the DSN keyword in an ODBC connection string.

    Every regex in ``dsn_patterns`` is searched case-insensitively, in list
    order; the first regex that matches decides the result.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str or None: The captured DSN value with surrounding whitespace
        removed, or None if none of the patterns match
    """
    # Lazy generator: searching stops as soon as one pattern matches.
    hits = (
        re.search(dsn_regex, connection_string, re.IGNORECASE)
        for dsn_regex in dsn_patterns
    )
    first_hit = next((hit for hit in hits if hit), None)

    if first_hit is None:
        return None
    return first_hit.group(1).strip()
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def extract_server(connection_string: str) -> Union[str, None]:
    """
    Pull the server name out of an ODBC connection string.

    The regexes in ``server_patterns`` are searched first, case-insensitively
    and in list order. If none of them match, two fallbacks are tried: an
    ``AwsRegion=`` keyword (Athena) and a ``jdbc:spark://`` URL (Databricks).

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        str: The extracted server name with surrounding whitespace removed
        (``aws-athena-<region>`` for the AwsRegion fallback), or None if
        nothing matches
    """
    flags = re.IGNORECASE

    # Generic server keywords: the first pattern in the list that matches wins.
    for server_regex in server_patterns:
        found = re.search(server_regex, connection_string, flags)
        if found is not None:
            return found.group(1).strip()

    # Athena fallback: build a server name from the AWS region.
    found = re.search(r"AwsRegion=([^;]*)", connection_string, flags)
    if found is not None:
        return f"aws-athena-{found.group(1).strip()}"

    # Databricks fallback: take the host part of an embedded JDBC URL.
    found = re.search(r"jdbc:spark://([^:;/]+)", connection_string, flags)
    return None if found is None else found.group(1).strip()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def extract_platform(connection_string: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Identify the database platform from the driver named in an ODBC
    connection string.

    The driver name returned by ``extract_driver`` is lower-cased and checked
    against ``platform_patterns`` in insertion order; the first entry whose
    regex matches is used.

    Args:
        connection_string (str): The ODBC connection string

    Returns:
        tuple: ``(platform key, Power BI platform name)`` for the first
        matching entry, or ``(None, None)`` when the connection string has no
        (non-empty) driver or the driver matches no known platform.
    """
    driver = extract_driver(connection_string)

    # A missing or empty driver falls straight through to the (None, None) result.
    if driver:
        haystack = driver.lower()
        for key, regex in platform_patterns.items():
            if re.search(regex, haystack) is not None:
                return key, powerbi_platform_names.get(key)

    return None, None
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def normalize_platform_name(platform: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Map a free-form platform name onto a known platform key and its Power BI
    platform name.

    The input is lower-cased and checked against ``platform_patterns`` in
    insertion order; the first entry whose regex matches is used.

    Args:
        platform (str): The platform name to normalize

    Returns:
        tuple: ``(platform key, Power BI platform name)`` for the first
        matching entry, or ``(None, None)`` if no pattern matches.
    """
    needle = platform.lower()

    # First key, in dict order, whose regex is found in the lower-cased input.
    matched_key = next(
        (key for key, regex in platform_patterns.items() if re.search(regex, needle)),
        None,
    )

    if matched_key is None:
        return None, None
    return matched_key, powerbi_platform_names.get(matched_key)
|