acryl-datahub 1.0.0rc9__py3-none-any.whl → 1.0.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (36)
  1. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/METADATA +2445 -2446
  2. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/RECORD +36 -35
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +4 -3
  5. datahub/emitter/mce_builder.py +28 -13
  6. datahub/ingestion/source/common/subtypes.py +7 -0
  7. datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
  8. datahub/ingestion/source/identity/okta.py +22 -0
  9. datahub/ingestion/source/metabase.py +3 -3
  10. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  11. datahub/ingestion/source/mode.py +1 -1
  12. datahub/ingestion/source/redshift/config.py +4 -0
  13. datahub/ingestion/source/redshift/datashares.py +236 -0
  14. datahub/ingestion/source/redshift/lineage.py +6 -2
  15. datahub/ingestion/source/redshift/lineage_v2.py +7 -4
  16. datahub/ingestion/source/redshift/profile.py +1 -1
  17. datahub/ingestion/source/redshift/query.py +125 -33
  18. datahub/ingestion/source/redshift/redshift.py +41 -72
  19. datahub/ingestion/source/redshift/redshift_schema.py +166 -6
  20. datahub/ingestion/source/redshift/report.py +3 -0
  21. datahub/ingestion/source/sql/mssql/job_models.py +29 -0
  22. datahub/ingestion/source/sql/mssql/source.py +10 -4
  23. datahub/ingestion/source/sql/oracle.py +93 -63
  24. datahub/metadata/_schema_classes.py +5 -5
  25. datahub/metadata/_urns/urn_defs.py +24 -0
  26. datahub/metadata/schema.avsc +2 -1
  27. datahub/metadata/schemas/DomainKey.avsc +2 -1
  28. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  29. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  30. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  31. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  32. datahub/sql_parsing/sql_parsing_common.py +7 -0
  33. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/LICENSE +0 -0
  34. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/WHEEL +0 -0
  35. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/entry_points.txt +0 -0
  36. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=Lb1n-6UK3dLoc2_soJ-gDgjrGTnKGEGSjla124hRAcA,321
3
+ datahub/_version.py,sha256=m3vMOf1XXwW_i72T14wHeXSyYmTku5A-KQz7nxQXArM,322
4
4
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
5
5
  datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
6
6
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,7 +27,7 @@ datahub/api/entities/assertion/sql_assertion.py,sha256=myJU-Wf8O-RbiyU_Xlbp2cacw
27
27
  datahub/api/entities/assertion/volume_assertion.py,sha256=37bNLGP-81MvcZj_cVHvrdw5I4aBxkER0xN0ZqyB3NU,3360
28
28
  datahub/api/entities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  datahub/api/entities/common/data_platform_instance.py,sha256=AVqQ-yactNZi_bislIEUcQZCGovaHY-gQi1EY7PVsT4,1065
30
- datahub/api/entities/common/serialized_value.py,sha256=pfw16vH2sG6U5kgBsmuEFBxJ0vL9CZpOQ5HZGOFbbQ0,5538
30
+ datahub/api/entities/common/serialized_value.py,sha256=DFPK7p4OwqRTOnH8luEWzqH_4vQHZSNxFIL63x_o2ok,5565
31
31
  datahub/api/entities/corpgroup/__init__.py,sha256=Uf3SxsZUSY-yZ2Kx3-1dWwz600D1C4Ds_z_nG7hwanA,63
32
32
  datahub/api/entities/corpgroup/corpgroup.py,sha256=XSrGHCwl7lMNtzWviMzZbw8VDdesXC2HLZP5kpHt2fQ,8878
33
33
  datahub/api/entities/corpuser/__init__.py,sha256=RspO1ceu6q2zUqYqZqRRY_MPcP7PNdd2lQoZn-KfeQE,60
@@ -119,7 +119,7 @@ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP
119
119
  datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
120
120
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
121
121
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
122
- datahub/emitter/mce_builder.py,sha256=9wjXG1WmWZUN7-_JdRJ5OcH8IPG0b3TGzxry4yscOR0,16545
122
+ datahub/emitter/mce_builder.py,sha256=8UiG2VsYgC7n29h_y4qL6F9faGwwMZF3zGscl_CBT9s,16808
123
123
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
124
124
  datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
125
125
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
@@ -202,9 +202,9 @@ datahub/ingestion/source/ge_data_profiler.py,sha256=C93ZZrtIRVL6pDpQ3fn7ZbbJiZmH
202
202
  datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0GX0az6HYqNUZRnIu_fQ,10866
203
203
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
204
204
  datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
205
- datahub/ingestion/source/metabase.py,sha256=6mUPZrgv0Yrdu_crYWjbd1B6dRKx1YCRAz9uocIZYXw,32588
205
+ datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
206
206
  datahub/ingestion/source/mlflow.py,sha256=cqQivSyrptm15vn--xbT7eTRHJJVKMmQpoVqfzuDIDU,12858
207
- datahub/ingestion/source/mode.py,sha256=26gB13L6Eflm8fle_e31x-FnLn41WdRsBmelsiFavu4,63627
207
+ datahub/ingestion/source/mode.py,sha256=w85zCIZicfABx5dKCupsGpH1tgUMhS1El-jIWa2gwNU,63632
208
208
  datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
209
209
  datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
210
210
  datahub/ingestion/source/openapi.py,sha256=39ep3etbWh8NBPjTXXwH3mieC5P6bMVAjhvK7UvcTis,17372
@@ -268,7 +268,7 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
268
268
  datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
269
269
  datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
270
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
271
- datahub/ingestion/source/common/subtypes.py,sha256=S0ssIxV7V38HGQwl-h5izYWyj1MQgmvJk4k_Q-5VGJ8,2329
271
+ datahub/ingestion/source/common/subtypes.py,sha256=EiYSjBHiRvGjRB5wjKEfS5b_k9tQCFWMP1ADw_1p-CY,2525
272
272
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
273
273
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
274
274
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
@@ -324,11 +324,11 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
324
324
  datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
325
325
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
326
326
  datahub/ingestion/source/iceberg/iceberg.py,sha256=pMWQtn88XAYwZsRNkICX1GlQOqOnyuWdLpkcjVQEon0,29039
327
- datahub/ingestion/source/iceberg/iceberg_common.py,sha256=krt-41r90t0CkNeJXsiwO-p5zJIulI-tyq3xaU2yw_c,10645
327
+ datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
328
328
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
329
329
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
330
  datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
331
- datahub/ingestion/source/identity/okta.py,sha256=ZVvRgFUyJ2jUSq0RS_0Cx-2J4oxMbruKhk7blts_HaU,31231
331
+ datahub/ingestion/source/identity/okta.py,sha256=jC21myJuMRTaPgj0OD9heaC-mz8ECjqpy2hSJwlUSwM,31943
332
332
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
333
  datahub/ingestion/source/kafka/kafka.py,sha256=mboUWQmlumEwcXwY2POeK1L8tdk5-CABakZ-MWbvdNQ,26579
334
334
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
@@ -360,7 +360,7 @@ datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz
360
360
  datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
361
361
  datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
362
362
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
363
- datahub/ingestion/source/metadata/business_glossary.py,sha256=yySwJp2SCUQp8hRwN2lQuSqvOQowIhCKDKj9syhlTZA,18210
363
+ datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
364
364
  datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH57g-u6LWbu_f7HM4,9521
365
365
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
366
366
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
@@ -396,16 +396,17 @@ datahub/ingestion/source/qlik_sense/qlik_api.py,sha256=KoBaD1VowYrbaRg1rjDP1_mmP
396
396
  datahub/ingestion/source/qlik_sense/qlik_sense.py,sha256=bmhmOgSXzC6g-uqO1ljFLRNz2oo6Xjn400UQnWdMA1Y,22530
397
397
  datahub/ingestion/source/qlik_sense/websocket_connection.py,sha256=jp39OInvjCN9BtnKsHU_aa1B3X9hVHqSmD25stXuqHk,1940
398
398
  datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
399
- datahub/ingestion/source/redshift/config.py,sha256=S0yQ9wZKdGjWeeziWHpPECJ3wGYWBIsdhS3YJ5oAX_Y,8853
399
+ datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUhaMGec8FnUbSl6Y,8997
400
+ datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
400
401
  datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
401
- datahub/ingestion/source/redshift/lineage.py,sha256=bUy0uJowrqSc33Z50fIxFlJkyhe-OPM_qgPh-smSTgM,43983
402
- datahub/ingestion/source/redshift/lineage_v2.py,sha256=OcVW_27sSaZOYZPTd2j-LS9SzFQ1kXz6cMzM2ZDWhJQ,16751
403
- datahub/ingestion/source/redshift/profile.py,sha256=T4H79ycq2tPobLM1tTLRtu581Qa8LlKxEok49m0AirU,4294
404
- datahub/ingestion/source/redshift/query.py,sha256=X0KlDPzM68j0SYKXhq50DkLbFUIbGuPmGCYYmr8E0v0,44353
405
- datahub/ingestion/source/redshift/redshift.py,sha256=x9dKocJdGPaNs2fRdaddaBtZNxmTJFwYDhXY5nl_5zM,44444
402
+ datahub/ingestion/source/redshift/lineage.py,sha256=Gk2dNuRBEipZkY5W1sArlfRbFR7mBKutCFHHTrn3yX4,44096
403
+ datahub/ingestion/source/redshift/lineage_v2.py,sha256=H6Qky5dLeZEICdDWyH-My78NoKlXpExHg3m-6d5lbgo,16891
404
+ datahub/ingestion/source/redshift/profile.py,sha256=jqFQUSg_qzSYi1yIAq24NFwHW8yIcSDSSh-vgJ4nl6M,4287
405
+ datahub/ingestion/source/redshift/query.py,sha256=6Fw3I8qFLflySDu6WY5D9NjXnRnDIw0yxKisSpaHh0A,47526
406
+ datahub/ingestion/source/redshift/redshift.py,sha256=IZqeQws3mvDdu9K-ixPGZNalDcRRRse-l_TTwQI7B-4,43407
406
407
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
407
- datahub/ingestion/source/redshift/redshift_schema.py,sha256=9IYeUsnISenq3eVB3k-s7zK8nInWDAYViFnDrNjtkb0,19149
408
- datahub/ingestion/source/redshift/report.py,sha256=M19aUHBkd9n-BVBX4fRhyRNdVkN2b9Es6ZqInRx5ZGI,2958
408
+ datahub/ingestion/source/redshift/redshift_schema.py,sha256=WTc-j4_PYlFgaJZ3hEorGIBWKruTX57E7V_5JaUe8mU,24045
409
+ datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
409
410
  datahub/ingestion/source/redshift/usage.py,sha256=eSdB1MYZeQokkQOwl9LPdpo-oCBJSwxJBotSpJ9XjBc,17473
410
411
  datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
411
412
  datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
@@ -464,7 +465,7 @@ datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnX
464
465
  datahub/ingestion/source/sql/hive_metastore.py,sha256=fH7bAcljapYqmF8cQE7humoufFe2RVFRYOcyavMg9yo,36103
465
466
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
466
467
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
467
- datahub/ingestion/source/sql/oracle.py,sha256=pQeeQarUZpC7q09zL0LlPZB0aCwHU3QBRSzxyLHGIKY,26222
468
+ datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
468
469
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
469
470
  datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
470
471
  datahub/ingestion/source/sql/sql_common.py,sha256=r75Cd06Qwe2fqTDRZKWnIf7kpnR0BSxZ9PYBOgY0I6k,48785
@@ -481,8 +482,8 @@ datahub/ingestion/source/sql/trino.py,sha256=8viVOu67mhDnsO3LuPSRi1WDR5MLdOXu7HO
481
482
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
482
483
  datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
483
484
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
484
- datahub/ingestion/source/sql/mssql/job_models.py,sha256=nrRDYVKah5ep7J8-ddNX2PVPY0MIPQqkvEgO33s5L3k,7988
485
- datahub/ingestion/source/sql/mssql/source.py,sha256=sHckKQ7qMJKnH6r8rj8B-9NiIGXFLiyKTLBJh1SJxmQ,32371
485
+ datahub/ingestion/source/sql/mssql/job_models.py,sha256=5-QQv8w-KnyNq_y-VmSC_K5sr0VoZhfYW6Aasd-z2LY,8901
486
+ datahub/ingestion/source/sql/mssql/source.py,sha256=QxgUWL-aSjTXmqZPD_7811MsrDsgW_I9_oMimomdE0A,32593
486
487
  datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
487
488
  datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
488
489
  datahub/ingestion/source/state/checkpoint.py,sha256=-fTUZKkY4nHTFqSWZ0jJkkdIu_tWlOjRNhm4FTr4ul4,8860
@@ -575,12 +576,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
575
576
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
576
577
  datahub/lite/lite_util.py,sha256=Cm6trMTeo0X1fv4nSsW9lC0jqce7Jt-05GhOtIGzsVc,4559
577
578
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
578
- datahub/metadata/_schema_classes.py,sha256=lQPz3jGfTOCBidsC_Ap62XqRD9A4AKPqhHa3CV9jyL0,993316
579
- datahub/metadata/schema.avsc,sha256=iQJaPYHy4xrGQBEbRgn-RF4kGC1iNPhZawHTAYTyfW0,741430
579
+ datahub/metadata/_schema_classes.py,sha256=uafVvWsnAqPranXzeC9CrSAu7I1-XJOogtiBPhxmn-k,993397
580
+ datahub/metadata/schema.avsc,sha256=uPWX2Rx9A12b-p4ef4zrsjbtQPSIH8w67l3B6pq6zE0,741459
580
581
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
581
582
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
582
583
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
583
- datahub/metadata/_urns/urn_defs.py,sha256=aw3971Iq-qsRcvqxIIrr9a4_1IdjWgkpIjH5qI1hnaI,133309
584
+ datahub/metadata/_urns/urn_defs.py,sha256=mQ52ozRUt19MyBLNZh1f1ETlafCzCYmEbcKxAjR_8o4,133983
584
585
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
585
586
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
586
587
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -745,7 +746,7 @@ datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=JKNy_KlUqr3kt7o1Cu2D
745
746
  datahub/metadata/schemas/Deprecation.avsc,sha256=SmbTlMB9fujdMBjYEQkzaU4XJzwM1gD6E8L2zoL1b4Q,1280
746
747
  datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
747
748
  datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
748
- datahub/metadata/schemas/DomainKey.avsc,sha256=1_kbsMTsO2ebB3zW7KpB71QfkGGR0mAgpNOKRoWHsJU,649
749
+ datahub/metadata/schemas/DomainKey.avsc,sha256=TYCcJRWqwbxbQuR5E68pvdeAmfVdYsJuMNhTxVphbqg,676
749
750
  datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
750
751
  datahub/metadata/schemas/Domains.avsc,sha256=5mRQcba6Zmp6Y1srbxhOjETutg0I_ZG4ikuS2r9fkR0,804
751
752
  datahub/metadata/schemas/DynamicFormAssignment.avsc,sha256=SXRL5D6kIYWdGl3zLQYxPnkQX71JXQOKrjQNavFqVp0,7339
@@ -783,7 +784,7 @@ datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0z
783
784
  datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
784
785
  datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
785
786
  datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
786
- datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=XzzrUEpRCxNjWfdjXBTuYrmeMWK_-eCXw49K_eOswuw,637
787
+ datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=hT8ny4TL1WvgFvnaVBjuw6AWDiPDjpkh20f83ZT-UZ8,664
787
788
  datahub/metadata/schemas/GlossaryRelatedTerms.avsc,sha256=ZTP0mrFD4y-C6JekRy8IVuHvICUkJib-ZAYD93Gv1tA,2763
788
789
  datahub/metadata/schemas/GlossaryTermInfo.avsc,sha256=j4s9NCyMOIF03HfaXoQEIkiMTRaCy_-euhenptfu7IA,2935
789
790
  datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=mkyrzmOX_BGRHbcj2ccUALbrPVJNdQbItU-VyKN7P98,836
@@ -806,12 +807,12 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=6_typ7K0Bz8x62T31IYqf9XS9
806
807
  datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
807
808
  datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
808
809
  datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
809
- datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=gmXaUYxII8BVLnXOFdlPmyhD1rUhrw455R_hL77foSU,2406
810
+ datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=vt04jFF_ZHSvWhqLoxC8C_KspiRLkvNNIXJI0aKPF1Q,2425
810
811
  datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=I3v-uNOeYxO4hooPHOjafWWHuVyeGvG90oma0tzpNFg,5409
811
812
  datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
812
- datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=CL3DS8BfC2hOPmuUPaLcur0IUFg9Cnexc7bQ2lRgBfI,2478
813
+ datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=3LoMWejMfCwdoqz3PFinRbY1_Yy4Kypw7pwg3tL42Jg,2497
813
814
  datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=zMl6ab6zfcYJmt31f-AUrrfeqfLoaSZQpfB3_S9JFFQ,6534
814
- datahub/metadata/schemas/MLModelKey.avsc,sha256=0D6IECBL-Y5FHMdUpMEnd3e0JjoQ1YCtNTjoUovJuHU,2847
815
+ datahub/metadata/schemas/MLModelKey.avsc,sha256=pRntMhcpgTJL2T2nGK6Sf9_q2vJOqHELYFh59VMXqv0,2866
815
816
  datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
816
817
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=Kq2Q9WxZ6nQ8wR4P6wpPCI-J7FwXQyoa10s6BvXtkm8,1110
817
818
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
@@ -913,7 +914,7 @@ datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGy
913
914
  datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
914
915
  datahub/sql_parsing/split_statements.py,sha256=6KUoIPG7H8Rja3lrPjSrSfhFfwW4oqgfoNQeTbbOWNg,8953
915
916
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
916
- datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
917
+ datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
917
918
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
918
919
  datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
919
920
  datahub/sql_parsing/sqlglot_utils.py,sha256=6W6MQ5Yh0xXT9_h0jd19yoGWMdXicyRBDD_FwV7nj04,14701
@@ -1021,9 +1022,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1021
1022
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1022
1023
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1023
1024
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1024
- acryl_datahub-1.0.0rc9.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1025
- acryl_datahub-1.0.0rc9.dist-info/METADATA,sha256=V5RVytQ19FP_0BtHEgHjQ-89KScjbKyOHx4p5yZxLOQ,175382
1026
- acryl_datahub-1.0.0rc9.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
1027
- acryl_datahub-1.0.0rc9.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1028
- acryl_datahub-1.0.0rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1029
- acryl_datahub-1.0.0rc9.dist-info/RECORD,,
1025
+ acryl_datahub-1.0.0rc11.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1026
+ acryl_datahub-1.0.0rc11.dist-info/METADATA,sha256=hZCrduEZ7Qqkr76OUpdPLHm7AApR7AQHEaKKYq9uJZE,175337
1027
+ acryl_datahub-1.0.0rc11.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
1028
+ acryl_datahub-1.0.0rc11.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1029
+ acryl_datahub-1.0.0rc11.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1030
+ acryl_datahub-1.0.0rc11.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0rc9"
3
+ __version__ = "1.0.0rc11"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  import logging
3
- from typing import Dict, Optional, Type, Union
3
+ from typing import Dict, Optional, Type, TypeVar, Union
4
4
 
5
5
  from avrogen.dict_wrapper import DictWrapper
6
6
  from pydantic import BaseModel
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
13
13
  _REMAPPED_SCHEMA_TYPES = {
14
14
  k.replace("pegasus2avro.", ""): v for k, v in SCHEMA_TYPES.items()
15
15
  }
16
+ T = TypeVar("T", bound=BaseModel)
16
17
 
17
18
 
18
19
  class SerializedResourceValue(BaseModel):
@@ -83,8 +84,8 @@ class SerializedResourceValue(BaseModel):
83
84
  )
84
85
 
85
86
  def as_pydantic_object(
86
- self, model_type: Type[BaseModel], validate_schema_ref: bool = False
87
- ) -> BaseModel:
87
+ self, model_type: Type[T], validate_schema_ref: bool = False
88
+ ) -> T:
88
89
  """
89
90
  Parse the blob into a Pydantic-defined Python object based on the schema type and schema
90
91
  ref.
@@ -52,7 +52,15 @@ from datahub.metadata.schema_classes import (
52
52
  UpstreamLineageClass,
53
53
  _Aspect as AspectAbstract,
54
54
  )
55
- from datahub.metadata.urns import DataFlowUrn, DatasetUrn, TagUrn
55
+ from datahub.metadata.urns import (
56
+ ChartUrn,
57
+ DashboardUrn,
58
+ DataFlowUrn,
59
+ DataJobUrn,
60
+ DataPlatformUrn,
61
+ DatasetUrn,
62
+ TagUrn,
63
+ )
56
64
  from datahub.utilities.urn_encoder import UrnEncoder
57
65
 
58
66
  logger = logging.getLogger(__name__)
@@ -119,7 +127,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
119
127
  def make_data_platform_urn(platform: str) -> str:
120
128
  if platform.startswith("urn:li:dataPlatform:"):
121
129
  return platform
122
- return f"urn:li:dataPlatform:{platform}"
130
+ return DataPlatformUrn.create_from_id(platform).urn()
123
131
 
124
132
 
125
133
  def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
@@ -236,7 +244,7 @@ def make_user_urn(username: str) -> str:
236
244
  Makes a user urn if the input is not a user or group urn already
237
245
  """
238
246
  return (
239
- f"urn:li:corpuser:{username}"
247
+ f"urn:li:corpuser:{UrnEncoder.encode_string(username)}"
240
248
  if not username.startswith(("urn:li:corpuser:", "urn:li:corpGroup:"))
241
249
  else username
242
250
  )
@@ -249,7 +257,7 @@ def make_group_urn(groupname: str) -> str:
249
257
  if groupname and groupname.startswith(("urn:li:corpGroup:", "urn:li:corpuser:")):
250
258
  return groupname
251
259
  else:
252
- return f"urn:li:corpGroup:{groupname}"
260
+ return f"urn:li:corpGroup:{UrnEncoder.encode_string(groupname)}"
253
261
 
254
262
 
255
263
  def make_tag_urn(tag: str) -> str:
@@ -301,7 +309,12 @@ def make_data_flow_urn(
301
309
 
302
310
 
303
311
  def make_data_job_urn_with_flow(flow_urn: str, job_id: str) -> str:
304
- return f"urn:li:dataJob:({flow_urn},{job_id})"
312
+ data_flow_urn = DataFlowUrn.from_string(flow_urn)
313
+ data_job_urn = DataJobUrn.create_from_ids(
314
+ data_flow_urn=data_flow_urn.urn(),
315
+ job_id=job_id,
316
+ )
317
+ return data_job_urn.urn()
305
318
 
306
319
 
307
320
  def make_data_process_instance_urn(dataProcessInstanceId: str) -> str:
@@ -324,10 +337,11 @@ def make_dashboard_urn(
324
337
  platform: str, name: str, platform_instance: Optional[str] = None
325
338
  ) -> str:
326
339
  # FIXME: dashboards don't currently include data platform urn prefixes.
327
- if platform_instance:
328
- return f"urn:li:dashboard:({platform},{platform_instance}.{name})"
329
- else:
330
- return f"urn:li:dashboard:({platform},{name})"
340
+ return DashboardUrn.create_from_ids(
341
+ platform=platform,
342
+ name=name,
343
+ platform_instance=platform_instance,
344
+ ).urn()
331
345
 
332
346
 
333
347
  def dashboard_urn_to_key(dashboard_urn: str) -> Optional[DashboardKeyClass]:
@@ -342,10 +356,11 @@ def make_chart_urn(
342
356
  platform: str, name: str, platform_instance: Optional[str] = None
343
357
  ) -> str:
344
358
  # FIXME: charts don't currently include data platform urn prefixes.
345
- if platform_instance:
346
- return f"urn:li:chart:({platform},{platform_instance}.{name})"
347
- else:
348
- return f"urn:li:chart:({platform},{name})"
359
+ return ChartUrn.create_from_ids(
360
+ platform=platform,
361
+ name=name,
362
+ platform_instance=platform_instance,
363
+ ).urn()
349
364
 
350
365
 
351
366
  def chart_urn_to_key(chart_urn: str) -> Optional[ChartKeyClass]:
@@ -60,8 +60,15 @@ class BIContainerSubTypes(StrEnum):
60
60
  MODE_COLLECTION = "Collection"
61
61
 
62
62
 
63
+ class FlowContainerSubTypes(StrEnum):
64
+ MSSQL_JOB = "Job"
65
+ MSSQL_PROCEDURE_CONTAINER = "Procedures Container"
66
+
67
+
63
68
  class JobContainerSubTypes(StrEnum):
64
69
  NIFI_PROCESS_GROUP = "Process Group"
70
+ MSSQL_JOBSTEP = "Job Step"
71
+ MSSQL_STORED_PROCEDURE = "Stored Procedure"
65
72
 
66
73
 
67
74
  class BIAssetSubTypes(StrEnum):
@@ -6,7 +6,10 @@ from typing import Any, Dict, Optional
6
6
  from humanfriendly import format_timespan
7
7
  from pydantic import Field, validator
8
8
  from pyiceberg.catalog import Catalog, load_catalog
9
+ from pyiceberg.catalog.rest import RestCatalog
10
+ from requests.adapters import HTTPAdapter
9
11
  from sortedcontainers import SortedList
12
+ from urllib3.util import Retry
10
13
 
11
14
  from datahub.configuration.common import AllowDenyPattern, ConfigModel
12
15
  from datahub.configuration.source_common import DatasetSourceConfigMixin
@@ -26,6 +29,23 @@ from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
26
29
 
27
30
  logger = logging.getLogger(__name__)
28
31
 
32
+ DEFAULT_REST_TIMEOUT = 120
33
+ DEFAULT_REST_RETRY_POLICY = {"total": 3, "backoff_factor": 0.1}
34
+
35
+
36
+ class TimeoutHTTPAdapter(HTTPAdapter):
37
+ def __init__(self, *args, **kwargs):
38
+ if "timeout" in kwargs:
39
+ self.timeout = kwargs["timeout"]
40
+ del kwargs["timeout"]
41
+ super().__init__(*args, **kwargs)
42
+
43
+ def send(self, request, **kwargs):
44
+ timeout = kwargs.get("timeout")
45
+ if timeout is None and hasattr(self, "timeout"):
46
+ kwargs["timeout"] = self.timeout
47
+ return super().send(request, **kwargs)
48
+
29
49
 
30
50
  class IcebergProfilingConfig(ConfigModel):
31
51
  enabled: bool = Field(
@@ -146,7 +166,26 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
146
166
  logger.debug(
147
167
  "Initializing the catalog %s with config: %s", catalog_name, catalog_config
148
168
  )
149
- return load_catalog(name=catalog_name, **catalog_config)
169
+ catalog = load_catalog(name=catalog_name, **catalog_config)
170
+ if isinstance(catalog, RestCatalog):
171
+ logger.debug(
172
+ "Recognized REST catalog type being configured, attempting to configure HTTP Adapter for the session"
173
+ )
174
+ retry_policy: Dict[str, Any] = DEFAULT_REST_RETRY_POLICY.copy()
175
+ retry_policy.update(catalog_config.get("connection", {}).get("retry", {}))
176
+ retries = Retry(**retry_policy)
177
+ logger.debug(f"Retry policy to be set: {retry_policy}")
178
+ timeout = catalog_config.get("connection", {}).get(
179
+ "timeout", DEFAULT_REST_TIMEOUT
180
+ )
181
+ logger.debug(f"Timeout to be set: {timeout}")
182
+ catalog._session.mount(
183
+ "http://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
184
+ )
185
+ catalog._session.mount(
186
+ "https://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
187
+ )
188
+ return catalog
150
189
 
151
190
 
152
191
  class TopTableTimings:
@@ -666,6 +666,27 @@ class OktaSource(StatefulIngestionSourceBase):
666
666
  self.config.okta_profile_to_username_regex,
667
667
  )
668
668
 
669
+ def _map_okta_user_profile_custom_properties(
670
+ self, profile: UserProfile
671
+ ) -> Dict[str, str]:
672
+ # filter out the common fields that are already mapped to the CorpUserInfo aspect and the private ones
673
+ return {
674
+ k: str(v)
675
+ for k, v in profile.__dict__.items()
676
+ if v
677
+ and k
678
+ not in [
679
+ "displayName",
680
+ "firstName",
681
+ "lastName",
682
+ "email",
683
+ "title",
684
+ "countryCode",
685
+ "department",
686
+ ]
687
+ and not k.startswith("_")
688
+ }
689
+
669
690
  # Converts Okta User Profile into a CorpUserInfo.
670
691
  def _map_okta_user_profile(self, profile: UserProfile) -> CorpUserInfoClass:
671
692
  # TODO: Extract user's manager if provided.
@@ -683,6 +704,7 @@ class OktaSource(StatefulIngestionSourceBase):
683
704
  title=profile.title,
684
705
  countryCode=profile.countryCode,
685
706
  departmentName=profile.department,
707
+ customProperties=self._map_okta_user_profile_custom_properties(profile),
686
708
  )
687
709
 
688
710
  def _make_corp_group_urn(self, name: str) -> str:
@@ -313,7 +313,7 @@ class MetabaseSource(StatefulIngestionSourceBase):
313
313
  return None
314
314
 
315
315
  dashboard_urn = builder.make_dashboard_urn(
316
- self.platform, dashboard_details.get("id", "")
316
+ self.platform, str(dashboard_details.get("id", ""))
317
317
  )
318
318
  dashboard_snapshot = DashboardSnapshot(
319
319
  urn=dashboard_urn,
@@ -337,7 +337,7 @@ class MetabaseSource(StatefulIngestionSourceBase):
337
337
  card_id = card_info.get("card").get("id", "")
338
338
  if not card_id:
339
339
  continue # most likely a virtual card without an id (text or heading), not relevant.
340
- chart_urn = builder.make_chart_urn(self.platform, card_id)
340
+ chart_urn = builder.make_chart_urn(self.platform, str(card_id))
341
341
  chart_urns.append(chart_urn)
342
342
 
343
343
  dashboard_info_class = DashboardInfoClass(
@@ -459,7 +459,7 @@ class MetabaseSource(StatefulIngestionSourceBase):
459
459
  )
460
460
  return None
461
461
 
462
- chart_urn = builder.make_chart_urn(self.platform, card_id)
462
+ chart_urn = builder.make_chart_urn(self.platform, str(card_id))
463
463
  chart_snapshot = ChartSnapshot(
464
464
  urn=chart_urn,
465
465
  aspects=[],
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import pathlib
3
+ import re
3
4
  import time
4
5
  from dataclasses import dataclass, field
5
6
  from typing import Any, Dict, Iterable, List, Optional, TypeVar, Union
@@ -118,17 +119,58 @@ class BusinessGlossaryConfig(DefaultConfig):
118
119
  return v
119
120
 
120
121
 
122
def clean_url(text: str) -> str:
    """
    Turn free-form text into a compact slug suitable for use in URLs.

    The steps, applied in order:
    1. Spaces are replaced with hyphens
    2. Every character other than ASCII letters, digits, hyphens and periods is dropped
    3. Runs of hyphens, and runs of periods, are each squeezed to a single character
    4. Hyphens and periods at the start and end are trimmed
    """
    # (pattern, replacement) pairs applied sequentially after the space swap.
    substitutions = (
        (r"[^a-zA-Z0-9\-.]", ""),  # drop everything outside the allowed set
        (r"-+", "-"),  # squeeze hyphen runs
        (r"\.+", "."),  # squeeze period runs
    )

    slug = text.replace(" ", "-")
    for pattern, replacement in substitutions:
        slug = re.sub(pattern, replacement, slug)

    return slug.strip("-.")
140
+
141
+
121
142
  def create_id(path: List[str], default_id: Optional[str], enable_auto_id: bool) -> str:
143
+ """
144
+ Create an ID for a glossary node or term.
145
+
146
+ Args:
147
+ path: List of path components leading to this node/term
148
+ default_id: Optional manually specified ID
149
+ enable_auto_id: Whether to generate GUIDs
150
+ """
122
151
  if default_id is not None:
123
- return default_id # No need to create id from path as default_id is provided
152
+ return default_id # Use explicitly provided ID
124
153
 
125
154
  id_: str = ".".join(path)
126
155
 
127
- if UrnEncoder.contains_extended_reserved_char(id_):
128
- enable_auto_id = True
156
+ # Check for non-ASCII characters before cleaning
157
+ if any(ord(c) > 127 for c in id_):
158
+ return datahub_guid({"path": id_})
129
159
 
130
160
  if enable_auto_id:
161
+ # Generate GUID for auto_id mode
131
162
  id_ = datahub_guid({"path": id_})
163
+ else:
164
+ # Clean the URL for better readability when not using auto_id
165
+ id_ = clean_url(id_)
166
+
167
+ # Force auto_id if the cleaned URL still contains problematic characters
168
+ if UrnEncoder.contains_extended_reserved_char(id_):
169
+ logger.warning(
170
+ f"ID '{id_}' contains problematic characters after URL cleaning. Falling back to GUID generation for stability."
171
+ )
172
+ id_ = datahub_guid({"path": id_})
173
+
132
174
  return id_
133
175
 
134
176
 
@@ -377,7 +377,7 @@ class ModeSource(StatefulIngestionSourceBase):
377
377
  ]
378
378
 
379
379
  def _dashboard_urn(self, report_info: dict) -> str:
380
- return builder.make_dashboard_urn(self.platform, report_info.get("id", ""))
380
+ return builder.make_dashboard_urn(self.platform, str(report_info.get("id", "")))
381
381
 
382
382
  def _parse_last_run_at(self, report_info: dict) -> Optional[int]:
383
383
  # Mode queries are refreshed, and that timestamp is reflected correctly here.
@@ -128,6 +128,10 @@ class RedshiftConfig(
128
128
  default=True,
129
129
  description="Whether lineage should be collected from copy commands",
130
130
  )
131
+ include_share_lineage: bool = Field(
132
+ default=True,
133
+ description="Whether lineage should be collected from datashares",
134
+ )
131
135
 
132
136
  include_usage_statistics: bool = Field(
133
137
  default=False,