acryl-datahub 1.0.0rc9__py3-none-any.whl → 1.0.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/METADATA +2445 -2446
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/RECORD +36 -35
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/emitter/mce_builder.py +28 -13
- datahub/ingestion/source/common/subtypes.py +7 -0
- datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
- datahub/ingestion/source/identity/okta.py +22 -0
- datahub/ingestion/source/metabase.py +3 -3
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/mode.py +1 -1
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +7 -4
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +125 -33
- datahub/ingestion/source/redshift/redshift.py +41 -72
- datahub/ingestion/source/redshift/redshift_schema.py +166 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +10 -4
- datahub/ingestion/source/sql/oracle.py +93 -63
- datahub/metadata/_schema_classes.py +5 -5
- datahub/metadata/_urns/urn_defs.py +24 -0
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=m3vMOf1XXwW_i72T14wHeXSyYmTku5A-KQz7nxQXArM,322
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
5
|
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
6
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -27,7 +27,7 @@ datahub/api/entities/assertion/sql_assertion.py,sha256=myJU-Wf8O-RbiyU_Xlbp2cacw
|
|
|
27
27
|
datahub/api/entities/assertion/volume_assertion.py,sha256=37bNLGP-81MvcZj_cVHvrdw5I4aBxkER0xN0ZqyB3NU,3360
|
|
28
28
|
datahub/api/entities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
datahub/api/entities/common/data_platform_instance.py,sha256=AVqQ-yactNZi_bislIEUcQZCGovaHY-gQi1EY7PVsT4,1065
|
|
30
|
-
datahub/api/entities/common/serialized_value.py,sha256=
|
|
30
|
+
datahub/api/entities/common/serialized_value.py,sha256=DFPK7p4OwqRTOnH8luEWzqH_4vQHZSNxFIL63x_o2ok,5565
|
|
31
31
|
datahub/api/entities/corpgroup/__init__.py,sha256=Uf3SxsZUSY-yZ2Kx3-1dWwz600D1C4Ds_z_nG7hwanA,63
|
|
32
32
|
datahub/api/entities/corpgroup/corpgroup.py,sha256=XSrGHCwl7lMNtzWviMzZbw8VDdesXC2HLZP5kpHt2fQ,8878
|
|
33
33
|
datahub/api/entities/corpuser/__init__.py,sha256=RspO1ceu6q2zUqYqZqRRY_MPcP7PNdd2lQoZn-KfeQE,60
|
|
@@ -119,7 +119,7 @@ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP
|
|
|
119
119
|
datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
|
|
120
120
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
121
121
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
122
|
-
datahub/emitter/mce_builder.py,sha256=
|
|
122
|
+
datahub/emitter/mce_builder.py,sha256=8UiG2VsYgC7n29h_y4qL6F9faGwwMZF3zGscl_CBT9s,16808
|
|
123
123
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
124
124
|
datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
|
|
125
125
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
@@ -202,9 +202,9 @@ datahub/ingestion/source/ge_data_profiler.py,sha256=C93ZZrtIRVL6pDpQ3fn7ZbbJiZmH
|
|
|
202
202
|
datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0GX0az6HYqNUZRnIu_fQ,10866
|
|
203
203
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
204
204
|
datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
|
|
205
|
-
datahub/ingestion/source/metabase.py,sha256=
|
|
205
|
+
datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
|
|
206
206
|
datahub/ingestion/source/mlflow.py,sha256=cqQivSyrptm15vn--xbT7eTRHJJVKMmQpoVqfzuDIDU,12858
|
|
207
|
-
datahub/ingestion/source/mode.py,sha256=
|
|
207
|
+
datahub/ingestion/source/mode.py,sha256=w85zCIZicfABx5dKCupsGpH1tgUMhS1El-jIWa2gwNU,63632
|
|
208
208
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
209
209
|
datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
|
|
210
210
|
datahub/ingestion/source/openapi.py,sha256=39ep3etbWh8NBPjTXXwH3mieC5P6bMVAjhvK7UvcTis,17372
|
|
@@ -268,7 +268,7 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
|
|
|
268
268
|
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
|
|
269
269
|
datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
270
270
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
271
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
271
|
+
datahub/ingestion/source/common/subtypes.py,sha256=EiYSjBHiRvGjRB5wjKEfS5b_k9tQCFWMP1ADw_1p-CY,2525
|
|
272
272
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
273
273
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
274
274
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
@@ -324,11 +324,11 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
324
324
|
datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
|
|
325
325
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
326
326
|
datahub/ingestion/source/iceberg/iceberg.py,sha256=pMWQtn88XAYwZsRNkICX1GlQOqOnyuWdLpkcjVQEon0,29039
|
|
327
|
-
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=
|
|
327
|
+
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
|
|
328
328
|
datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
|
|
329
329
|
datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
330
330
|
datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
|
|
331
|
-
datahub/ingestion/source/identity/okta.py,sha256=
|
|
331
|
+
datahub/ingestion/source/identity/okta.py,sha256=jC21myJuMRTaPgj0OD9heaC-mz8ECjqpy2hSJwlUSwM,31943
|
|
332
332
|
datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
333
333
|
datahub/ingestion/source/kafka/kafka.py,sha256=mboUWQmlumEwcXwY2POeK1L8tdk5-CABakZ-MWbvdNQ,26579
|
|
334
334
|
datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
|
|
@@ -360,7 +360,7 @@ datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz
|
|
|
360
360
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
361
361
|
datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
|
|
362
362
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
363
|
-
datahub/ingestion/source/metadata/business_glossary.py,sha256=
|
|
363
|
+
datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
|
|
364
364
|
datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH57g-u6LWbu_f7HM4,9521
|
|
365
365
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
366
366
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
|
|
@@ -396,16 +396,17 @@ datahub/ingestion/source/qlik_sense/qlik_api.py,sha256=KoBaD1VowYrbaRg1rjDP1_mmP
|
|
|
396
396
|
datahub/ingestion/source/qlik_sense/qlik_sense.py,sha256=bmhmOgSXzC6g-uqO1ljFLRNz2oo6Xjn400UQnWdMA1Y,22530
|
|
397
397
|
datahub/ingestion/source/qlik_sense/websocket_connection.py,sha256=jp39OInvjCN9BtnKsHU_aa1B3X9hVHqSmD25stXuqHk,1940
|
|
398
398
|
datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
|
-
datahub/ingestion/source/redshift/config.py,sha256=
|
|
399
|
+
datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUhaMGec8FnUbSl6Y,8997
|
|
400
|
+
datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
|
|
400
401
|
datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
|
|
401
|
-
datahub/ingestion/source/redshift/lineage.py,sha256=
|
|
402
|
-
datahub/ingestion/source/redshift/lineage_v2.py,sha256=
|
|
403
|
-
datahub/ingestion/source/redshift/profile.py,sha256=
|
|
404
|
-
datahub/ingestion/source/redshift/query.py,sha256=
|
|
405
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
402
|
+
datahub/ingestion/source/redshift/lineage.py,sha256=Gk2dNuRBEipZkY5W1sArlfRbFR7mBKutCFHHTrn3yX4,44096
|
|
403
|
+
datahub/ingestion/source/redshift/lineage_v2.py,sha256=H6Qky5dLeZEICdDWyH-My78NoKlXpExHg3m-6d5lbgo,16891
|
|
404
|
+
datahub/ingestion/source/redshift/profile.py,sha256=jqFQUSg_qzSYi1yIAq24NFwHW8yIcSDSSh-vgJ4nl6M,4287
|
|
405
|
+
datahub/ingestion/source/redshift/query.py,sha256=6Fw3I8qFLflySDu6WY5D9NjXnRnDIw0yxKisSpaHh0A,47526
|
|
406
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=IZqeQws3mvDdu9K-ixPGZNalDcRRRse-l_TTwQI7B-4,43407
|
|
406
407
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
407
|
-
datahub/ingestion/source/redshift/redshift_schema.py,sha256=
|
|
408
|
-
datahub/ingestion/source/redshift/report.py,sha256=
|
|
408
|
+
datahub/ingestion/source/redshift/redshift_schema.py,sha256=WTc-j4_PYlFgaJZ3hEorGIBWKruTX57E7V_5JaUe8mU,24045
|
|
409
|
+
datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
|
|
409
410
|
datahub/ingestion/source/redshift/usage.py,sha256=eSdB1MYZeQokkQOwl9LPdpo-oCBJSwxJBotSpJ9XjBc,17473
|
|
410
411
|
datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
|
|
411
412
|
datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
|
|
@@ -464,7 +465,7 @@ datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnX
|
|
|
464
465
|
datahub/ingestion/source/sql/hive_metastore.py,sha256=fH7bAcljapYqmF8cQE7humoufFe2RVFRYOcyavMg9yo,36103
|
|
465
466
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
466
467
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
467
|
-
datahub/ingestion/source/sql/oracle.py,sha256=
|
|
468
|
+
datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
|
|
468
469
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
469
470
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
470
471
|
datahub/ingestion/source/sql/sql_common.py,sha256=r75Cd06Qwe2fqTDRZKWnIf7kpnR0BSxZ9PYBOgY0I6k,48785
|
|
@@ -481,8 +482,8 @@ datahub/ingestion/source/sql/trino.py,sha256=8viVOu67mhDnsO3LuPSRi1WDR5MLdOXu7HO
|
|
|
481
482
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
|
|
482
483
|
datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
|
|
483
484
|
datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
|
|
484
|
-
datahub/ingestion/source/sql/mssql/job_models.py,sha256=
|
|
485
|
-
datahub/ingestion/source/sql/mssql/source.py,sha256=
|
|
485
|
+
datahub/ingestion/source/sql/mssql/job_models.py,sha256=5-QQv8w-KnyNq_y-VmSC_K5sr0VoZhfYW6Aasd-z2LY,8901
|
|
486
|
+
datahub/ingestion/source/sql/mssql/source.py,sha256=QxgUWL-aSjTXmqZPD_7811MsrDsgW_I9_oMimomdE0A,32593
|
|
486
487
|
datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
|
|
487
488
|
datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
488
489
|
datahub/ingestion/source/state/checkpoint.py,sha256=-fTUZKkY4nHTFqSWZ0jJkkdIu_tWlOjRNhm4FTr4ul4,8860
|
|
@@ -575,12 +576,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
575
576
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
576
577
|
datahub/lite/lite_util.py,sha256=Cm6trMTeo0X1fv4nSsW9lC0jqce7Jt-05GhOtIGzsVc,4559
|
|
577
578
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
578
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
579
|
-
datahub/metadata/schema.avsc,sha256=
|
|
579
|
+
datahub/metadata/_schema_classes.py,sha256=uafVvWsnAqPranXzeC9CrSAu7I1-XJOogtiBPhxmn-k,993397
|
|
580
|
+
datahub/metadata/schema.avsc,sha256=uPWX2Rx9A12b-p4ef4zrsjbtQPSIH8w67l3B6pq6zE0,741459
|
|
580
581
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
581
582
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
582
583
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
583
|
-
datahub/metadata/_urns/urn_defs.py,sha256=
|
|
584
|
+
datahub/metadata/_urns/urn_defs.py,sha256=mQ52ozRUt19MyBLNZh1f1ETlafCzCYmEbcKxAjR_8o4,133983
|
|
584
585
|
datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
585
586
|
datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
586
587
|
datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
|
|
@@ -745,7 +746,7 @@ datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=JKNy_KlUqr3kt7o1Cu2D
|
|
|
745
746
|
datahub/metadata/schemas/Deprecation.avsc,sha256=SmbTlMB9fujdMBjYEQkzaU4XJzwM1gD6E8L2zoL1b4Q,1280
|
|
746
747
|
datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
|
|
747
748
|
datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
|
|
748
|
-
datahub/metadata/schemas/DomainKey.avsc,sha256=
|
|
749
|
+
datahub/metadata/schemas/DomainKey.avsc,sha256=TYCcJRWqwbxbQuR5E68pvdeAmfVdYsJuMNhTxVphbqg,676
|
|
749
750
|
datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
|
|
750
751
|
datahub/metadata/schemas/Domains.avsc,sha256=5mRQcba6Zmp6Y1srbxhOjETutg0I_ZG4ikuS2r9fkR0,804
|
|
751
752
|
datahub/metadata/schemas/DynamicFormAssignment.avsc,sha256=SXRL5D6kIYWdGl3zLQYxPnkQX71JXQOKrjQNavFqVp0,7339
|
|
@@ -783,7 +784,7 @@ datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0z
|
|
|
783
784
|
datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
|
|
784
785
|
datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
|
|
785
786
|
datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
|
|
786
|
-
datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=
|
|
787
|
+
datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=hT8ny4TL1WvgFvnaVBjuw6AWDiPDjpkh20f83ZT-UZ8,664
|
|
787
788
|
datahub/metadata/schemas/GlossaryRelatedTerms.avsc,sha256=ZTP0mrFD4y-C6JekRy8IVuHvICUkJib-ZAYD93Gv1tA,2763
|
|
788
789
|
datahub/metadata/schemas/GlossaryTermInfo.avsc,sha256=j4s9NCyMOIF03HfaXoQEIkiMTRaCy_-euhenptfu7IA,2935
|
|
789
790
|
datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=mkyrzmOX_BGRHbcj2ccUALbrPVJNdQbItU-VyKN7P98,836
|
|
@@ -806,12 +807,12 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=6_typ7K0Bz8x62T31IYqf9XS9
|
|
|
806
807
|
datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
|
|
807
808
|
datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
|
|
808
809
|
datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
|
|
809
|
-
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=
|
|
810
|
+
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=vt04jFF_ZHSvWhqLoxC8C_KspiRLkvNNIXJI0aKPF1Q,2425
|
|
810
811
|
datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=I3v-uNOeYxO4hooPHOjafWWHuVyeGvG90oma0tzpNFg,5409
|
|
811
812
|
datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
|
|
812
|
-
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=
|
|
813
|
+
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=3LoMWejMfCwdoqz3PFinRbY1_Yy4Kypw7pwg3tL42Jg,2497
|
|
813
814
|
datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=zMl6ab6zfcYJmt31f-AUrrfeqfLoaSZQpfB3_S9JFFQ,6534
|
|
814
|
-
datahub/metadata/schemas/MLModelKey.avsc,sha256=
|
|
815
|
+
datahub/metadata/schemas/MLModelKey.avsc,sha256=pRntMhcpgTJL2T2nGK6Sf9_q2vJOqHELYFh59VMXqv0,2866
|
|
815
816
|
datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
|
|
816
817
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=Kq2Q9WxZ6nQ8wR4P6wpPCI-J7FwXQyoa10s6BvXtkm8,1110
|
|
817
818
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
@@ -913,7 +914,7 @@ datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGy
|
|
|
913
914
|
datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
|
|
914
915
|
datahub/sql_parsing/split_statements.py,sha256=6KUoIPG7H8Rja3lrPjSrSfhFfwW4oqgfoNQeTbbOWNg,8953
|
|
915
916
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
|
|
916
|
-
datahub/sql_parsing/sql_parsing_common.py,sha256=
|
|
917
|
+
datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
|
|
917
918
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
918
919
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
|
|
919
920
|
datahub/sql_parsing/sqlglot_utils.py,sha256=6W6MQ5Yh0xXT9_h0jd19yoGWMdXicyRBDD_FwV7nj04,14701
|
|
@@ -1021,9 +1022,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1021
1022
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1022
1023
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1023
1024
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1024
|
-
acryl_datahub-1.0.
|
|
1025
|
-
acryl_datahub-1.0.
|
|
1026
|
-
acryl_datahub-1.0.
|
|
1027
|
-
acryl_datahub-1.0.
|
|
1028
|
-
acryl_datahub-1.0.
|
|
1029
|
-
acryl_datahub-1.0.
|
|
1025
|
+
acryl_datahub-1.0.0rc11.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1026
|
+
acryl_datahub-1.0.0rc11.dist-info/METADATA,sha256=hZCrduEZ7Qqkr76OUpdPLHm7AApR7AQHEaKKYq9uJZE,175337
|
|
1027
|
+
acryl_datahub-1.0.0rc11.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
1028
|
+
acryl_datahub-1.0.0rc11.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1029
|
+
acryl_datahub-1.0.0rc11.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1030
|
+
acryl_datahub-1.0.0rc11.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
datahub/api/entities/common/serialized_value.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Dict, Optional, Type, Union
|
|
3
|
+
from typing import Dict, Optional, Type, TypeVar, Union
|
|
4
4
|
|
|
5
5
|
from avrogen.dict_wrapper import DictWrapper
|
|
6
6
|
from pydantic import BaseModel
|
|
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
13
|
_REMAPPED_SCHEMA_TYPES = {
|
|
14
14
|
k.replace("pegasus2avro.", ""): v for k, v in SCHEMA_TYPES.items()
|
|
15
15
|
}
|
|
16
|
+
T = TypeVar("T", bound=BaseModel)
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class SerializedResourceValue(BaseModel):
|
|
@@ -83,8 +84,8 @@ class SerializedResourceValue(BaseModel):
|
|
|
83
84
|
)
|
|
84
85
|
|
|
85
86
|
def as_pydantic_object(
|
|
86
|
-
self, model_type: Type[
|
|
87
|
-
) ->
|
|
87
|
+
self, model_type: Type[T], validate_schema_ref: bool = False
|
|
88
|
+
) -> T:
|
|
88
89
|
"""
|
|
89
90
|
Parse the blob into a Pydantic-defined Python object based on the schema type and schema
|
|
90
91
|
ref.
|
datahub/emitter/mce_builder.py
CHANGED
|
@@ -52,7 +52,15 @@ from datahub.metadata.schema_classes import (
|
|
|
52
52
|
UpstreamLineageClass,
|
|
53
53
|
_Aspect as AspectAbstract,
|
|
54
54
|
)
|
|
55
|
-
from datahub.metadata.urns import
|
|
55
|
+
from datahub.metadata.urns import (
|
|
56
|
+
ChartUrn,
|
|
57
|
+
DashboardUrn,
|
|
58
|
+
DataFlowUrn,
|
|
59
|
+
DataJobUrn,
|
|
60
|
+
DataPlatformUrn,
|
|
61
|
+
DatasetUrn,
|
|
62
|
+
TagUrn,
|
|
63
|
+
)
|
|
56
64
|
from datahub.utilities.urn_encoder import UrnEncoder
|
|
57
65
|
|
|
58
66
|
logger = logging.getLogger(__name__)
|
|
@@ -119,7 +127,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
|
|
|
119
127
|
def make_data_platform_urn(platform: str) -> str:
|
|
120
128
|
if platform.startswith("urn:li:dataPlatform:"):
|
|
121
129
|
return platform
|
|
122
|
-
return
|
|
130
|
+
return DataPlatformUrn.create_from_id(platform).urn()
|
|
123
131
|
|
|
124
132
|
|
|
125
133
|
def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
|
|
@@ -236,7 +244,7 @@ def make_user_urn(username: str) -> str:
|
|
|
236
244
|
Makes a user urn if the input is not a user or group urn already
|
|
237
245
|
"""
|
|
238
246
|
return (
|
|
239
|
-
f"urn:li:corpuser:{username}"
|
|
247
|
+
f"urn:li:corpuser:{UrnEncoder.encode_string(username)}"
|
|
240
248
|
if not username.startswith(("urn:li:corpuser:", "urn:li:corpGroup:"))
|
|
241
249
|
else username
|
|
242
250
|
)
|
|
@@ -249,7 +257,7 @@ def make_group_urn(groupname: str) -> str:
|
|
|
249
257
|
if groupname and groupname.startswith(("urn:li:corpGroup:", "urn:li:corpuser:")):
|
|
250
258
|
return groupname
|
|
251
259
|
else:
|
|
252
|
-
return f"urn:li:corpGroup:{groupname}"
|
|
260
|
+
return f"urn:li:corpGroup:{UrnEncoder.encode_string(groupname)}"
|
|
253
261
|
|
|
254
262
|
|
|
255
263
|
def make_tag_urn(tag: str) -> str:
|
|
@@ -301,7 +309,12 @@ def make_data_flow_urn(
|
|
|
301
309
|
|
|
302
310
|
|
|
303
311
|
def make_data_job_urn_with_flow(flow_urn: str, job_id: str) -> str:
|
|
304
|
-
|
|
312
|
+
data_flow_urn = DataFlowUrn.from_string(flow_urn)
|
|
313
|
+
data_job_urn = DataJobUrn.create_from_ids(
|
|
314
|
+
data_flow_urn=data_flow_urn.urn(),
|
|
315
|
+
job_id=job_id,
|
|
316
|
+
)
|
|
317
|
+
return data_job_urn.urn()
|
|
305
318
|
|
|
306
319
|
|
|
307
320
|
def make_data_process_instance_urn(dataProcessInstanceId: str) -> str:
|
|
@@ -324,10 +337,11 @@ def make_dashboard_urn(
|
|
|
324
337
|
platform: str, name: str, platform_instance: Optional[str] = None
|
|
325
338
|
) -> str:
|
|
326
339
|
# FIXME: dashboards don't currently include data platform urn prefixes.
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
340
|
+
return DashboardUrn.create_from_ids(
|
|
341
|
+
platform=platform,
|
|
342
|
+
name=name,
|
|
343
|
+
platform_instance=platform_instance,
|
|
344
|
+
).urn()
|
|
331
345
|
|
|
332
346
|
|
|
333
347
|
def dashboard_urn_to_key(dashboard_urn: str) -> Optional[DashboardKeyClass]:
|
|
@@ -342,10 +356,11 @@ def make_chart_urn(
|
|
|
342
356
|
platform: str, name: str, platform_instance: Optional[str] = None
|
|
343
357
|
) -> str:
|
|
344
358
|
# FIXME: charts don't currently include data platform urn prefixes.
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
359
|
+
return ChartUrn.create_from_ids(
|
|
360
|
+
platform=platform,
|
|
361
|
+
name=name,
|
|
362
|
+
platform_instance=platform_instance,
|
|
363
|
+
).urn()
|
|
349
364
|
|
|
350
365
|
|
|
351
366
|
def chart_urn_to_key(chart_urn: str) -> Optional[ChartKeyClass]:
|
datahub/ingestion/source/common/subtypes.py
CHANGED
|
@@ -60,8 +60,15 @@ class BIContainerSubTypes(StrEnum):
|
|
|
60
60
|
MODE_COLLECTION = "Collection"
|
|
61
61
|
|
|
62
62
|
|
|
63
|
+
class FlowContainerSubTypes(StrEnum):
|
|
64
|
+
MSSQL_JOB = "Job"
|
|
65
|
+
MSSQL_PROCEDURE_CONTAINER = "Procedures Container"
|
|
66
|
+
|
|
67
|
+
|
|
63
68
|
class JobContainerSubTypes(StrEnum):
|
|
64
69
|
NIFI_PROCESS_GROUP = "Process Group"
|
|
70
|
+
MSSQL_JOBSTEP = "Job Step"
|
|
71
|
+
MSSQL_STORED_PROCEDURE = "Stored Procedure"
|
|
65
72
|
|
|
66
73
|
|
|
67
74
|
class BIAssetSubTypes(StrEnum):
|
datahub/ingestion/source/iceberg/iceberg_common.py
CHANGED
|
@@ -6,7 +6,10 @@ from typing import Any, Dict, Optional
|
|
|
6
6
|
from humanfriendly import format_timespan
|
|
7
7
|
from pydantic import Field, validator
|
|
8
8
|
from pyiceberg.catalog import Catalog, load_catalog
|
|
9
|
+
from pyiceberg.catalog.rest import RestCatalog
|
|
10
|
+
from requests.adapters import HTTPAdapter
|
|
9
11
|
from sortedcontainers import SortedList
|
|
12
|
+
from urllib3.util import Retry
|
|
10
13
|
|
|
11
14
|
from datahub.configuration.common import AllowDenyPattern, ConfigModel
|
|
12
15
|
from datahub.configuration.source_common import DatasetSourceConfigMixin
|
|
@@ -26,6 +29,23 @@ from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
|
|
|
26
29
|
|
|
27
30
|
logger = logging.getLogger(__name__)
|
|
28
31
|
|
|
32
|
+
DEFAULT_REST_TIMEOUT = 120
|
|
33
|
+
DEFAULT_REST_RETRY_POLICY = {"total": 3, "backoff_factor": 0.1}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TimeoutHTTPAdapter(HTTPAdapter):
|
|
37
|
+
def __init__(self, *args, **kwargs):
|
|
38
|
+
if "timeout" in kwargs:
|
|
39
|
+
self.timeout = kwargs["timeout"]
|
|
40
|
+
del kwargs["timeout"]
|
|
41
|
+
super().__init__(*args, **kwargs)
|
|
42
|
+
|
|
43
|
+
def send(self, request, **kwargs):
|
|
44
|
+
timeout = kwargs.get("timeout")
|
|
45
|
+
if timeout is None and hasattr(self, "timeout"):
|
|
46
|
+
kwargs["timeout"] = self.timeout
|
|
47
|
+
return super().send(request, **kwargs)
|
|
48
|
+
|
|
29
49
|
|
|
30
50
|
class IcebergProfilingConfig(ConfigModel):
|
|
31
51
|
enabled: bool = Field(
|
|
@@ -146,7 +166,26 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
|
|
|
146
166
|
logger.debug(
|
|
147
167
|
"Initializing the catalog %s with config: %s", catalog_name, catalog_config
|
|
148
168
|
)
|
|
149
|
-
|
|
169
|
+
catalog = load_catalog(name=catalog_name, **catalog_config)
|
|
170
|
+
if isinstance(catalog, RestCatalog):
|
|
171
|
+
logger.debug(
|
|
172
|
+
"Recognized REST catalog type being configured, attempting to configure HTTP Adapter for the session"
|
|
173
|
+
)
|
|
174
|
+
retry_policy: Dict[str, Any] = DEFAULT_REST_RETRY_POLICY.copy()
|
|
175
|
+
retry_policy.update(catalog_config.get("connection", {}).get("retry", {}))
|
|
176
|
+
retries = Retry(**retry_policy)
|
|
177
|
+
logger.debug(f"Retry policy to be set: {retry_policy}")
|
|
178
|
+
timeout = catalog_config.get("connection", {}).get(
|
|
179
|
+
"timeout", DEFAULT_REST_TIMEOUT
|
|
180
|
+
)
|
|
181
|
+
logger.debug(f"Timeout to be set: {timeout}")
|
|
182
|
+
catalog._session.mount(
|
|
183
|
+
"http://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
|
|
184
|
+
)
|
|
185
|
+
catalog._session.mount(
|
|
186
|
+
"https://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
|
|
187
|
+
)
|
|
188
|
+
return catalog
|
|
150
189
|
|
|
151
190
|
|
|
152
191
|
class TopTableTimings:
|
datahub/ingestion/source/identity/okta.py
CHANGED
|
@@ -666,6 +666,27 @@ class OktaSource(StatefulIngestionSourceBase):
|
|
|
666
666
|
self.config.okta_profile_to_username_regex,
|
|
667
667
|
)
|
|
668
668
|
|
|
669
|
+
def _map_okta_user_profile_custom_properties(
|
|
670
|
+
self, profile: UserProfile
|
|
671
|
+
) -> Dict[str, str]:
|
|
672
|
+
# filter out the common fields that are already mapped to the CorpUserInfo aspect and the private ones
|
|
673
|
+
return {
|
|
674
|
+
k: str(v)
|
|
675
|
+
for k, v in profile.__dict__.items()
|
|
676
|
+
if v
|
|
677
|
+
and k
|
|
678
|
+
not in [
|
|
679
|
+
"displayName",
|
|
680
|
+
"firstName",
|
|
681
|
+
"lastName",
|
|
682
|
+
"email",
|
|
683
|
+
"title",
|
|
684
|
+
"countryCode",
|
|
685
|
+
"department",
|
|
686
|
+
]
|
|
687
|
+
and not k.startswith("_")
|
|
688
|
+
}
|
|
689
|
+
|
|
669
690
|
# Converts Okta User Profile into a CorpUserInfo.
|
|
670
691
|
def _map_okta_user_profile(self, profile: UserProfile) -> CorpUserInfoClass:
|
|
671
692
|
# TODO: Extract user's manager if provided.
|
|
@@ -683,6 +704,7 @@ class OktaSource(StatefulIngestionSourceBase):
|
|
|
683
704
|
title=profile.title,
|
|
684
705
|
countryCode=profile.countryCode,
|
|
685
706
|
departmentName=profile.department,
|
|
707
|
+
customProperties=self._map_okta_user_profile_custom_properties(profile),
|
|
686
708
|
)
|
|
687
709
|
|
|
688
710
|
def _make_corp_group_urn(self, name: str) -> str:
|
datahub/ingestion/source/metabase.py
CHANGED
|
@@ -313,7 +313,7 @@ class MetabaseSource(StatefulIngestionSourceBase):
|
|
|
313
313
|
return None
|
|
314
314
|
|
|
315
315
|
dashboard_urn = builder.make_dashboard_urn(
|
|
316
|
-
self.platform, dashboard_details.get("id", "")
|
|
316
|
+
self.platform, str(dashboard_details.get("id", ""))
|
|
317
317
|
)
|
|
318
318
|
dashboard_snapshot = DashboardSnapshot(
|
|
319
319
|
urn=dashboard_urn,
|
|
@@ -337,7 +337,7 @@ class MetabaseSource(StatefulIngestionSourceBase):
|
|
|
337
337
|
card_id = card_info.get("card").get("id", "")
|
|
338
338
|
if not card_id:
|
|
339
339
|
continue # most likely a virtual card without an id (text or heading), not relevant.
|
|
340
|
-
chart_urn = builder.make_chart_urn(self.platform, card_id)
|
|
340
|
+
chart_urn = builder.make_chart_urn(self.platform, str(card_id))
|
|
341
341
|
chart_urns.append(chart_urn)
|
|
342
342
|
|
|
343
343
|
dashboard_info_class = DashboardInfoClass(
|
|
@@ -459,7 +459,7 @@ class MetabaseSource(StatefulIngestionSourceBase):
|
|
|
459
459
|
)
|
|
460
460
|
return None
|
|
461
461
|
|
|
462
|
-
chart_urn = builder.make_chart_urn(self.platform, card_id)
|
|
462
|
+
chart_urn = builder.make_chart_urn(self.platform, str(card_id))
|
|
463
463
|
chart_snapshot = ChartSnapshot(
|
|
464
464
|
urn=chart_urn,
|
|
465
465
|
aspects=[],
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import pathlib
|
|
3
|
+
import re
|
|
3
4
|
import time
|
|
4
5
|
from dataclasses import dataclass, field
|
|
5
6
|
from typing import Any, Dict, Iterable, List, Optional, TypeVar, Union
|
|
@@ -118,17 +119,58 @@ class BusinessGlossaryConfig(DefaultConfig):
|
|
|
118
119
|
return v
|
|
119
120
|
|
|
120
121
|
|
|
122
|
+
def clean_url(text: str) -> str:
    """
    Turn arbitrary text into a URL-friendly slug.

    Spaces become hyphens; every character other than ASCII letters, digits,
    hyphens and periods is dropped; runs of hyphens (and, separately, runs of
    periods) shrink to a single character; finally any hyphens or periods
    left at either end of the string are trimmed away.
    """
    hyphenated = text.replace(" ", "-")
    # Whitelist filter: anything outside [a-zA-Z0-9], "-" and "." is deleted outright.
    allowed_only = re.sub(r"[^a-zA-Z0-9\-.]", "", hyphenated)
    # Hyphen runs and period runs are squeezed independently of each other,
    # so a mixed sequence such as ".-" is left untouched.
    single_hyphens = re.sub(r"-+", "-", allowed_only)
    single_periods = re.sub(r"\.+", ".", single_hyphens)
    return single_periods.strip("-.")
|
|
140
|
+
|
|
141
|
+
|
|
121
142
|
def create_id(path: List[str], default_id: Optional[str], enable_auto_id: bool) -> str:
    """
    Create an ID for a glossary node or term.

    Args:
        path: List of path components leading to this node/term
        default_id: Optional manually specified ID; returned unchanged when not None
        enable_auto_id: Whether to generate a GUID instead of a readable ID

    Returns:
        ``default_id`` when it is given. Otherwise either a GUID derived from
        the dot-joined path (auto-id mode, paths with non-ASCII characters,
        or paths that clean down to an empty string) or the cleaned,
        dot-joined path.
    """
    if default_id is not None:
        return default_id  # Use explicitly provided ID

    raw_id: str = ".".join(path)

    # Paths with non-ASCII characters always get a GUID, regardless of enable_auto_id.
    if any(ord(c) > 127 for c in raw_id):
        return datahub_guid({"path": raw_id})

    if enable_auto_id:
        # Generate GUID for auto_id mode
        id_ = datahub_guid({"path": raw_id})
    else:
        # Clean the URL for better readability when not using auto_id
        id_ = clean_url(raw_id)

        # A path made up only of removable characters cleans down to "",
        # which is not a usable ID; hash the original path instead so the
        # result matches what auto_id mode would have produced.
        if not id_:
            logger.warning(
                f"Path '{raw_id}' has no usable characters left after URL cleaning. Falling back to GUID generation for stability."
            )
            return datahub_guid({"path": raw_id})

    # Force auto_id if the cleaned URL still contains problematic characters
    if UrnEncoder.contains_extended_reserved_char(id_):
        logger.warning(
            f"ID '{id_}' contains problematic characters after URL cleaning. Falling back to GUID generation for stability."
        )
        id_ = datahub_guid({"path": id_})

    return id_
|
|
133
175
|
|
|
134
176
|
|
datahub/ingestion/source/mode.py
CHANGED
|
@@ -377,7 +377,7 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
377
377
|
]
|
|
378
378
|
|
|
379
379
|
def _dashboard_urn(self, report_info: dict) -> str:
    """Return the dashboard URN for the report described by ``report_info``."""
    # The "id" entry is stringified before use; a missing id becomes "".
    report_id = str(report_info.get("id", ""))
    return builder.make_dashboard_urn(self.platform, report_id)
|
|
381
381
|
|
|
382
382
|
def _parse_last_run_at(self, report_info: dict) -> Optional[int]:
|
|
383
383
|
# Mode queries are refreshed, and that timestamp is reflected correctly here.
|
|
@@ -128,6 +128,10 @@ class RedshiftConfig(
|
|
|
128
128
|
default=True,
|
|
129
129
|
description="Whether lineage should be collected from copy commands",
|
|
130
130
|
)
|
|
131
|
+
include_share_lineage: bool = Field(
|
|
132
|
+
default=True,
|
|
133
|
+
description="Whether lineage should be collected from datashares",
|
|
134
|
+
)
|
|
131
135
|
|
|
132
136
|
include_usage_statistics: bool = Field(
|
|
133
137
|
default=False,
|