acryl-datahub 1.0.0rc10__py3-none-any.whl → 1.0.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/METADATA +2416 -2416
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/RECORD +28 -27
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +7 -4
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +125 -33
- datahub/ingestion/source/redshift/redshift.py +41 -72
- datahub/ingestion/source/redshift/redshift_schema.py +166 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/sql/oracle.py +93 -63
- datahub/metadata/_schema_classes.py +5 -5
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=m3vMOf1XXwW_i72T14wHeXSyYmTku5A-KQz7nxQXArM,322
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
5
|
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
6
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -27,7 +27,7 @@ datahub/api/entities/assertion/sql_assertion.py,sha256=myJU-Wf8O-RbiyU_Xlbp2cacw
|
|
|
27
27
|
datahub/api/entities/assertion/volume_assertion.py,sha256=37bNLGP-81MvcZj_cVHvrdw5I4aBxkER0xN0ZqyB3NU,3360
|
|
28
28
|
datahub/api/entities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
datahub/api/entities/common/data_platform_instance.py,sha256=AVqQ-yactNZi_bislIEUcQZCGovaHY-gQi1EY7PVsT4,1065
|
|
30
|
-
datahub/api/entities/common/serialized_value.py,sha256=
|
|
30
|
+
datahub/api/entities/common/serialized_value.py,sha256=DFPK7p4OwqRTOnH8luEWzqH_4vQHZSNxFIL63x_o2ok,5565
|
|
31
31
|
datahub/api/entities/corpgroup/__init__.py,sha256=Uf3SxsZUSY-yZ2Kx3-1dWwz600D1C4Ds_z_nG7hwanA,63
|
|
32
32
|
datahub/api/entities/corpgroup/corpgroup.py,sha256=XSrGHCwl7lMNtzWviMzZbw8VDdesXC2HLZP5kpHt2fQ,8878
|
|
33
33
|
datahub/api/entities/corpuser/__init__.py,sha256=RspO1ceu6q2zUqYqZqRRY_MPcP7PNdd2lQoZn-KfeQE,60
|
|
@@ -324,7 +324,7 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
324
324
|
datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
|
|
325
325
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
326
326
|
datahub/ingestion/source/iceberg/iceberg.py,sha256=pMWQtn88XAYwZsRNkICX1GlQOqOnyuWdLpkcjVQEon0,29039
|
|
327
|
-
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=
|
|
327
|
+
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
|
|
328
328
|
datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
|
|
329
329
|
datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
330
330
|
datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
|
|
@@ -360,7 +360,7 @@ datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz
|
|
|
360
360
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
361
361
|
datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
|
|
362
362
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
363
|
-
datahub/ingestion/source/metadata/business_glossary.py,sha256=
|
|
363
|
+
datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
|
|
364
364
|
datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH57g-u6LWbu_f7HM4,9521
|
|
365
365
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
366
366
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
|
|
@@ -396,16 +396,17 @@ datahub/ingestion/source/qlik_sense/qlik_api.py,sha256=KoBaD1VowYrbaRg1rjDP1_mmP
|
|
|
396
396
|
datahub/ingestion/source/qlik_sense/qlik_sense.py,sha256=bmhmOgSXzC6g-uqO1ljFLRNz2oo6Xjn400UQnWdMA1Y,22530
|
|
397
397
|
datahub/ingestion/source/qlik_sense/websocket_connection.py,sha256=jp39OInvjCN9BtnKsHU_aa1B3X9hVHqSmD25stXuqHk,1940
|
|
398
398
|
datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
|
-
datahub/ingestion/source/redshift/config.py,sha256=
|
|
399
|
+
datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUhaMGec8FnUbSl6Y,8997
|
|
400
|
+
datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
|
|
400
401
|
datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
|
|
401
|
-
datahub/ingestion/source/redshift/lineage.py,sha256=
|
|
402
|
-
datahub/ingestion/source/redshift/lineage_v2.py,sha256=
|
|
403
|
-
datahub/ingestion/source/redshift/profile.py,sha256=
|
|
404
|
-
datahub/ingestion/source/redshift/query.py,sha256=
|
|
405
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
402
|
+
datahub/ingestion/source/redshift/lineage.py,sha256=Gk2dNuRBEipZkY5W1sArlfRbFR7mBKutCFHHTrn3yX4,44096
|
|
403
|
+
datahub/ingestion/source/redshift/lineage_v2.py,sha256=H6Qky5dLeZEICdDWyH-My78NoKlXpExHg3m-6d5lbgo,16891
|
|
404
|
+
datahub/ingestion/source/redshift/profile.py,sha256=jqFQUSg_qzSYi1yIAq24NFwHW8yIcSDSSh-vgJ4nl6M,4287
|
|
405
|
+
datahub/ingestion/source/redshift/query.py,sha256=6Fw3I8qFLflySDu6WY5D9NjXnRnDIw0yxKisSpaHh0A,47526
|
|
406
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=IZqeQws3mvDdu9K-ixPGZNalDcRRRse-l_TTwQI7B-4,43407
|
|
406
407
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
407
|
-
datahub/ingestion/source/redshift/redshift_schema.py,sha256=
|
|
408
|
-
datahub/ingestion/source/redshift/report.py,sha256=
|
|
408
|
+
datahub/ingestion/source/redshift/redshift_schema.py,sha256=WTc-j4_PYlFgaJZ3hEorGIBWKruTX57E7V_5JaUe8mU,24045
|
|
409
|
+
datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
|
|
409
410
|
datahub/ingestion/source/redshift/usage.py,sha256=eSdB1MYZeQokkQOwl9LPdpo-oCBJSwxJBotSpJ9XjBc,17473
|
|
410
411
|
datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
|
|
411
412
|
datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
|
|
@@ -464,7 +465,7 @@ datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnX
|
|
|
464
465
|
datahub/ingestion/source/sql/hive_metastore.py,sha256=fH7bAcljapYqmF8cQE7humoufFe2RVFRYOcyavMg9yo,36103
|
|
465
466
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
466
467
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
467
|
-
datahub/ingestion/source/sql/oracle.py,sha256=
|
|
468
|
+
datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
|
|
468
469
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
469
470
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
470
471
|
datahub/ingestion/source/sql/sql_common.py,sha256=r75Cd06Qwe2fqTDRZKWnIf7kpnR0BSxZ9PYBOgY0I6k,48785
|
|
@@ -575,8 +576,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
575
576
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
576
577
|
datahub/lite/lite_util.py,sha256=Cm6trMTeo0X1fv4nSsW9lC0jqce7Jt-05GhOtIGzsVc,4559
|
|
577
578
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
578
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
579
|
-
datahub/metadata/schema.avsc,sha256=
|
|
579
|
+
datahub/metadata/_schema_classes.py,sha256=uafVvWsnAqPranXzeC9CrSAu7I1-XJOogtiBPhxmn-k,993397
|
|
580
|
+
datahub/metadata/schema.avsc,sha256=uPWX2Rx9A12b-p4ef4zrsjbtQPSIH8w67l3B6pq6zE0,741459
|
|
580
581
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
581
582
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
582
583
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -745,7 +746,7 @@ datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=JKNy_KlUqr3kt7o1Cu2D
|
|
|
745
746
|
datahub/metadata/schemas/Deprecation.avsc,sha256=SmbTlMB9fujdMBjYEQkzaU4XJzwM1gD6E8L2zoL1b4Q,1280
|
|
746
747
|
datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
|
|
747
748
|
datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
|
|
748
|
-
datahub/metadata/schemas/DomainKey.avsc,sha256=
|
|
749
|
+
datahub/metadata/schemas/DomainKey.avsc,sha256=TYCcJRWqwbxbQuR5E68pvdeAmfVdYsJuMNhTxVphbqg,676
|
|
749
750
|
datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
|
|
750
751
|
datahub/metadata/schemas/Domains.avsc,sha256=5mRQcba6Zmp6Y1srbxhOjETutg0I_ZG4ikuS2r9fkR0,804
|
|
751
752
|
datahub/metadata/schemas/DynamicFormAssignment.avsc,sha256=SXRL5D6kIYWdGl3zLQYxPnkQX71JXQOKrjQNavFqVp0,7339
|
|
@@ -783,7 +784,7 @@ datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0z
|
|
|
783
784
|
datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
|
|
784
785
|
datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
|
|
785
786
|
datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
|
|
786
|
-
datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=
|
|
787
|
+
datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=hT8ny4TL1WvgFvnaVBjuw6AWDiPDjpkh20f83ZT-UZ8,664
|
|
787
788
|
datahub/metadata/schemas/GlossaryRelatedTerms.avsc,sha256=ZTP0mrFD4y-C6JekRy8IVuHvICUkJib-ZAYD93Gv1tA,2763
|
|
788
789
|
datahub/metadata/schemas/GlossaryTermInfo.avsc,sha256=j4s9NCyMOIF03HfaXoQEIkiMTRaCy_-euhenptfu7IA,2935
|
|
789
790
|
datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=mkyrzmOX_BGRHbcj2ccUALbrPVJNdQbItU-VyKN7P98,836
|
|
@@ -806,12 +807,12 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=6_typ7K0Bz8x62T31IYqf9XS9
|
|
|
806
807
|
datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
|
|
807
808
|
datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
|
|
808
809
|
datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
|
|
809
|
-
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=
|
|
810
|
+
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=vt04jFF_ZHSvWhqLoxC8C_KspiRLkvNNIXJI0aKPF1Q,2425
|
|
810
811
|
datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=I3v-uNOeYxO4hooPHOjafWWHuVyeGvG90oma0tzpNFg,5409
|
|
811
812
|
datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
|
|
812
|
-
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=
|
|
813
|
+
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=3LoMWejMfCwdoqz3PFinRbY1_Yy4Kypw7pwg3tL42Jg,2497
|
|
813
814
|
datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=zMl6ab6zfcYJmt31f-AUrrfeqfLoaSZQpfB3_S9JFFQ,6534
|
|
814
|
-
datahub/metadata/schemas/MLModelKey.avsc,sha256=
|
|
815
|
+
datahub/metadata/schemas/MLModelKey.avsc,sha256=pRntMhcpgTJL2T2nGK6Sf9_q2vJOqHELYFh59VMXqv0,2866
|
|
815
816
|
datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
|
|
816
817
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=Kq2Q9WxZ6nQ8wR4P6wpPCI-J7FwXQyoa10s6BvXtkm8,1110
|
|
817
818
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
@@ -913,7 +914,7 @@ datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGy
|
|
|
913
914
|
datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
|
|
914
915
|
datahub/sql_parsing/split_statements.py,sha256=6KUoIPG7H8Rja3lrPjSrSfhFfwW4oqgfoNQeTbbOWNg,8953
|
|
915
916
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
|
|
916
|
-
datahub/sql_parsing/sql_parsing_common.py,sha256=
|
|
917
|
+
datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
|
|
917
918
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
918
919
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
|
|
919
920
|
datahub/sql_parsing/sqlglot_utils.py,sha256=6W6MQ5Yh0xXT9_h0jd19yoGWMdXicyRBDD_FwV7nj04,14701
|
|
@@ -1021,9 +1022,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1021
1022
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1022
1023
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1023
1024
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1024
|
-
acryl_datahub-1.0.
|
|
1025
|
-
acryl_datahub-1.0.
|
|
1026
|
-
acryl_datahub-1.0.
|
|
1027
|
-
acryl_datahub-1.0.
|
|
1028
|
-
acryl_datahub-1.0.
|
|
1029
|
-
acryl_datahub-1.0.
|
|
1025
|
+
acryl_datahub-1.0.0rc11.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1026
|
+
acryl_datahub-1.0.0rc11.dist-info/METADATA,sha256=hZCrduEZ7Qqkr76OUpdPLHm7AApR7AQHEaKKYq9uJZE,175337
|
|
1027
|
+
acryl_datahub-1.0.0rc11.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
1028
|
+
acryl_datahub-1.0.0rc11.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1029
|
+
acryl_datahub-1.0.0rc11.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1030
|
+
acryl_datahub-1.0.0rc11.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Dict, Optional, Type, Union
|
|
3
|
+
from typing import Dict, Optional, Type, TypeVar, Union
|
|
4
4
|
|
|
5
5
|
from avrogen.dict_wrapper import DictWrapper
|
|
6
6
|
from pydantic import BaseModel
|
|
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
13
|
_REMAPPED_SCHEMA_TYPES = {
|
|
14
14
|
k.replace("pegasus2avro.", ""): v for k, v in SCHEMA_TYPES.items()
|
|
15
15
|
}
|
|
16
|
+
T = TypeVar("T", bound=BaseModel)
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class SerializedResourceValue(BaseModel):
|
|
@@ -83,8 +84,8 @@ class SerializedResourceValue(BaseModel):
|
|
|
83
84
|
)
|
|
84
85
|
|
|
85
86
|
def as_pydantic_object(
|
|
86
|
-
self, model_type: Type[
|
|
87
|
-
) ->
|
|
87
|
+
self, model_type: Type[T], validate_schema_ref: bool = False
|
|
88
|
+
) -> T:
|
|
88
89
|
"""
|
|
89
90
|
Parse the blob into a Pydantic-defined Python object based on the schema type and schema
|
|
90
91
|
ref.
|
|
@@ -6,7 +6,10 @@ from typing import Any, Dict, Optional
|
|
|
6
6
|
from humanfriendly import format_timespan
|
|
7
7
|
from pydantic import Field, validator
|
|
8
8
|
from pyiceberg.catalog import Catalog, load_catalog
|
|
9
|
+
from pyiceberg.catalog.rest import RestCatalog
|
|
10
|
+
from requests.adapters import HTTPAdapter
|
|
9
11
|
from sortedcontainers import SortedList
|
|
12
|
+
from urllib3.util import Retry
|
|
10
13
|
|
|
11
14
|
from datahub.configuration.common import AllowDenyPattern, ConfigModel
|
|
12
15
|
from datahub.configuration.source_common import DatasetSourceConfigMixin
|
|
@@ -26,6 +29,23 @@ from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
|
|
|
26
29
|
|
|
27
30
|
logger = logging.getLogger(__name__)
|
|
28
31
|
|
|
32
|
+
DEFAULT_REST_TIMEOUT = 120
|
|
33
|
+
DEFAULT_REST_RETRY_POLICY = {"total": 3, "backoff_factor": 0.1}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TimeoutHTTPAdapter(HTTPAdapter):
|
|
37
|
+
def __init__(self, *args, **kwargs):
|
|
38
|
+
if "timeout" in kwargs:
|
|
39
|
+
self.timeout = kwargs["timeout"]
|
|
40
|
+
del kwargs["timeout"]
|
|
41
|
+
super().__init__(*args, **kwargs)
|
|
42
|
+
|
|
43
|
+
def send(self, request, **kwargs):
|
|
44
|
+
timeout = kwargs.get("timeout")
|
|
45
|
+
if timeout is None and hasattr(self, "timeout"):
|
|
46
|
+
kwargs["timeout"] = self.timeout
|
|
47
|
+
return super().send(request, **kwargs)
|
|
48
|
+
|
|
29
49
|
|
|
30
50
|
class IcebergProfilingConfig(ConfigModel):
|
|
31
51
|
enabled: bool = Field(
|
|
@@ -146,7 +166,26 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
|
|
|
146
166
|
logger.debug(
|
|
147
167
|
"Initializing the catalog %s with config: %s", catalog_name, catalog_config
|
|
148
168
|
)
|
|
149
|
-
|
|
169
|
+
catalog = load_catalog(name=catalog_name, **catalog_config)
|
|
170
|
+
if isinstance(catalog, RestCatalog):
|
|
171
|
+
logger.debug(
|
|
172
|
+
"Recognized REST catalog type being configured, attempting to configure HTTP Adapter for the session"
|
|
173
|
+
)
|
|
174
|
+
retry_policy: Dict[str, Any] = DEFAULT_REST_RETRY_POLICY.copy()
|
|
175
|
+
retry_policy.update(catalog_config.get("connection", {}).get("retry", {}))
|
|
176
|
+
retries = Retry(**retry_policy)
|
|
177
|
+
logger.debug(f"Retry policy to be set: {retry_policy}")
|
|
178
|
+
timeout = catalog_config.get("connection", {}).get(
|
|
179
|
+
"timeout", DEFAULT_REST_TIMEOUT
|
|
180
|
+
)
|
|
181
|
+
logger.debug(f"Timeout to be set: {timeout}")
|
|
182
|
+
catalog._session.mount(
|
|
183
|
+
"http://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
|
|
184
|
+
)
|
|
185
|
+
catalog._session.mount(
|
|
186
|
+
"https://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
|
|
187
|
+
)
|
|
188
|
+
return catalog
|
|
150
189
|
|
|
151
190
|
|
|
152
191
|
class TopTableTimings:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import pathlib
|
|
3
|
+
import re
|
|
3
4
|
import time
|
|
4
5
|
from dataclasses import dataclass, field
|
|
5
6
|
from typing import Any, Dict, Iterable, List, Optional, TypeVar, Union
|
|
@@ -118,17 +119,58 @@ class BusinessGlossaryConfig(DefaultConfig):
|
|
|
118
119
|
return v
|
|
119
120
|
|
|
120
121
|
|
|
122
|
+
def clean_url(text: str) -> str:
|
|
123
|
+
"""
|
|
124
|
+
Clean text for use in URLs by:
|
|
125
|
+
1. Replacing spaces with hyphens
|
|
126
|
+
2. Removing special characters (preserving hyphens and periods)
|
|
127
|
+
3. Collapsing multiple hyphens and periods into single ones
|
|
128
|
+
"""
|
|
129
|
+
# Replace spaces with hyphens
|
|
130
|
+
text = text.replace(" ", "-")
|
|
131
|
+
# Remove special characters except hyphens and periods
|
|
132
|
+
text = re.sub(r"[^a-zA-Z0-9\-.]", "", text)
|
|
133
|
+
# Collapse multiple hyphens into one
|
|
134
|
+
text = re.sub(r"-+", "-", text)
|
|
135
|
+
# Collapse multiple periods into one
|
|
136
|
+
text = re.sub(r"\.+", ".", text)
|
|
137
|
+
# Remove leading/trailing hyphens and periods
|
|
138
|
+
text = text.strip("-.")
|
|
139
|
+
return text
|
|
140
|
+
|
|
141
|
+
|
|
121
142
|
def create_id(path: List[str], default_id: Optional[str], enable_auto_id: bool) -> str:
|
|
143
|
+
"""
|
|
144
|
+
Create an ID for a glossary node or term.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
path: List of path components leading to this node/term
|
|
148
|
+
default_id: Optional manually specified ID
|
|
149
|
+
enable_auto_id: Whether to generate GUIDs
|
|
150
|
+
"""
|
|
122
151
|
if default_id is not None:
|
|
123
|
-
return default_id #
|
|
152
|
+
return default_id # Use explicitly provided ID
|
|
124
153
|
|
|
125
154
|
id_: str = ".".join(path)
|
|
126
155
|
|
|
127
|
-
|
|
128
|
-
|
|
156
|
+
# Check for non-ASCII characters before cleaning
|
|
157
|
+
if any(ord(c) > 127 for c in id_):
|
|
158
|
+
return datahub_guid({"path": id_})
|
|
129
159
|
|
|
130
160
|
if enable_auto_id:
|
|
161
|
+
# Generate GUID for auto_id mode
|
|
131
162
|
id_ = datahub_guid({"path": id_})
|
|
163
|
+
else:
|
|
164
|
+
# Clean the URL for better readability when not using auto_id
|
|
165
|
+
id_ = clean_url(id_)
|
|
166
|
+
|
|
167
|
+
# Force auto_id if the cleaned URL still contains problematic characters
|
|
168
|
+
if UrnEncoder.contains_extended_reserved_char(id_):
|
|
169
|
+
logger.warning(
|
|
170
|
+
f"ID '{id_}' contains problematic characters after URL cleaning. Falling back to GUID generation for stability."
|
|
171
|
+
)
|
|
172
|
+
id_ = datahub_guid({"path": id_})
|
|
173
|
+
|
|
132
174
|
return id_
|
|
133
175
|
|
|
134
176
|
|
|
@@ -128,6 +128,10 @@ class RedshiftConfig(
|
|
|
128
128
|
default=True,
|
|
129
129
|
description="Whether lineage should be collected from copy commands",
|
|
130
130
|
)
|
|
131
|
+
include_share_lineage: bool = Field(
|
|
132
|
+
default=True,
|
|
133
|
+
description="Whether lineage should be collected from datashares",
|
|
134
|
+
)
|
|
131
135
|
|
|
132
136
|
include_usage_statistics: bool = Field(
|
|
133
137
|
default=False,
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
from typing import Dict, Iterable, List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from datahub.api.entities.platformresource.platform_resource import (
|
|
6
|
+
ElasticPlatformResourceQuery,
|
|
7
|
+
PlatformResource,
|
|
8
|
+
PlatformResourceKey,
|
|
9
|
+
PlatformResourceSearchFields,
|
|
10
|
+
)
|
|
11
|
+
from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
|
|
12
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
13
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
14
|
+
from datahub.ingestion.source.redshift.config import RedshiftConfig
|
|
15
|
+
from datahub.ingestion.source.redshift.redshift_schema import (
|
|
16
|
+
InboundDatashare,
|
|
17
|
+
OutboundDatashare,
|
|
18
|
+
PartialInboundDatashare,
|
|
19
|
+
RedshiftTable,
|
|
20
|
+
RedshiftView,
|
|
21
|
+
)
|
|
22
|
+
from datahub.ingestion.source.redshift.report import RedshiftReport
|
|
23
|
+
from datahub.sql_parsing.sql_parsing_aggregator import KnownLineageMapping
|
|
24
|
+
from datahub.utilities.search_utils import LogicalOperator
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class OutboundSharePlatformResource(BaseModel):
|
|
28
|
+
namespace: str
|
|
29
|
+
platform_instance: Optional[str]
|
|
30
|
+
env: str
|
|
31
|
+
source_database: str
|
|
32
|
+
share_name: str
|
|
33
|
+
|
|
34
|
+
def get_key(self) -> str:
|
|
35
|
+
return f"{self.namespace}.{self.share_name}"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
PLATFORM_RESOURCE_TYPE = "OUTBOUND_DATASHARE"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RedshiftDatasharesHelper:
|
|
42
|
+
"""
|
|
43
|
+
Redshift datashares lineage generation relies on PlatformResource entity
|
|
44
|
+
to identify the producer namespace and its platform_instance and env
|
|
45
|
+
|
|
46
|
+
Ingestion of any database in namespace will
|
|
47
|
+
A. generate PlatformResource entity for all outbound shares in namespace.
|
|
48
|
+
B. generate lineage with upstream tables from another namespace, if the database
|
|
49
|
+
is created from an inbound share
|
|
50
|
+
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
def __init__(
|
|
54
|
+
self,
|
|
55
|
+
config: RedshiftConfig,
|
|
56
|
+
report: RedshiftReport,
|
|
57
|
+
graph: Optional[DataHubGraph],
|
|
58
|
+
):
|
|
59
|
+
self.platform = "redshift"
|
|
60
|
+
self.config = config
|
|
61
|
+
self.report = report
|
|
62
|
+
self.graph = graph
|
|
63
|
+
|
|
64
|
+
def to_platform_resource(
|
|
65
|
+
self, shares: List[OutboundDatashare]
|
|
66
|
+
) -> Iterable[MetadataChangeProposalWrapper]:
|
|
67
|
+
if not shares:
|
|
68
|
+
self.report.outbound_shares_count = 0
|
|
69
|
+
return
|
|
70
|
+
|
|
71
|
+
self.report.outbound_shares_count = len(shares)
|
|
72
|
+
# Producer namespace will be current namespace for all
|
|
73
|
+
# outbound data shares
|
|
74
|
+
|
|
75
|
+
for share in shares:
|
|
76
|
+
producer_namespace = share.producer_namespace
|
|
77
|
+
try:
|
|
78
|
+
platform_resource_key = PlatformResourceKey(
|
|
79
|
+
platform=self.platform,
|
|
80
|
+
platform_instance=self.config.platform_instance,
|
|
81
|
+
resource_type=PLATFORM_RESOURCE_TYPE,
|
|
82
|
+
primary_key=share.get_key(),
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
value = OutboundSharePlatformResource(
|
|
86
|
+
namespace=producer_namespace,
|
|
87
|
+
platform_instance=self.config.platform_instance,
|
|
88
|
+
env=self.config.env,
|
|
89
|
+
source_database=share.source_database,
|
|
90
|
+
share_name=share.share_name,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
platform_resource = PlatformResource.create(
|
|
94
|
+
key=platform_resource_key,
|
|
95
|
+
value=value,
|
|
96
|
+
secondary_keys=[share.share_name, share.producer_namespace],
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
yield from platform_resource.to_mcps()
|
|
100
|
+
|
|
101
|
+
except Exception as exc:
|
|
102
|
+
self.report.warning(
|
|
103
|
+
title="Downstream lineage to outbound datashare may not work",
|
|
104
|
+
message="Failed to generate platform resource for outbound datashares",
|
|
105
|
+
context=f"Namespace {share.producer_namespace} Share {share.share_name}",
|
|
106
|
+
exc=exc,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def generate_lineage(
|
|
110
|
+
self,
|
|
111
|
+
share: Union[InboundDatashare, PartialInboundDatashare],
|
|
112
|
+
tables: Dict[str, List[Union[RedshiftTable, RedshiftView]]],
|
|
113
|
+
) -> Iterable[KnownLineageMapping]:
|
|
114
|
+
upstream_share = self.find_upstream_share(share)
|
|
115
|
+
|
|
116
|
+
if not upstream_share:
|
|
117
|
+
return
|
|
118
|
+
|
|
119
|
+
for schema in tables:
|
|
120
|
+
for table in tables[schema]:
|
|
121
|
+
dataset_urn = self.gen_dataset_urn(
|
|
122
|
+
f"{share.consumer_database}.{schema}.{table.name}",
|
|
123
|
+
self.config.platform_instance,
|
|
124
|
+
self.config.env,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
upstream_dataset_urn = self.gen_dataset_urn(
|
|
128
|
+
f"{upstream_share.source_database}.{schema}.{table.name}",
|
|
129
|
+
upstream_share.platform_instance,
|
|
130
|
+
upstream_share.env,
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
yield KnownLineageMapping(
|
|
134
|
+
upstream_urn=upstream_dataset_urn, downstream_urn=dataset_urn
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
def find_upstream_share(
|
|
138
|
+
self, share: Union[InboundDatashare, PartialInboundDatashare]
|
|
139
|
+
) -> Optional[OutboundSharePlatformResource]:
|
|
140
|
+
if not self.graph:
|
|
141
|
+
self.report.warning(
|
|
142
|
+
title="Upstream lineage of inbound datashare will be missing",
|
|
143
|
+
message="Missing datahub graph. Either use the datahub-rest sink or "
|
|
144
|
+
"set the top-level datahub_api config in the recipe",
|
|
145
|
+
)
|
|
146
|
+
else:
|
|
147
|
+
resources = self.get_platform_resources(self.graph, share)
|
|
148
|
+
|
|
149
|
+
if len(resources) == 0 or (
|
|
150
|
+
not any(
|
|
151
|
+
[
|
|
152
|
+
resource.resource_info is not None
|
|
153
|
+
and resource.resource_info.resource_type
|
|
154
|
+
== PLATFORM_RESOURCE_TYPE
|
|
155
|
+
for resource in resources
|
|
156
|
+
]
|
|
157
|
+
)
|
|
158
|
+
):
|
|
159
|
+
self.report.info(
|
|
160
|
+
title="Upstream lineage of inbound datashare will be missing",
|
|
161
|
+
message="Missing platform resource for share. "
|
|
162
|
+
"Setup redshift ingestion for namespace if not already done. If ingestion is setup, "
|
|
163
|
+
"check whether ingestion user has ALTER/SHARE permission to share.",
|
|
164
|
+
context=share.get_description(),
|
|
165
|
+
)
|
|
166
|
+
else:
|
|
167
|
+
# Ideally we should get only one resource as primary key is namespace+share
|
|
168
|
+
# and type is "OUTBOUND_DATASHARE"
|
|
169
|
+
for resource in resources:
|
|
170
|
+
try:
|
|
171
|
+
assert (
|
|
172
|
+
resource.resource_info is not None
|
|
173
|
+
and resource.resource_info.value is not None
|
|
174
|
+
)
|
|
175
|
+
return resource.resource_info.value.as_pydantic_object(
|
|
176
|
+
OutboundSharePlatformResource, True
|
|
177
|
+
)
|
|
178
|
+
except Exception as e:
|
|
179
|
+
self.report.warning(
|
|
180
|
+
title="Upstream lineage of inbound datashare will be missing",
|
|
181
|
+
message="Failed to parse platform resource for outbound datashare",
|
|
182
|
+
context=share.get_description(),
|
|
183
|
+
exc=e,
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
return None
|
|
187
|
+
|
|
188
|
+
def get_platform_resources(
|
|
189
|
+
self,
|
|
190
|
+
graph: DataHubGraph,
|
|
191
|
+
share: Union[InboundDatashare, PartialInboundDatashare],
|
|
192
|
+
) -> List[PlatformResource]:
|
|
193
|
+
# NOTE: ideally we receive InboundDatashare and not PartialInboundDatashare.
|
|
194
|
+
# however due to varchar(128) type of database table that captures datashare options
|
|
195
|
+
# we may receive only partial information about inbound share
|
|
196
|
+
# Alternate option to get InboundDatashare using svv_datashares requires superuser
|
|
197
|
+
if isinstance(share, PartialInboundDatashare):
|
|
198
|
+
return list(
|
|
199
|
+
PlatformResource.search_by_filters(
|
|
200
|
+
graph,
|
|
201
|
+
ElasticPlatformResourceQuery.create_from()
|
|
202
|
+
.group(LogicalOperator.AND)
|
|
203
|
+
.add_field_match(
|
|
204
|
+
PlatformResourceSearchFields.RESOURCE_TYPE,
|
|
205
|
+
PLATFORM_RESOURCE_TYPE,
|
|
206
|
+
)
|
|
207
|
+
.add_field_match(
|
|
208
|
+
PlatformResourceSearchFields.PLATFORM, self.platform
|
|
209
|
+
)
|
|
210
|
+
.add_field_match(
|
|
211
|
+
PlatformResourceSearchFields.SECONDARY_KEYS,
|
|
212
|
+
share.share_name,
|
|
213
|
+
)
|
|
214
|
+
.add_wildcard(
|
|
215
|
+
PlatformResourceSearchFields.SECONDARY_KEYS.field_name,
|
|
216
|
+
f"{share.producer_namespace_prefix}*",
|
|
217
|
+
)
|
|
218
|
+
.end(),
|
|
219
|
+
)
|
|
220
|
+
)
|
|
221
|
+
return list(
|
|
222
|
+
PlatformResource.search_by_key(
|
|
223
|
+
graph, key=share.get_key(), primary=True, is_exact=True
|
|
224
|
+
)
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
# TODO: Refactor and move to new RedshiftIdentifierBuilder class
|
|
228
|
+
def gen_dataset_urn(
|
|
229
|
+
self, datahub_dataset_name: str, platform_instance: Optional[str], env: str
|
|
230
|
+
) -> str:
|
|
231
|
+
return make_dataset_urn_with_platform_instance(
|
|
232
|
+
platform=self.platform,
|
|
233
|
+
name=datahub_dataset_name,
|
|
234
|
+
platform_instance=platform_instance,
|
|
235
|
+
env=env,
|
|
236
|
+
)
|
|
@@ -813,9 +813,13 @@ class RedshiftLineageExtractor:
|
|
|
813
813
|
)
|
|
814
814
|
|
|
815
815
|
tablename = table.name
|
|
816
|
-
if
|
|
816
|
+
if (
|
|
817
|
+
table.is_external_table
|
|
818
|
+
and schema.is_external_schema
|
|
819
|
+
and schema.external_platform
|
|
820
|
+
):
|
|
817
821
|
# external_db_params = schema.option
|
|
818
|
-
upstream_platform = schema.
|
|
822
|
+
upstream_platform = schema.external_platform.lower()
|
|
819
823
|
catalog_upstream = UpstreamClass(
|
|
820
824
|
mce_builder.make_dataset_urn_with_platform_instance(
|
|
821
825
|
upstream_platform,
|
|
@@ -401,11 +401,14 @@ class RedshiftSqlLineageV2(Closeable):
|
|
|
401
401
|
) -> None:
|
|
402
402
|
for schema_name, tables in all_tables[self.database].items():
|
|
403
403
|
for table in tables:
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
404
|
+
schema = db_schemas[self.database][schema_name]
|
|
405
|
+
if (
|
|
406
|
+
table.is_external_table
|
|
407
|
+
and schema.is_external_schema
|
|
408
|
+
and schema.external_platform
|
|
409
|
+
):
|
|
407
410
|
# external_db_params = schema.option
|
|
408
|
-
upstream_platform = schema.
|
|
411
|
+
upstream_platform = schema.external_platform.lower()
|
|
409
412
|
|
|
410
413
|
table_urn = mce_builder.make_dataset_urn_with_platform_instance(
|
|
411
414
|
self.platform,
|
|
@@ -48,7 +48,7 @@ class RedshiftProfiler(GenericProfiler):
|
|
|
48
48
|
if not self.config.schema_pattern.allowed(schema):
|
|
49
49
|
continue
|
|
50
50
|
for table in tables[db].get(schema, {}):
|
|
51
|
-
if table.
|
|
51
|
+
if table.is_external_table:
|
|
52
52
|
if not self.config.profiling.profile_external_tables:
|
|
53
53
|
# Case 1: If user did not tell us to profile external tables, simply log this.
|
|
54
54
|
self.report.profiling_skipped_other[schema] += 1
|