acryl-datahub 1.0.0rc10__py3-none-any.whl → 1.0.0rc12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/METADATA +2513 -2513
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/RECORD +29 -28
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/cli/check_cli.py +72 -19
- datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +7 -4
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +125 -33
- datahub/ingestion/source/redshift/redshift.py +41 -72
- datahub/ingestion/source/redshift/redshift_schema.py +166 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/sql/oracle.py +93 -63
- datahub/metadata/_schema_classes.py +5 -5
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=x2yvrMHLCX-bvzooDchpmcuWaCxdI_YKEWTKESO_3Ow,322
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
5
|
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
6
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -27,7 +27,7 @@ datahub/api/entities/assertion/sql_assertion.py,sha256=myJU-Wf8O-RbiyU_Xlbp2cacw
|
|
|
27
27
|
datahub/api/entities/assertion/volume_assertion.py,sha256=37bNLGP-81MvcZj_cVHvrdw5I4aBxkER0xN0ZqyB3NU,3360
|
|
28
28
|
datahub/api/entities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
datahub/api/entities/common/data_platform_instance.py,sha256=AVqQ-yactNZi_bislIEUcQZCGovaHY-gQi1EY7PVsT4,1065
|
|
30
|
-
datahub/api/entities/common/serialized_value.py,sha256=
|
|
30
|
+
datahub/api/entities/common/serialized_value.py,sha256=DFPK7p4OwqRTOnH8luEWzqH_4vQHZSNxFIL63x_o2ok,5565
|
|
31
31
|
datahub/api/entities/corpgroup/__init__.py,sha256=Uf3SxsZUSY-yZ2Kx3-1dWwz600D1C4Ds_z_nG7hwanA,63
|
|
32
32
|
datahub/api/entities/corpgroup/corpgroup.py,sha256=XSrGHCwl7lMNtzWviMzZbw8VDdesXC2HLZP5kpHt2fQ,8878
|
|
33
33
|
datahub/api/entities/corpuser/__init__.py,sha256=RspO1ceu6q2zUqYqZqRRY_MPcP7PNdd2lQoZn-KfeQE,60
|
|
@@ -60,7 +60,7 @@ datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0
|
|
|
60
60
|
datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
|
|
61
61
|
datahub/api/graphql/operation.py,sha256=h7OXbVRrpJgoth1X4cgeIFhD5JY1MGKg2KjVlQK1gqE,5116
|
|
62
62
|
datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
datahub/cli/check_cli.py,sha256=
|
|
63
|
+
datahub/cli/check_cli.py,sha256=GpAM7k1GMIIE7zQ6GHnu_78aSc4mPiEu2BaKAsYGPkA,14310
|
|
64
64
|
datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
|
|
65
65
|
datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
|
|
66
66
|
datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
|
|
@@ -324,7 +324,7 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
324
324
|
datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
|
|
325
325
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
326
326
|
datahub/ingestion/source/iceberg/iceberg.py,sha256=pMWQtn88XAYwZsRNkICX1GlQOqOnyuWdLpkcjVQEon0,29039
|
|
327
|
-
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=
|
|
327
|
+
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
|
|
328
328
|
datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
|
|
329
329
|
datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
330
330
|
datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
|
|
@@ -360,7 +360,7 @@ datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz
|
|
|
360
360
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
361
361
|
datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
|
|
362
362
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
363
|
-
datahub/ingestion/source/metadata/business_glossary.py,sha256=
|
|
363
|
+
datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
|
|
364
364
|
datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH57g-u6LWbu_f7HM4,9521
|
|
365
365
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
366
366
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
|
|
@@ -396,16 +396,17 @@ datahub/ingestion/source/qlik_sense/qlik_api.py,sha256=KoBaD1VowYrbaRg1rjDP1_mmP
|
|
|
396
396
|
datahub/ingestion/source/qlik_sense/qlik_sense.py,sha256=bmhmOgSXzC6g-uqO1ljFLRNz2oo6Xjn400UQnWdMA1Y,22530
|
|
397
397
|
datahub/ingestion/source/qlik_sense/websocket_connection.py,sha256=jp39OInvjCN9BtnKsHU_aa1B3X9hVHqSmD25stXuqHk,1940
|
|
398
398
|
datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
|
-
datahub/ingestion/source/redshift/config.py,sha256=
|
|
399
|
+
datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUhaMGec8FnUbSl6Y,8997
|
|
400
|
+
datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
|
|
400
401
|
datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
|
|
401
|
-
datahub/ingestion/source/redshift/lineage.py,sha256=
|
|
402
|
-
datahub/ingestion/source/redshift/lineage_v2.py,sha256=
|
|
403
|
-
datahub/ingestion/source/redshift/profile.py,sha256=
|
|
404
|
-
datahub/ingestion/source/redshift/query.py,sha256=
|
|
405
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
402
|
+
datahub/ingestion/source/redshift/lineage.py,sha256=Gk2dNuRBEipZkY5W1sArlfRbFR7mBKutCFHHTrn3yX4,44096
|
|
403
|
+
datahub/ingestion/source/redshift/lineage_v2.py,sha256=H6Qky5dLeZEICdDWyH-My78NoKlXpExHg3m-6d5lbgo,16891
|
|
404
|
+
datahub/ingestion/source/redshift/profile.py,sha256=jqFQUSg_qzSYi1yIAq24NFwHW8yIcSDSSh-vgJ4nl6M,4287
|
|
405
|
+
datahub/ingestion/source/redshift/query.py,sha256=6Fw3I8qFLflySDu6WY5D9NjXnRnDIw0yxKisSpaHh0A,47526
|
|
406
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=IZqeQws3mvDdu9K-ixPGZNalDcRRRse-l_TTwQI7B-4,43407
|
|
406
407
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
407
|
-
datahub/ingestion/source/redshift/redshift_schema.py,sha256=
|
|
408
|
-
datahub/ingestion/source/redshift/report.py,sha256=
|
|
408
|
+
datahub/ingestion/source/redshift/redshift_schema.py,sha256=WTc-j4_PYlFgaJZ3hEorGIBWKruTX57E7V_5JaUe8mU,24045
|
|
409
|
+
datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
|
|
409
410
|
datahub/ingestion/source/redshift/usage.py,sha256=eSdB1MYZeQokkQOwl9LPdpo-oCBJSwxJBotSpJ9XjBc,17473
|
|
410
411
|
datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
|
|
411
412
|
datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
|
|
@@ -464,7 +465,7 @@ datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnX
|
|
|
464
465
|
datahub/ingestion/source/sql/hive_metastore.py,sha256=fH7bAcljapYqmF8cQE7humoufFe2RVFRYOcyavMg9yo,36103
|
|
465
466
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
466
467
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
467
|
-
datahub/ingestion/source/sql/oracle.py,sha256=
|
|
468
|
+
datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
|
|
468
469
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
469
470
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
470
471
|
datahub/ingestion/source/sql/sql_common.py,sha256=r75Cd06Qwe2fqTDRZKWnIf7kpnR0BSxZ9PYBOgY0I6k,48785
|
|
@@ -575,8 +576,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
575
576
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
576
577
|
datahub/lite/lite_util.py,sha256=Cm6trMTeo0X1fv4nSsW9lC0jqce7Jt-05GhOtIGzsVc,4559
|
|
577
578
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
578
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
579
|
-
datahub/metadata/schema.avsc,sha256=
|
|
579
|
+
datahub/metadata/_schema_classes.py,sha256=uafVvWsnAqPranXzeC9CrSAu7I1-XJOogtiBPhxmn-k,993397
|
|
580
|
+
datahub/metadata/schema.avsc,sha256=uPWX2Rx9A12b-p4ef4zrsjbtQPSIH8w67l3B6pq6zE0,741459
|
|
580
581
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
581
582
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
582
583
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -745,7 +746,7 @@ datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=JKNy_KlUqr3kt7o1Cu2D
|
|
|
745
746
|
datahub/metadata/schemas/Deprecation.avsc,sha256=SmbTlMB9fujdMBjYEQkzaU4XJzwM1gD6E8L2zoL1b4Q,1280
|
|
746
747
|
datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
|
|
747
748
|
datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
|
|
748
|
-
datahub/metadata/schemas/DomainKey.avsc,sha256=
|
|
749
|
+
datahub/metadata/schemas/DomainKey.avsc,sha256=TYCcJRWqwbxbQuR5E68pvdeAmfVdYsJuMNhTxVphbqg,676
|
|
749
750
|
datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
|
|
750
751
|
datahub/metadata/schemas/Domains.avsc,sha256=5mRQcba6Zmp6Y1srbxhOjETutg0I_ZG4ikuS2r9fkR0,804
|
|
751
752
|
datahub/metadata/schemas/DynamicFormAssignment.avsc,sha256=SXRL5D6kIYWdGl3zLQYxPnkQX71JXQOKrjQNavFqVp0,7339
|
|
@@ -783,7 +784,7 @@ datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0z
|
|
|
783
784
|
datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
|
|
784
785
|
datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
|
|
785
786
|
datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
|
|
786
|
-
datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=
|
|
787
|
+
datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=hT8ny4TL1WvgFvnaVBjuw6AWDiPDjpkh20f83ZT-UZ8,664
|
|
787
788
|
datahub/metadata/schemas/GlossaryRelatedTerms.avsc,sha256=ZTP0mrFD4y-C6JekRy8IVuHvICUkJib-ZAYD93Gv1tA,2763
|
|
788
789
|
datahub/metadata/schemas/GlossaryTermInfo.avsc,sha256=j4s9NCyMOIF03HfaXoQEIkiMTRaCy_-euhenptfu7IA,2935
|
|
789
790
|
datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=mkyrzmOX_BGRHbcj2ccUALbrPVJNdQbItU-VyKN7P98,836
|
|
@@ -806,12 +807,12 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=6_typ7K0Bz8x62T31IYqf9XS9
|
|
|
806
807
|
datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
|
|
807
808
|
datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
|
|
808
809
|
datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
|
|
809
|
-
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=
|
|
810
|
+
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=vt04jFF_ZHSvWhqLoxC8C_KspiRLkvNNIXJI0aKPF1Q,2425
|
|
810
811
|
datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=I3v-uNOeYxO4hooPHOjafWWHuVyeGvG90oma0tzpNFg,5409
|
|
811
812
|
datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
|
|
812
|
-
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=
|
|
813
|
+
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=3LoMWejMfCwdoqz3PFinRbY1_Yy4Kypw7pwg3tL42Jg,2497
|
|
813
814
|
datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=zMl6ab6zfcYJmt31f-AUrrfeqfLoaSZQpfB3_S9JFFQ,6534
|
|
814
|
-
datahub/metadata/schemas/MLModelKey.avsc,sha256=
|
|
815
|
+
datahub/metadata/schemas/MLModelKey.avsc,sha256=pRntMhcpgTJL2T2nGK6Sf9_q2vJOqHELYFh59VMXqv0,2866
|
|
815
816
|
datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
|
|
816
817
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=Kq2Q9WxZ6nQ8wR4P6wpPCI-J7FwXQyoa10s6BvXtkm8,1110
|
|
817
818
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
@@ -913,7 +914,7 @@ datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGy
|
|
|
913
914
|
datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
|
|
914
915
|
datahub/sql_parsing/split_statements.py,sha256=6KUoIPG7H8Rja3lrPjSrSfhFfwW4oqgfoNQeTbbOWNg,8953
|
|
915
916
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
|
|
916
|
-
datahub/sql_parsing/sql_parsing_common.py,sha256=
|
|
917
|
+
datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
|
|
917
918
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
918
919
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
|
|
919
920
|
datahub/sql_parsing/sqlglot_utils.py,sha256=6W6MQ5Yh0xXT9_h0jd19yoGWMdXicyRBDD_FwV7nj04,14701
|
|
@@ -1021,9 +1022,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1021
1022
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1022
1023
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1023
1024
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1024
|
-
acryl_datahub-1.0.
|
|
1025
|
-
acryl_datahub-1.0.
|
|
1026
|
-
acryl_datahub-1.0.
|
|
1027
|
-
acryl_datahub-1.0.
|
|
1028
|
-
acryl_datahub-1.0.
|
|
1029
|
-
acryl_datahub-1.0.
|
|
1025
|
+
acryl_datahub-1.0.0rc12.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1026
|
+
acryl_datahub-1.0.0rc12.dist-info/METADATA,sha256=kcoZdCyTkho3L5zlg7gJR-uowp30pAdc5PK_OzHEoDE,175337
|
|
1027
|
+
acryl_datahub-1.0.0rc12.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
1028
|
+
acryl_datahub-1.0.0rc12.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1029
|
+
acryl_datahub-1.0.0rc12.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1030
|
+
acryl_datahub-1.0.0rc12.dist-info/RECORD,,
|
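datahub/_version.py
CHANGED
|
[The +1 -1 diff body for datahub/_version.py is not present in this extract; the hunks that follow belong to datahub/api/entities/common/serialized_value.py.]
|
datahub/api/entities/common/serialized_value.py
CHANGED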
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Dict, Optional, Type, Union
|
|
3
|
+
from typing import Dict, Optional, Type, TypeVar, Union
|
|
4
4
|
|
|
5
5
|
from avrogen.dict_wrapper import DictWrapper
|
|
6
6
|
from pydantic import BaseModel
|
|
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
13
|
_REMAPPED_SCHEMA_TYPES = {
|
|
14
14
|
k.replace("pegasus2avro.", ""): v for k, v in SCHEMA_TYPES.items()
|
|
15
15
|
}
|
|
16
|
+
T = TypeVar("T", bound=BaseModel)
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class SerializedResourceValue(BaseModel):
|
|
@@ -83,8 +84,8 @@ class SerializedResourceValue(BaseModel):
|
|
|
83
84
|
)
|
|
84
85
|
|
|
85
86
|
def as_pydantic_object(
|
|
86
|
-
self, model_type: Type[
|
|
87
|
-
) ->
|
|
87
|
+
self, model_type: Type[T], validate_schema_ref: bool = False
|
|
88
|
+
) -> T:
|
|
88
89
|
"""
|
|
89
90
|
Parse the blob into a Pydantic-defined Python object based on the schema type and schema
|
|
90
91
|
ref.
|
datahub/cli/check_cli.py
CHANGED
|
@@ -5,7 +5,8 @@ import pathlib
|
|
|
5
5
|
import pprint
|
|
6
6
|
import shutil
|
|
7
7
|
import tempfile
|
|
8
|
-
from
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Any, Dict, List, Optional, Union
|
|
9
10
|
|
|
10
11
|
import click
|
|
11
12
|
|
|
@@ -20,7 +21,10 @@ from datahub.ingestion.sink.sink_registry import sink_registry
|
|
|
20
21
|
from datahub.ingestion.source.source_registry import source_registry
|
|
21
22
|
from datahub.ingestion.transformer.transform_registry import transform_registry
|
|
22
23
|
from datahub.telemetry import telemetry
|
|
23
|
-
from datahub.utilities.file_backed_collections import
|
|
24
|
+
from datahub.utilities.file_backed_collections import (
|
|
25
|
+
ConnectionWrapper,
|
|
26
|
+
FileBackedDict,
|
|
27
|
+
)
|
|
24
28
|
|
|
25
29
|
logger = logging.getLogger(__name__)
|
|
26
30
|
|
|
@@ -391,29 +395,78 @@ def test_path_spec(config: str, input: str, path_spec_key: str) -> None:
|
|
|
391
395
|
raise e
|
|
392
396
|
|
|
393
397
|
|
|
398
|
+
def _jsonify(data: Any) -> Any:
|
|
399
|
+
if dataclasses.is_dataclass(data):
|
|
400
|
+
# dataclasses.asdict() is recursive. We're doing the recursion
|
|
401
|
+
# manually here via _jsonify calls, so we can't use
|
|
402
|
+
# dataclasses.asdict() here.
|
|
403
|
+
return {
|
|
404
|
+
f.name: _jsonify(getattr(data, f.name)) for f in dataclasses.fields(data)
|
|
405
|
+
}
|
|
406
|
+
elif isinstance(data, list):
|
|
407
|
+
return [_jsonify(item) for item in data]
|
|
408
|
+
elif isinstance(data, dict):
|
|
409
|
+
return {_jsonify(k): _jsonify(v) for k, v in data.items()}
|
|
410
|
+
elif isinstance(data, datetime):
|
|
411
|
+
return data.isoformat()
|
|
412
|
+
else:
|
|
413
|
+
return data
|
|
414
|
+
|
|
415
|
+
|
|
394
416
|
@check.command()
|
|
395
|
-
@click.argument("
|
|
396
|
-
|
|
397
|
-
def extract_sql_agg_log(query_log_file: str, output: Optional[str]) -> None:
|
|
417
|
+
@click.argument("db-file", type=click.Path(exists=True, dir_okay=False))
|
|
418
|
+
def extract_sql_agg_log(db_file: str) -> None:
|
|
398
419
|
"""Convert a sqlite db generated by the SqlParsingAggregator into a JSON."""
|
|
399
420
|
|
|
400
|
-
|
|
421
|
+
if pathlib.Path(db_file).suffix != ".db":
|
|
422
|
+
raise click.UsageError("DB file must be a sqlite db")
|
|
423
|
+
|
|
424
|
+
output_dir = pathlib.Path(db_file).with_suffix("")
|
|
425
|
+
output_dir.mkdir(exist_ok=True)
|
|
426
|
+
|
|
427
|
+
shared_connection = ConnectionWrapper(pathlib.Path(db_file))
|
|
428
|
+
|
|
429
|
+
tables: List[str] = [
|
|
430
|
+
row[0]
|
|
431
|
+
for row in shared_connection.execute(
|
|
432
|
+
"""\
|
|
433
|
+
SELECT
|
|
434
|
+
name
|
|
435
|
+
FROM
|
|
436
|
+
sqlite_schema
|
|
437
|
+
WHERE
|
|
438
|
+
type ='table' AND
|
|
439
|
+
name NOT LIKE 'sqlite_%';
|
|
440
|
+
""",
|
|
441
|
+
parameters={},
|
|
442
|
+
)
|
|
443
|
+
]
|
|
444
|
+
logger.info(f"Extracting {len(tables)} tables from {db_file}: {tables}")
|
|
445
|
+
|
|
446
|
+
for table in tables:
|
|
447
|
+
table_output_path = output_dir / f"{table}.json"
|
|
448
|
+
if table_output_path.exists():
|
|
449
|
+
logger.info(f"Skipping {table_output_path} because it already exists")
|
|
450
|
+
continue
|
|
401
451
|
|
|
402
|
-
|
|
452
|
+
# Some of the tables might actually be FileBackedList. Because
|
|
453
|
+
# the list is built on top of the FileBackedDict, we don't
|
|
454
|
+
# need to distinguish between the two cases.
|
|
403
455
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
)
|
|
408
|
-
logger.info(f"Extracting {len(query_log)} queries from {query_log_file}")
|
|
409
|
-
queries = [dataclasses.asdict(query) for query in query_log]
|
|
456
|
+
table_data: FileBackedDict[Any] = FileBackedDict(
|
|
457
|
+
shared_connection=shared_connection, tablename=table
|
|
458
|
+
)
|
|
410
459
|
|
|
411
|
-
|
|
412
|
-
with
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
460
|
+
data = {}
|
|
461
|
+
with click.progressbar(
|
|
462
|
+
table_data.items(), length=len(table_data), label=f"Extracting {table}"
|
|
463
|
+
) as items:
|
|
464
|
+
for k, v in items:
|
|
465
|
+
data[k] = _jsonify(v)
|
|
466
|
+
|
|
467
|
+
with open(table_output_path, "w") as f:
|
|
468
|
+
json.dump(data, f, indent=2, default=str)
|
|
469
|
+
logger.info(f"Extracted {len(data)} entries to {table_output_path}")
|
|
417
470
|
|
|
418
471
|
|
|
419
472
|
@check.command()
|
datahub/ingestion/source/iceberg/iceberg_common.py
CHANGED
|
@@ -6,7 +6,10 @@ from typing import Any, Dict, Optional
|
|
|
6
6
|
from humanfriendly import format_timespan
|
|
7
7
|
from pydantic import Field, validator
|
|
8
8
|
from pyiceberg.catalog import Catalog, load_catalog
|
|
9
|
+
from pyiceberg.catalog.rest import RestCatalog
|
|
10
|
+
from requests.adapters import HTTPAdapter
|
|
9
11
|
from sortedcontainers import SortedList
|
|
12
|
+
from urllib3.util import Retry
|
|
10
13
|
|
|
11
14
|
from datahub.configuration.common import AllowDenyPattern, ConfigModel
|
|
12
15
|
from datahub.configuration.source_common import DatasetSourceConfigMixin
|
|
@@ -26,6 +29,23 @@ from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
|
|
|
26
29
|
|
|
27
30
|
logger = logging.getLogger(__name__)
|
|
28
31
|
|
|
32
|
+
DEFAULT_REST_TIMEOUT = 120
|
|
33
|
+
DEFAULT_REST_RETRY_POLICY = {"total": 3, "backoff_factor": 0.1}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TimeoutHTTPAdapter(HTTPAdapter):
|
|
37
|
+
def __init__(self, *args, **kwargs):
|
|
38
|
+
if "timeout" in kwargs:
|
|
39
|
+
self.timeout = kwargs["timeout"]
|
|
40
|
+
del kwargs["timeout"]
|
|
41
|
+
super().__init__(*args, **kwargs)
|
|
42
|
+
|
|
43
|
+
def send(self, request, **kwargs):
|
|
44
|
+
timeout = kwargs.get("timeout")
|
|
45
|
+
if timeout is None and hasattr(self, "timeout"):
|
|
46
|
+
kwargs["timeout"] = self.timeout
|
|
47
|
+
return super().send(request, **kwargs)
|
|
48
|
+
|
|
29
49
|
|
|
30
50
|
class IcebergProfilingConfig(ConfigModel):
|
|
31
51
|
enabled: bool = Field(
|
|
@@ -146,7 +166,26 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
|
|
|
146
166
|
logger.debug(
|
|
147
167
|
"Initializing the catalog %s with config: %s", catalog_name, catalog_config
|
|
148
168
|
)
|
|
149
|
-
|
|
169
|
+
catalog = load_catalog(name=catalog_name, **catalog_config)
|
|
170
|
+
if isinstance(catalog, RestCatalog):
|
|
171
|
+
logger.debug(
|
|
172
|
+
"Recognized REST catalog type being configured, attempting to configure HTTP Adapter for the session"
|
|
173
|
+
)
|
|
174
|
+
retry_policy: Dict[str, Any] = DEFAULT_REST_RETRY_POLICY.copy()
|
|
175
|
+
retry_policy.update(catalog_config.get("connection", {}).get("retry", {}))
|
|
176
|
+
retries = Retry(**retry_policy)
|
|
177
|
+
logger.debug(f"Retry policy to be set: {retry_policy}")
|
|
178
|
+
timeout = catalog_config.get("connection", {}).get(
|
|
179
|
+
"timeout", DEFAULT_REST_TIMEOUT
|
|
180
|
+
)
|
|
181
|
+
logger.debug(f"Timeout to be set: {timeout}")
|
|
182
|
+
catalog._session.mount(
|
|
183
|
+
"http://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
|
|
184
|
+
)
|
|
185
|
+
catalog._session.mount(
|
|
186
|
+
"https://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
|
|
187
|
+
)
|
|
188
|
+
return catalog
|
|
150
189
|
|
|
151
190
|
|
|
152
191
|
class TopTableTimings:
|
datahub/ingestion/source/metadata/business_glossary.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import pathlib
|
|
3
|
+
import re
|
|
3
4
|
import time
|
|
4
5
|
from dataclasses import dataclass, field
|
|
5
6
|
from typing import Any, Dict, Iterable, List, Optional, TypeVar, Union
|
|
@@ -118,17 +119,58 @@ class BusinessGlossaryConfig(DefaultConfig):
|
|
|
118
119
|
return v
|
|
119
120
|
|
|
120
121
|
|
|
122
|
+
def clean_url(text: str) -> str:
|
|
123
|
+
"""
|
|
124
|
+
Clean text for use in URLs by:
|
|
125
|
+
1. Replacing spaces with hyphens
|
|
126
|
+
2. Removing special characters (preserving hyphens and periods)
|
|
127
|
+
3. Collapsing multiple hyphens and periods into single ones
|
|
128
|
+
"""
|
|
129
|
+
# Replace spaces with hyphens
|
|
130
|
+
text = text.replace(" ", "-")
|
|
131
|
+
# Remove special characters except hyphens and periods
|
|
132
|
+
text = re.sub(r"[^a-zA-Z0-9\-.]", "", text)
|
|
133
|
+
# Collapse multiple hyphens into one
|
|
134
|
+
text = re.sub(r"-+", "-", text)
|
|
135
|
+
# Collapse multiple periods into one
|
|
136
|
+
text = re.sub(r"\.+", ".", text)
|
|
137
|
+
# Remove leading/trailing hyphens and periods
|
|
138
|
+
text = text.strip("-.")
|
|
139
|
+
return text
|
|
140
|
+
|
|
141
|
+
|
|
121
142
|
def create_id(path: List[str], default_id: Optional[str], enable_auto_id: bool) -> str:
|
|
143
|
+
"""
|
|
144
|
+
Create an ID for a glossary node or term.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
path: List of path components leading to this node/term
|
|
148
|
+
default_id: Optional manually specified ID
|
|
149
|
+
enable_auto_id: Whether to generate GUIDs
|
|
150
|
+
"""
|
|
122
151
|
if default_id is not None:
|
|
123
|
-
return default_id #
|
|
152
|
+
return default_id # Use explicitly provided ID
|
|
124
153
|
|
|
125
154
|
id_: str = ".".join(path)
|
|
126
155
|
|
|
127
|
-
|
|
128
|
-
|
|
156
|
+
# Check for non-ASCII characters before cleaning
|
|
157
|
+
if any(ord(c) > 127 for c in id_):
|
|
158
|
+
return datahub_guid({"path": id_})
|
|
129
159
|
|
|
130
160
|
if enable_auto_id:
|
|
161
|
+
# Generate GUID for auto_id mode
|
|
131
162
|
id_ = datahub_guid({"path": id_})
|
|
163
|
+
else:
|
|
164
|
+
# Clean the URL for better readability when not using auto_id
|
|
165
|
+
id_ = clean_url(id_)
|
|
166
|
+
|
|
167
|
+
# Force auto_id if the cleaned URL still contains problematic characters
|
|
168
|
+
if UrnEncoder.contains_extended_reserved_char(id_):
|
|
169
|
+
logger.warning(
|
|
170
|
+
f"ID '{id_}' contains problematic characters after URL cleaning. Falling back to GUID generation for stability."
|
|
171
|
+
)
|
|
172
|
+
id_ = datahub_guid({"path": id_})
|
|
173
|
+
|
|
132
174
|
return id_
|
|
133
175
|
|
|
134
176
|
|
datahub/ingestion/source/redshift/config.py
CHANGED
|
@@ -128,6 +128,10 @@ class RedshiftConfig(
|
|
|
128
128
|
default=True,
|
|
129
129
|
description="Whether lineage should be collected from copy commands",
|
|
130
130
|
)
|
|
131
|
+
include_share_lineage: bool = Field(
|
|
132
|
+
default=True,
|
|
133
|
+
description="Whether lineage should be collected from datashares",
|
|
134
|
+
)
|
|
131
135
|
|
|
132
136
|
include_usage_statistics: bool = Field(
|
|
133
137
|
default=False,
|