acryl-datahub 1.0.0rc10__py3-none-any.whl → 1.0.0rc12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (29) hide show
  1. {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/METADATA +2513 -2513
  2. {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/RECORD +29 -28
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +4 -3
  5. datahub/cli/check_cli.py +72 -19
  6. datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
  7. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  8. datahub/ingestion/source/redshift/config.py +4 -0
  9. datahub/ingestion/source/redshift/datashares.py +236 -0
  10. datahub/ingestion/source/redshift/lineage.py +6 -2
  11. datahub/ingestion/source/redshift/lineage_v2.py +7 -4
  12. datahub/ingestion/source/redshift/profile.py +1 -1
  13. datahub/ingestion/source/redshift/query.py +125 -33
  14. datahub/ingestion/source/redshift/redshift.py +41 -72
  15. datahub/ingestion/source/redshift/redshift_schema.py +166 -6
  16. datahub/ingestion/source/redshift/report.py +3 -0
  17. datahub/ingestion/source/sql/oracle.py +93 -63
  18. datahub/metadata/_schema_classes.py +5 -5
  19. datahub/metadata/schema.avsc +2 -1
  20. datahub/metadata/schemas/DomainKey.avsc +2 -1
  21. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  22. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  23. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  24. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  25. datahub/sql_parsing/sql_parsing_common.py +7 -0
  26. {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/LICENSE +0 -0
  27. {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/WHEEL +0 -0
  28. {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/entry_points.txt +0 -0
  29. {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc12.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=FaV2BCpaCxlMvv5cXKYFF-MDOTi4jF-QgH3AQNNnof0,322
3
+ datahub/_version.py,sha256=x2yvrMHLCX-bvzooDchpmcuWaCxdI_YKEWTKESO_3Ow,322
4
4
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
5
5
  datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
6
6
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,7 +27,7 @@ datahub/api/entities/assertion/sql_assertion.py,sha256=myJU-Wf8O-RbiyU_Xlbp2cacw
27
27
  datahub/api/entities/assertion/volume_assertion.py,sha256=37bNLGP-81MvcZj_cVHvrdw5I4aBxkER0xN0ZqyB3NU,3360
28
28
  datahub/api/entities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  datahub/api/entities/common/data_platform_instance.py,sha256=AVqQ-yactNZi_bislIEUcQZCGovaHY-gQi1EY7PVsT4,1065
30
- datahub/api/entities/common/serialized_value.py,sha256=pfw16vH2sG6U5kgBsmuEFBxJ0vL9CZpOQ5HZGOFbbQ0,5538
30
+ datahub/api/entities/common/serialized_value.py,sha256=DFPK7p4OwqRTOnH8luEWzqH_4vQHZSNxFIL63x_o2ok,5565
31
31
  datahub/api/entities/corpgroup/__init__.py,sha256=Uf3SxsZUSY-yZ2Kx3-1dWwz600D1C4Ds_z_nG7hwanA,63
32
32
  datahub/api/entities/corpgroup/corpgroup.py,sha256=XSrGHCwl7lMNtzWviMzZbw8VDdesXC2HLZP5kpHt2fQ,8878
33
33
  datahub/api/entities/corpuser/__init__.py,sha256=RspO1ceu6q2zUqYqZqRRY_MPcP7PNdd2lQoZn-KfeQE,60
@@ -60,7 +60,7 @@ datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0
60
60
  datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
61
61
  datahub/api/graphql/operation.py,sha256=h7OXbVRrpJgoth1X4cgeIFhD5JY1MGKg2KjVlQK1gqE,5116
62
62
  datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- datahub/cli/check_cli.py,sha256=ajrWVMAHYbgvYi4OFitFXx7Y6oigvZFgIeUiKV9ECik,12859
63
+ datahub/cli/check_cli.py,sha256=GpAM7k1GMIIE7zQ6GHnu_78aSc4mPiEu2BaKAsYGPkA,14310
64
64
  datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
65
65
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
66
66
  datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
@@ -324,7 +324,7 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
324
324
  datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
325
325
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
326
326
  datahub/ingestion/source/iceberg/iceberg.py,sha256=pMWQtn88XAYwZsRNkICX1GlQOqOnyuWdLpkcjVQEon0,29039
327
- datahub/ingestion/source/iceberg/iceberg_common.py,sha256=krt-41r90t0CkNeJXsiwO-p5zJIulI-tyq3xaU2yw_c,10645
327
+ datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
328
328
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
329
329
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
330
  datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
@@ -360,7 +360,7 @@ datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz
360
360
  datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
361
361
  datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
362
362
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
363
- datahub/ingestion/source/metadata/business_glossary.py,sha256=yySwJp2SCUQp8hRwN2lQuSqvOQowIhCKDKj9syhlTZA,18210
363
+ datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
364
364
  datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH57g-u6LWbu_f7HM4,9521
365
365
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
366
366
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
@@ -396,16 +396,17 @@ datahub/ingestion/source/qlik_sense/qlik_api.py,sha256=KoBaD1VowYrbaRg1rjDP1_mmP
396
396
  datahub/ingestion/source/qlik_sense/qlik_sense.py,sha256=bmhmOgSXzC6g-uqO1ljFLRNz2oo6Xjn400UQnWdMA1Y,22530
397
397
  datahub/ingestion/source/qlik_sense/websocket_connection.py,sha256=jp39OInvjCN9BtnKsHU_aa1B3X9hVHqSmD25stXuqHk,1940
398
398
  datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
399
- datahub/ingestion/source/redshift/config.py,sha256=S0yQ9wZKdGjWeeziWHpPECJ3wGYWBIsdhS3YJ5oAX_Y,8853
399
+ datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUhaMGec8FnUbSl6Y,8997
400
+ datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
400
401
  datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
401
- datahub/ingestion/source/redshift/lineage.py,sha256=bUy0uJowrqSc33Z50fIxFlJkyhe-OPM_qgPh-smSTgM,43983
402
- datahub/ingestion/source/redshift/lineage_v2.py,sha256=OcVW_27sSaZOYZPTd2j-LS9SzFQ1kXz6cMzM2ZDWhJQ,16751
403
- datahub/ingestion/source/redshift/profile.py,sha256=T4H79ycq2tPobLM1tTLRtu581Qa8LlKxEok49m0AirU,4294
404
- datahub/ingestion/source/redshift/query.py,sha256=X0KlDPzM68j0SYKXhq50DkLbFUIbGuPmGCYYmr8E0v0,44353
405
- datahub/ingestion/source/redshift/redshift.py,sha256=x9dKocJdGPaNs2fRdaddaBtZNxmTJFwYDhXY5nl_5zM,44444
402
+ datahub/ingestion/source/redshift/lineage.py,sha256=Gk2dNuRBEipZkY5W1sArlfRbFR7mBKutCFHHTrn3yX4,44096
403
+ datahub/ingestion/source/redshift/lineage_v2.py,sha256=H6Qky5dLeZEICdDWyH-My78NoKlXpExHg3m-6d5lbgo,16891
404
+ datahub/ingestion/source/redshift/profile.py,sha256=jqFQUSg_qzSYi1yIAq24NFwHW8yIcSDSSh-vgJ4nl6M,4287
405
+ datahub/ingestion/source/redshift/query.py,sha256=6Fw3I8qFLflySDu6WY5D9NjXnRnDIw0yxKisSpaHh0A,47526
406
+ datahub/ingestion/source/redshift/redshift.py,sha256=IZqeQws3mvDdu9K-ixPGZNalDcRRRse-l_TTwQI7B-4,43407
406
407
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
407
- datahub/ingestion/source/redshift/redshift_schema.py,sha256=9IYeUsnISenq3eVB3k-s7zK8nInWDAYViFnDrNjtkb0,19149
408
- datahub/ingestion/source/redshift/report.py,sha256=M19aUHBkd9n-BVBX4fRhyRNdVkN2b9Es6ZqInRx5ZGI,2958
408
+ datahub/ingestion/source/redshift/redshift_schema.py,sha256=WTc-j4_PYlFgaJZ3hEorGIBWKruTX57E7V_5JaUe8mU,24045
409
+ datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
409
410
  datahub/ingestion/source/redshift/usage.py,sha256=eSdB1MYZeQokkQOwl9LPdpo-oCBJSwxJBotSpJ9XjBc,17473
410
411
  datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
411
412
  datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
@@ -464,7 +465,7 @@ datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnX
464
465
  datahub/ingestion/source/sql/hive_metastore.py,sha256=fH7bAcljapYqmF8cQE7humoufFe2RVFRYOcyavMg9yo,36103
465
466
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
466
467
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
467
- datahub/ingestion/source/sql/oracle.py,sha256=pQeeQarUZpC7q09zL0LlPZB0aCwHU3QBRSzxyLHGIKY,26222
468
+ datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
468
469
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
469
470
  datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
470
471
  datahub/ingestion/source/sql/sql_common.py,sha256=r75Cd06Qwe2fqTDRZKWnIf7kpnR0BSxZ9PYBOgY0I6k,48785
@@ -575,8 +576,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
575
576
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
576
577
  datahub/lite/lite_util.py,sha256=Cm6trMTeo0X1fv4nSsW9lC0jqce7Jt-05GhOtIGzsVc,4559
577
578
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
578
- datahub/metadata/_schema_classes.py,sha256=lQPz3jGfTOCBidsC_Ap62XqRD9A4AKPqhHa3CV9jyL0,993316
579
- datahub/metadata/schema.avsc,sha256=iQJaPYHy4xrGQBEbRgn-RF4kGC1iNPhZawHTAYTyfW0,741430
579
+ datahub/metadata/_schema_classes.py,sha256=uafVvWsnAqPranXzeC9CrSAu7I1-XJOogtiBPhxmn-k,993397
580
+ datahub/metadata/schema.avsc,sha256=uPWX2Rx9A12b-p4ef4zrsjbtQPSIH8w67l3B6pq6zE0,741459
580
581
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
581
582
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
582
583
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -745,7 +746,7 @@ datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=JKNy_KlUqr3kt7o1Cu2D
745
746
  datahub/metadata/schemas/Deprecation.avsc,sha256=SmbTlMB9fujdMBjYEQkzaU4XJzwM1gD6E8L2zoL1b4Q,1280
746
747
  datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
747
748
  datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
748
- datahub/metadata/schemas/DomainKey.avsc,sha256=1_kbsMTsO2ebB3zW7KpB71QfkGGR0mAgpNOKRoWHsJU,649
749
+ datahub/metadata/schemas/DomainKey.avsc,sha256=TYCcJRWqwbxbQuR5E68pvdeAmfVdYsJuMNhTxVphbqg,676
749
750
  datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
750
751
  datahub/metadata/schemas/Domains.avsc,sha256=5mRQcba6Zmp6Y1srbxhOjETutg0I_ZG4ikuS2r9fkR0,804
751
752
  datahub/metadata/schemas/DynamicFormAssignment.avsc,sha256=SXRL5D6kIYWdGl3zLQYxPnkQX71JXQOKrjQNavFqVp0,7339
@@ -783,7 +784,7 @@ datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0z
783
784
  datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
784
785
  datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
785
786
  datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
786
- datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=XzzrUEpRCxNjWfdjXBTuYrmeMWK_-eCXw49K_eOswuw,637
787
+ datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=hT8ny4TL1WvgFvnaVBjuw6AWDiPDjpkh20f83ZT-UZ8,664
787
788
  datahub/metadata/schemas/GlossaryRelatedTerms.avsc,sha256=ZTP0mrFD4y-C6JekRy8IVuHvICUkJib-ZAYD93Gv1tA,2763
788
789
  datahub/metadata/schemas/GlossaryTermInfo.avsc,sha256=j4s9NCyMOIF03HfaXoQEIkiMTRaCy_-euhenptfu7IA,2935
789
790
  datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=mkyrzmOX_BGRHbcj2ccUALbrPVJNdQbItU-VyKN7P98,836
@@ -806,12 +807,12 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=6_typ7K0Bz8x62T31IYqf9XS9
806
807
  datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
807
808
  datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
808
809
  datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
809
- datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=gmXaUYxII8BVLnXOFdlPmyhD1rUhrw455R_hL77foSU,2406
810
+ datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=vt04jFF_ZHSvWhqLoxC8C_KspiRLkvNNIXJI0aKPF1Q,2425
810
811
  datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=I3v-uNOeYxO4hooPHOjafWWHuVyeGvG90oma0tzpNFg,5409
811
812
  datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
812
- datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=CL3DS8BfC2hOPmuUPaLcur0IUFg9Cnexc7bQ2lRgBfI,2478
813
+ datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=3LoMWejMfCwdoqz3PFinRbY1_Yy4Kypw7pwg3tL42Jg,2497
813
814
  datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=zMl6ab6zfcYJmt31f-AUrrfeqfLoaSZQpfB3_S9JFFQ,6534
814
- datahub/metadata/schemas/MLModelKey.avsc,sha256=0D6IECBL-Y5FHMdUpMEnd3e0JjoQ1YCtNTjoUovJuHU,2847
815
+ datahub/metadata/schemas/MLModelKey.avsc,sha256=pRntMhcpgTJL2T2nGK6Sf9_q2vJOqHELYFh59VMXqv0,2866
815
816
  datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
816
817
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=Kq2Q9WxZ6nQ8wR4P6wpPCI-J7FwXQyoa10s6BvXtkm8,1110
817
818
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
@@ -913,7 +914,7 @@ datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGy
913
914
  datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
914
915
  datahub/sql_parsing/split_statements.py,sha256=6KUoIPG7H8Rja3lrPjSrSfhFfwW4oqgfoNQeTbbOWNg,8953
915
916
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
916
- datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
917
+ datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
917
918
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
918
919
  datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
919
920
  datahub/sql_parsing/sqlglot_utils.py,sha256=6W6MQ5Yh0xXT9_h0jd19yoGWMdXicyRBDD_FwV7nj04,14701
@@ -1021,9 +1022,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1021
1022
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1022
1023
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1023
1024
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1024
- acryl_datahub-1.0.0rc10.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1025
- acryl_datahub-1.0.0rc10.dist-info/METADATA,sha256=KibwklUZSYWViDayJ8MX8bAVCp7PPGAQsqfyBdWoSHM,175337
1026
- acryl_datahub-1.0.0rc10.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
1027
- acryl_datahub-1.0.0rc10.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1028
- acryl_datahub-1.0.0rc10.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1029
- acryl_datahub-1.0.0rc10.dist-info/RECORD,,
1025
+ acryl_datahub-1.0.0rc12.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1026
+ acryl_datahub-1.0.0rc12.dist-info/METADATA,sha256=kcoZdCyTkho3L5zlg7gJR-uowp30pAdc5PK_OzHEoDE,175337
1027
+ acryl_datahub-1.0.0rc12.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
1028
+ acryl_datahub-1.0.0rc12.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1029
+ acryl_datahub-1.0.0rc12.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1030
+ acryl_datahub-1.0.0rc12.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0rc10"
3
+ __version__ = "1.0.0rc12"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  import logging
3
- from typing import Dict, Optional, Type, Union
3
+ from typing import Dict, Optional, Type, TypeVar, Union
4
4
 
5
5
  from avrogen.dict_wrapper import DictWrapper
6
6
  from pydantic import BaseModel
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
13
13
  _REMAPPED_SCHEMA_TYPES = {
14
14
  k.replace("pegasus2avro.", ""): v for k, v in SCHEMA_TYPES.items()
15
15
  }
16
+ T = TypeVar("T", bound=BaseModel)
16
17
 
17
18
 
18
19
  class SerializedResourceValue(BaseModel):
@@ -83,8 +84,8 @@ class SerializedResourceValue(BaseModel):
83
84
  )
84
85
 
85
86
  def as_pydantic_object(
86
- self, model_type: Type[BaseModel], validate_schema_ref: bool = False
87
- ) -> BaseModel:
87
+ self, model_type: Type[T], validate_schema_ref: bool = False
88
+ ) -> T:
88
89
  """
89
90
  Parse the blob into a Pydantic-defined Python object based on the schema type and schema
90
91
  ref.
datahub/cli/check_cli.py CHANGED
@@ -5,7 +5,8 @@ import pathlib
5
5
  import pprint
6
6
  import shutil
7
7
  import tempfile
8
- from typing import Dict, List, Optional, Union
8
+ from datetime import datetime
9
+ from typing import Any, Dict, List, Optional, Union
9
10
 
10
11
  import click
11
12
 
@@ -20,7 +21,10 @@ from datahub.ingestion.sink.sink_registry import sink_registry
20
21
  from datahub.ingestion.source.source_registry import source_registry
21
22
  from datahub.ingestion.transformer.transform_registry import transform_registry
22
23
  from datahub.telemetry import telemetry
23
- from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList
24
+ from datahub.utilities.file_backed_collections import (
25
+ ConnectionWrapper,
26
+ FileBackedDict,
27
+ )
24
28
 
25
29
  logger = logging.getLogger(__name__)
26
30
 
@@ -391,29 +395,78 @@ def test_path_spec(config: str, input: str, path_spec_key: str) -> None:
391
395
  raise e
392
396
 
393
397
 
398
+ def _jsonify(data: Any) -> Any:
399
+ if dataclasses.is_dataclass(data):
400
+ # dataclasses.asdict() is recursive. We're doing the recursion
401
+ # manually here via _jsonify calls, so we can't use
402
+ # dataclasses.asdict() here.
403
+ return {
404
+ f.name: _jsonify(getattr(data, f.name)) for f in dataclasses.fields(data)
405
+ }
406
+ elif isinstance(data, list):
407
+ return [_jsonify(item) for item in data]
408
+ elif isinstance(data, dict):
409
+ return {_jsonify(k): _jsonify(v) for k, v in data.items()}
410
+ elif isinstance(data, datetime):
411
+ return data.isoformat()
412
+ else:
413
+ return data
414
+
415
+
394
416
  @check.command()
395
- @click.argument("query-log-file", type=click.Path(exists=True, dir_okay=False))
396
- @click.option("--output", type=click.Path())
397
- def extract_sql_agg_log(query_log_file: str, output: Optional[str]) -> None:
417
+ @click.argument("db-file", type=click.Path(exists=True, dir_okay=False))
418
+ def extract_sql_agg_log(db_file: str) -> None:
398
419
  """Convert a sqlite db generated by the SqlParsingAggregator into a JSON."""
399
420
 
400
- from datahub.sql_parsing.sql_parsing_aggregator import LoggedQuery
421
+ if pathlib.Path(db_file).suffix != ".db":
422
+ raise click.UsageError("DB file must be a sqlite db")
423
+
424
+ output_dir = pathlib.Path(db_file).with_suffix("")
425
+ output_dir.mkdir(exist_ok=True)
426
+
427
+ shared_connection = ConnectionWrapper(pathlib.Path(db_file))
428
+
429
+ tables: List[str] = [
430
+ row[0]
431
+ for row in shared_connection.execute(
432
+ """\
433
+ SELECT
434
+ name
435
+ FROM
436
+ sqlite_schema
437
+ WHERE
438
+ type ='table' AND
439
+ name NOT LIKE 'sqlite_%';
440
+ """,
441
+ parameters={},
442
+ )
443
+ ]
444
+ logger.info(f"Extracting {len(tables)} tables from {db_file}: {tables}")
445
+
446
+ for table in tables:
447
+ table_output_path = output_dir / f"{table}.json"
448
+ if table_output_path.exists():
449
+ logger.info(f"Skipping {table_output_path} because it already exists")
450
+ continue
401
451
 
402
- assert dataclasses.is_dataclass(LoggedQuery)
452
+ # Some of the tables might actually be FileBackedList. Because
453
+ # the list is built on top of the FileBackedDict, we don't
454
+ # need to distinguish between the two cases.
403
455
 
404
- shared_connection = ConnectionWrapper(pathlib.Path(query_log_file))
405
- query_log = FileBackedList[LoggedQuery](
406
- shared_connection=shared_connection, tablename="stored_queries"
407
- )
408
- logger.info(f"Extracting {len(query_log)} queries from {query_log_file}")
409
- queries = [dataclasses.asdict(query) for query in query_log]
456
+ table_data: FileBackedDict[Any] = FileBackedDict(
457
+ shared_connection=shared_connection, tablename=table
458
+ )
410
459
 
411
- if output:
412
- with open(output, "w") as f:
413
- json.dump(queries, f, indent=2, default=str)
414
- logger.info(f"Extracted {len(queries)} queries to {output}")
415
- else:
416
- click.echo(json.dumps(queries, indent=2))
460
+ data = {}
461
+ with click.progressbar(
462
+ table_data.items(), length=len(table_data), label=f"Extracting {table}"
463
+ ) as items:
464
+ for k, v in items:
465
+ data[k] = _jsonify(v)
466
+
467
+ with open(table_output_path, "w") as f:
468
+ json.dump(data, f, indent=2, default=str)
469
+ logger.info(f"Extracted {len(data)} entries to {table_output_path}")
417
470
 
418
471
 
419
472
  @check.command()
@@ -6,7 +6,10 @@ from typing import Any, Dict, Optional
6
6
  from humanfriendly import format_timespan
7
7
  from pydantic import Field, validator
8
8
  from pyiceberg.catalog import Catalog, load_catalog
9
+ from pyiceberg.catalog.rest import RestCatalog
10
+ from requests.adapters import HTTPAdapter
9
11
  from sortedcontainers import SortedList
12
+ from urllib3.util import Retry
10
13
 
11
14
  from datahub.configuration.common import AllowDenyPattern, ConfigModel
12
15
  from datahub.configuration.source_common import DatasetSourceConfigMixin
@@ -26,6 +29,23 @@ from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
26
29
 
27
30
  logger = logging.getLogger(__name__)
28
31
 
32
+ DEFAULT_REST_TIMEOUT = 120
33
+ DEFAULT_REST_RETRY_POLICY = {"total": 3, "backoff_factor": 0.1}
34
+
35
+
36
+ class TimeoutHTTPAdapter(HTTPAdapter):
37
+ def __init__(self, *args, **kwargs):
38
+ if "timeout" in kwargs:
39
+ self.timeout = kwargs["timeout"]
40
+ del kwargs["timeout"]
41
+ super().__init__(*args, **kwargs)
42
+
43
+ def send(self, request, **kwargs):
44
+ timeout = kwargs.get("timeout")
45
+ if timeout is None and hasattr(self, "timeout"):
46
+ kwargs["timeout"] = self.timeout
47
+ return super().send(request, **kwargs)
48
+
29
49
 
30
50
  class IcebergProfilingConfig(ConfigModel):
31
51
  enabled: bool = Field(
@@ -146,7 +166,26 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
146
166
  logger.debug(
147
167
  "Initializing the catalog %s with config: %s", catalog_name, catalog_config
148
168
  )
149
- return load_catalog(name=catalog_name, **catalog_config)
169
+ catalog = load_catalog(name=catalog_name, **catalog_config)
170
+ if isinstance(catalog, RestCatalog):
171
+ logger.debug(
172
+ "Recognized REST catalog type being configured, attempting to configure HTTP Adapter for the session"
173
+ )
174
+ retry_policy: Dict[str, Any] = DEFAULT_REST_RETRY_POLICY.copy()
175
+ retry_policy.update(catalog_config.get("connection", {}).get("retry", {}))
176
+ retries = Retry(**retry_policy)
177
+ logger.debug(f"Retry policy to be set: {retry_policy}")
178
+ timeout = catalog_config.get("connection", {}).get(
179
+ "timeout", DEFAULT_REST_TIMEOUT
180
+ )
181
+ logger.debug(f"Timeout to be set: {timeout}")
182
+ catalog._session.mount(
183
+ "http://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
184
+ )
185
+ catalog._session.mount(
186
+ "https://", TimeoutHTTPAdapter(timeout=timeout, max_retries=retries)
187
+ )
188
+ return catalog
150
189
 
151
190
 
152
191
  class TopTableTimings:
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import pathlib
3
+ import re
3
4
  import time
4
5
  from dataclasses import dataclass, field
5
6
  from typing import Any, Dict, Iterable, List, Optional, TypeVar, Union
@@ -118,17 +119,58 @@ class BusinessGlossaryConfig(DefaultConfig):
118
119
  return v
119
120
 
120
121
 
122
+ def clean_url(text: str) -> str:
123
+ """
124
+ Clean text for use in URLs by:
125
+ 1. Replacing spaces with hyphens
126
+ 2. Removing special characters (preserving hyphens and periods)
127
+ 3. Collapsing multiple hyphens and periods into single ones
128
+ """
129
+ # Replace spaces with hyphens
130
+ text = text.replace(" ", "-")
131
+ # Remove special characters except hyphens and periods
132
+ text = re.sub(r"[^a-zA-Z0-9\-.]", "", text)
133
+ # Collapse multiple hyphens into one
134
+ text = re.sub(r"-+", "-", text)
135
+ # Collapse multiple periods into one
136
+ text = re.sub(r"\.+", ".", text)
137
+ # Remove leading/trailing hyphens and periods
138
+ text = text.strip("-.")
139
+ return text
140
+
141
+
121
142
  def create_id(path: List[str], default_id: Optional[str], enable_auto_id: bool) -> str:
143
+ """
144
+ Create an ID for a glossary node or term.
145
+
146
+ Args:
147
+ path: List of path components leading to this node/term
148
+ default_id: Optional manually specified ID
149
+ enable_auto_id: Whether to generate GUIDs
150
+ """
122
151
  if default_id is not None:
123
- return default_id # No need to create id from path as default_id is provided
152
+ return default_id # Use explicitly provided ID
124
153
 
125
154
  id_: str = ".".join(path)
126
155
 
127
- if UrnEncoder.contains_extended_reserved_char(id_):
128
- enable_auto_id = True
156
+ # Check for non-ASCII characters before cleaning
157
+ if any(ord(c) > 127 for c in id_):
158
+ return datahub_guid({"path": id_})
129
159
 
130
160
  if enable_auto_id:
161
+ # Generate GUID for auto_id mode
131
162
  id_ = datahub_guid({"path": id_})
163
+ else:
164
+ # Clean the URL for better readability when not using auto_id
165
+ id_ = clean_url(id_)
166
+
167
+ # Force auto_id if the cleaned URL still contains problematic characters
168
+ if UrnEncoder.contains_extended_reserved_char(id_):
169
+ logger.warning(
170
+ f"ID '{id_}' contains problematic characters after URL cleaning. Falling back to GUID generation for stability."
171
+ )
172
+ id_ = datahub_guid({"path": id_})
173
+
132
174
  return id_
133
175
 
134
176
 
@@ -128,6 +128,10 @@ class RedshiftConfig(
128
128
  default=True,
129
129
  description="Whether lineage should be collected from copy commands",
130
130
  )
131
+ include_share_lineage: bool = Field(
132
+ default=True,
133
+ description="Whether lineage should be collected from datashares",
134
+ )
131
135
 
132
136
  include_usage_statistics: bool = Field(
133
137
  default=False,