acryl-datahub 1.2.0.2rc3__py3-none-any.whl → 1.2.0.3rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

acryl_datahub-1.2.0.2rc3.dist-info/RECORD → acryl_datahub-1.2.0.3rc2.dist-info/RECORD RENAMED
@@ -1,7 +1,7 @@
- acryl_datahub-1.2.0.2rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.2.0.3rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=cyLeAlWzIql-O65xt-MALgl-0jJ-qmzJ-gFq-t6FkdE,323
+ datahub/_version.py,sha256=68k6koV9OpoFsqms-Y85vFl7s7-exthVNAueW6OVqNk,323
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,7 +132,7 @@ datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxga
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
  datahub/emitter/request_helper.py,sha256=2Sij9VJqgA7xZI6I7IuxsA8ioakbz0FJ3gvazxU_z3M,5738
  datahub/emitter/response_helper.py,sha256=qGm45n43CepW7j6kP9wTXuP-U-SZnn7hQdJTdVaoqhQ,7504
- datahub/emitter/rest_emitter.py,sha256=lMqjtDyPOArIrNgL47kq1cbB4xiR17CHfRRxpGYriDY,38793
+ datahub/emitter/rest_emitter.py,sha256=za2b8C0f8Mpo8E7DVh3jNENYlNMTV0nwdC3FLm8n2DQ,39532
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -181,7 +181,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/graph/client.py,sha256=b23UCgszAFcW_UjNBEvY5c3oqMNGifz2445vl2IkKyo,74224
+ datahub/ingestion/graph/client.py,sha256=xUURT6KxwOhwuAbUznxrOzmGuXxHI-3MmDgJQHFpaGk,74671
  datahub/ingestion/graph/config.py,sha256=rmkcqAL8fJoY9QyAeS0Xm8HvwHzV3pCjY-Om-50JJTI,1015
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -201,7 +201,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
- datahub/ingestion/sink/datahub_rest.py,sha256=DOhtTHqKpmqgI3rUY9ri2QZAyXYDFINWMG6ne7VYUXI,13463
+ datahub/ingestion/sink/datahub_rest.py,sha256=QrtR-hJ6yljN1quXcjoUHdAmJueZclrFZFrhU7c4YJM,13563
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -227,7 +227,7 @@ datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgM
  datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
  datahub/ingestion/source/salesforce.py,sha256=Pa_w1XszxFd8fyhpSWOfc2nOnevHwwstIvnRrQT4R9M,40584
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
- datahub/ingestion/source/sql_queries.py,sha256=1SdEZGAmojfWbU1xKbezH6uqwRd2G0rgpK_Sh9MRj1U,11791
+ datahub/ingestion/source/sql_queries.py,sha256=9ICUC6tpXpxvtwfK-9lytJzFcLe8MrNlciwy9DIFM-4,13764
  datahub/ingestion/source/superset.py,sha256=oi7F2jlvkVr9ItJ_r1Jm4bYfXHYu4vPAFPMPaGJKB84,50608
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
@@ -301,7 +301,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/dbt/dbt_cloud.py,sha256=4gWOFSX0YU8EAJgO4J47NBE4QbNtJ-5nUe66vry-oGc,18160
- datahub/ingestion/source/dbt/dbt_common.py,sha256=ByCqzjkToXgfhOyxxc6VEuD8BZbYbPsD5yrLRMMPUcI,85640
+ datahub/ingestion/source/dbt/dbt_common.py,sha256=3NcCYsJSDfC5j7ajC_Mr3MnA_sF9DTq1ka6ft3b0u6A,85997
  datahub/ingestion/source/dbt/dbt_core.py,sha256=WVI2ZYXOMxgFzJnJqsqmEGS-5xdfiVIDsCb78lvSeQ0,24930
  datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
  datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -356,7 +356,7 @@ datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJ
  datahub/ingestion/source/hex/hex.py,sha256=tUYNcvwKVoQuRWv4KhcDnMeOpICh4JwhD8oF988Tjg4,13199
  datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
  datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
- datahub/ingestion/source/hex/query_fetcher.py,sha256=0VqDfviyfR14gUHvIBovCXEqwW4ftFehPSB2VzaYk14,13312
+ datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/iceberg/iceberg.py,sha256=BNDGooK9cmqpOjzkV1u4rpsduVPNWg_97Uca6aLurNU,35431
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
@@ -499,7 +499,7 @@ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=asZW8DztIB1TcGzOoZFmK6
  datahub/ingestion/source/snowflake/stored_proc_lineage.py,sha256=rOb78iHiWiK8v8WdVs1xDwVut4Y0OHmszej6IopQfCo,5341
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/sql/athena.py,sha256=TPKwL9oRiZlVnqIsOSBWUEwyvoW-1ssXvY4PfjxOR6g,28175
- datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=n2SvqeUbNWxiWWdkDs8VYlUPlLwfZzZy9AIa-V4D7AY,28531
+ datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=OS2E2HD7xTn0MBy__pIvjKXMfGp02Zf93hQRAPMXE_Y,28533
  datahub/ingestion/source/sql/clickhouse.py,sha256=zd5qE6XPw0AXtY_71-n0yz4ua69xP3oxMuIoabAuT3Q,25987
  datahub/ingestion/source/sql/cockroachdb.py,sha256=WoOKCq7YjsuzSPm1SmKIYZ9CrvlSF8zWmP1fNHn4G3Q,1360
  datahub/ingestion/source/sql/druid.py,sha256=_tzgTa5jhPUXk6WCmS7p10feCwJm6yUFcOgMZA-OcE8,2922
@@ -524,7 +524,7 @@ datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVH
  datahub/ingestion/source/sql/teradata.py,sha256=xL_c_UEM_JT-xoMw5Nb5UvSNBUfTGol5CpOkgK5Bsjk,65412
  datahub/ingestion/source/sql/trino.py,sha256=zIfQ6GvW8Sbw4sxqsTcnibT51STka_nzNYvmld6HfHw,18947
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=AB3Gtx4omAy_08zadHQpmUGmIGufkZ6o_ihWNnfvzYc,5783
- datahub/ingestion/source/sql/vertica.py,sha256=MeohL8j68ISES1RhrBXLQlkT_YqgT-AvHRxuVCJSMbE,33458
+ datahub/ingestion/source/sql/vertica.py,sha256=blnu1-H7vnSQD3ZD5QTotoQ2DQJWJeR0uxz_clxiPGo,33518
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
  datahub/ingestion/source/sql/mssql/job_models.py,sha256=nAo3rciu-w2-dXCz6_ekDEbGMEjCMEfh8WvSfXoF2l0,9359
  datahub/ingestion/source/sql/mssql/source.py,sha256=Uise_u6yXKU__9B_U3D3yObWNIVDzrz2AgEDZOlk6bQ,43101
@@ -558,11 +558,11 @@ datahub/ingestion/source/unity/config.py,sha256=7QosoBthg9kirHfXev_vhefkobUxYnp1
  datahub/ingestion/source/unity/connection_test.py,sha256=B143Wb28fS0V4GhygU9hzKqiArWBjsQO54IUCPf23dc,2586
  datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
  datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
- datahub/ingestion/source/unity/proxy.py,sha256=jfQ1N8Xrp08zeYN2j74YTweusygXtK4Q-5_FBbwCVTE,22803
+ datahub/ingestion/source/unity/proxy.py,sha256=iZ2ftKOXkxpFr0_2bEYEm31ci9OZJWFYgna3DNLCXrQ,26706
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
  datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
  datahub/ingestion/source/unity/report.py,sha256=XFT9oQfvEB4RkTvWGgFOoQuLPUN_AIoPXZ79xeDhGHQ,2831
- datahub/ingestion/source/unity/source.py,sha256=47sWCYb3pd21RjIqs9NH4h6VMtF_YMvqS9-6cegGi1w,49980
+ datahub/ingestion/source/unity/source.py,sha256=udK1_WI7RO2Uzx9xis0mx264nHXDiMeP4Kah3-CFXis,49832
  datahub/ingestion/source/unity/tag_entities.py,sha256=iWl6nRAWSye1hoFDx_Xh4aT53PN0sGzlX7n1-oTVUv8,11568
  datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -627,7 +627,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
  datahub/metadata/_internal_schema_classes.py,sha256=Zh2volhvkUCENRavXDwPsmwfRe62k8_O6f5QT8_bh-g,1051205
- datahub/metadata/schema.avsc,sha256=4X6Jx5TFcOGY8Qxdm-FSgbGkzG2wND992brsxwgumSU,737966
+ datahub/metadata/schema.avsc,sha256=u5iUlz9AnFfJijjJ9xcZx3MoiBfxWSmhr7pZIOg0tMo,738363
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -781,7 +781,7 @@ datahub/metadata/schemas/DataHubUpgradeResult.avsc,sha256=VydVb4yqjIviR73-T6TooF
  datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdPQYwvhhv9RQv6gEeg,11642
  datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
  datahub/metadata/schemas/DataJobInfo.avsc,sha256=Bc9qdDcXI0GQdEgNTpgHaBbnrppDKQ-1xR26diOSVIQ,7488
- datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
+ datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=BYKImZ8kQQHqWbSBMKXWD0tGi96yzUt8zJFW3_twVVM,15575
  datahub/metadata/schemas/DataJobKey.avsc,sha256=S7egH8jWjKW52MG6Pg7plDoP15XfTTiMde5V6nR6ycE,1624
  datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
  datahub/metadata/schemas/DataPlatformInstance.avsc,sha256=SNd3v_YyyLaDflv8Rd5cQR9GrVuky_cDTkYM6FqJiM8,1058
@@ -884,7 +884,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=l3tVuQces7sKrwWsaIJrn3nMRUiCl3MHqCJJHcw7Ylc,377705
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=oNK0N8WrBsM_AoZkdYAMJQlhYzbao_QWaAMOjqEvPBw,378228
  datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=soCmgrcEBE5yS-mQIm-RIefhb74ONj9Fqayxa0-59KE,13254
  datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=pT14vUmpj7VJ8hinQ0pcCUtRKx6RAGHWh1eJixkqaE8,12647
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
@@ -933,7 +933,7 @@ datahub/metadata/schemas/TestInfo.avsc,sha256=rye90gdY_lxZt_1gpa_Xum923CJgDU6i_e
  datahub/metadata/schemas/TestKey.avsc,sha256=eL-S4Z8EuN1JEXV1t4fy3LwmdA2dJURasFcKygP2rLY,421
  datahub/metadata/schemas/TestResults.avsc,sha256=uspC95AzRvz2_AgHVb5-fxELm5u8NmBTaFVJvGunmh0,5178
  datahub/metadata/schemas/TrainingData.avsc,sha256=7p7sFBA_UyV5IbNU5qLgS3vVu70yevKCfJKSGmTzVTg,2069
- datahub/metadata/schemas/UpstreamLineage.avsc,sha256=iaeFRbL2aVSYFwj-HQHyfIVaHRrK3kLbkkLXgIfJTsk,10639
+ datahub/metadata/schemas/UpstreamLineage.avsc,sha256=dtpI7KUv9kYyGZmIlKfR2zLwgqsHO5P20egvIeup1EU,11000
  datahub/metadata/schemas/UsageAggregation.avsc,sha256=QaF6lyWGUq8IlRel2h4qIXOXCMxBhrwjoaUELsd-I6g,4538
  datahub/metadata/schemas/VersionInfo.avsc,sha256=9gMcZ8tjuhgcZiq2gOAp_EOV9q9jvuOgfph6m6v_X7c,1189
  datahub/metadata/schemas/VersionProperties.avsc,sha256=ME8V01JzG8lEsLXgYWnSYCehmpPcvv1UbE5Y8-8Ys9k,8022
@@ -1101,8 +1101,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.2.0.2rc3.dist-info/METADATA,sha256=ixYC_JxEXERi_Ik-6tXxNn5W0EMN_poTxliZ99bCS6o,182014
- acryl_datahub-1.2.0.2rc3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub-1.2.0.2rc3.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
- acryl_datahub-1.2.0.2rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.2.0.2rc3.dist-info/RECORD,,
+ acryl_datahub-1.2.0.3rc2.dist-info/METADATA,sha256=sUemCtB9B2qW0ADg8AmidAEwuqUwCjVp-qYmFaX-dPc,182014
+ acryl_datahub-1.2.0.3rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub-1.2.0.3rc2.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
+ acryl_datahub-1.2.0.3rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.2.0.3rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.2.0.2rc3"
+ __version__ = "1.2.0.3rc2"


  def is_dev_mode() -> bool:
datahub/emitter/rest_emitter.py CHANGED
@@ -95,7 +95,7 @@ TRACE_INITIAL_BACKOFF = 1.0  # Start with 1 second
  TRACE_MAX_BACKOFF = 300.0  # Cap at 5 minutes
  TRACE_BACKOFF_FACTOR = 2.0  # Double the wait time each attempt

- # The limit is 16mb. We will use a max of 15mb to have some space
+ # The limit is 16,000,000 bytes. We will use a max of 15mb to have some space
  # for overhead like request headers.
  # This applies to pretty much all calls to GMS.
  INGEST_MAX_PAYLOAD_BYTES = int(
@@ -586,6 +586,11 @@ class DataHubRestEmitter(Closeable, Emitter):
              "systemMetadata": system_metadata_obj,
          }
          payload = json.dumps(snapshot)
+         if len(payload) > INGEST_MAX_PAYLOAD_BYTES:
+             logger.warning(
+                 f"MCE object has size {len(payload)} that exceeds the max payload size of {INGEST_MAX_PAYLOAD_BYTES}, "
+                 "so this metadata will likely fail to be emitted."
+             )

          self._emit_generic(url, payload)

@@ -764,16 +769,24 @@ class DataHubRestEmitter(Closeable, Emitter):
          url = f"{self._gms_server}/aspects?action=ingestProposalBatch"

          mcp_objs = [pre_json_transform(mcp.to_obj()) for mcp in mcps]
+         if len(mcp_objs) == 0:
+             return 0

          # As a safety mechanism, we need to make sure we don't exceed the max payload size for GMS.
          # If we will exceed the limit, we need to break it up into chunks.
-         mcp_obj_chunks: List[List[str]] = []
-         current_chunk_size = INGEST_MAX_PAYLOAD_BYTES
+         mcp_obj_chunks: List[List[str]] = [[]]
+         current_chunk_size = 0
          for mcp_obj in mcp_objs:
+             mcp_identifier = f"{mcp_obj.get('entityUrn')}-{mcp_obj.get('aspectName')}"
              mcp_obj_size = len(json.dumps(mcp_obj))
              if _DATAHUB_EMITTER_TRACE:
                  logger.debug(
-                     f"Iterating through object with size {mcp_obj_size} (type: {mcp_obj.get('aspectName')}"
+                     f"Iterating through object ({mcp_identifier}) with size {mcp_obj_size}"
+                 )
+             if mcp_obj_size > INGEST_MAX_PAYLOAD_BYTES:
+                 logger.warning(
+                     f"MCP object {mcp_identifier} has size {mcp_obj_size} that exceeds the max payload size of {INGEST_MAX_PAYLOAD_BYTES}, "
+                     "so this metadata will likely fail to be emitted."
                  )

              if (
@@ -786,7 +799,7 @@ class DataHubRestEmitter(Closeable, Emitter):
                  current_chunk_size = 0
              mcp_obj_chunks[-1].append(mcp_obj)
              current_chunk_size += mcp_obj_size
-         if len(mcp_obj_chunks) > 0:
+         if len(mcp_obj_chunks) > 1 or _DATAHUB_EMITTER_TRACE:
              logger.debug(
                  f"Decided to send {len(mcps)} MCP batch in {len(mcp_obj_chunks)} chunks"
              )
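
The chunking rework above is the substantive change in rest_emitter.py: empty batches return early, the chunk list now starts as one open chunk with a zero byte counter (previously the counter was seeded at the limit so the overflow branch had to create the first chunk), trace logs identify each MCP by entityUrn plus aspectName, oversized objects trigger an explicit warning, and the chunk-count summary only logs when there is more than one chunk or tracing is enabled. Below is a minimal standalone sketch of this greedy size-based chunking; the names and the byte limit are illustrative stand-ins, not the exact constants from the module:

```python
import json
import logging
from typing import Any, Dict, List

logger = logging.getLogger(__name__)

MAX_PAYLOAD_BYTES = 15 * 1000 * 1000  # illustrative stand-in for INGEST_MAX_PAYLOAD_BYTES


def chunk_by_payload_size(
    objs: List[Dict[str, Any]], max_bytes: int = MAX_PAYLOAD_BYTES
) -> List[List[Dict[str, Any]]]:
    """Greedily pack objects into chunks whose serialized size stays under max_bytes."""
    if not objs:
        return []  # mirrors the new early return for empty batches
    chunks: List[List[Dict[str, Any]]] = [[]]  # one open chunk, zero counter...
    current_size = 0  # ...instead of seeding the counter at the limit
    for obj in objs:
        size = len(json.dumps(obj))
        if size > max_bytes:
            # A single object larger than the cap cannot fit in any chunk;
            # the release now warns about this up front.
            logger.warning("object of size %d exceeds the %d-byte cap", size, max_bytes)
        if chunks[-1] and current_size + size > max_bytes:
            chunks.append([])  # close the full chunk, open a new one
            current_size = 0
        chunks[-1].append(obj)
        current_size += size
    return chunks
```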
datahub/ingestion/graph/client.py CHANGED
@@ -76,7 +76,15 @@ from datahub.metadata.schema_classes import (
      SystemMetadataClass,
      TelemetryClientIdClass,
  )
- from datahub.metadata.urns import CorpUserUrn, Urn
+ from datahub.metadata.urns import (
+     CorpUserUrn,
+     MlFeatureTableUrn,
+     MlFeatureUrn,
+     MlModelGroupUrn,
+     MlModelUrn,
+     MlPrimaryKeyUrn,
+     Urn,
+ )
  from datahub.telemetry.telemetry import telemetry_instance
  from datahub.utilities.perf_timer import PerfTimer
  from datahub.utilities.str_enum import StrEnum
@@ -118,8 +126,16 @@ def entity_type_to_graphql(entity_type: str) -> str:
      """Convert the entity types into GraphQL "EntityType" enum values."""

      # Hard-coded special cases.
-     if entity_type == CorpUserUrn.ENTITY_TYPE:
-         return "CORP_USER"
+     special_cases = {
+         CorpUserUrn.ENTITY_TYPE: "CORP_USER",
+         MlModelUrn.ENTITY_TYPE: "MLMODEL",
+         MlModelGroupUrn.ENTITY_TYPE: "MLMODEL_GROUP",
+         MlFeatureTableUrn.ENTITY_TYPE: "MLFEATURE_TABLE",
+         MlFeatureUrn.ENTITY_TYPE: "MLFEATURE",
+         MlPrimaryKeyUrn.ENTITY_TYPE: "MLPRIMARY_KEY",
+     }
+     if entity_type in special_cases:
+         return special_cases[entity_type]

      # Convert camelCase to UPPER_UNDERSCORE.
      entity_type = (
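
entity_type_to_graphql previously special-cased only corpuser and pushed everything else through a camelCase-to-UPPER_UNDERSCORE conversion, which mangles the ML entity types: their GraphQL enum values have no underscore between "ML" and the noun. A sketch of the lookup-then-fallback shape, assuming the standard DataHub entity-type strings as keys and using a regex that stands in for the real fallback conversion:

```python
import re

# Special cases from this release, keyed by the urns' ENTITY_TYPE strings.
# The keys shown are the standard DataHub entity-type names; treat them as
# illustrative rather than read off the library constants.
SPECIAL_CASES = {
    "corpuser": "CORP_USER",
    "mlModel": "MLMODEL",
    "mlModelGroup": "MLMODEL_GROUP",
    "mlFeatureTable": "MLFEATURE_TABLE",
    "mlFeature": "MLFEATURE",
    "mlPrimaryKey": "MLPRIMARY_KEY",
}


def entity_type_to_graphql(entity_type: str) -> str:
    if entity_type in SPECIAL_CASES:
        return SPECIAL_CASES[entity_type]
    # Fallback: insert "_" before each interior capital, then uppercase.
    return re.sub(r"(?<!^)(?=[A-Z])", "_", entity_type).upper()


assert entity_type_to_graphql("dataFlow") == "DATA_FLOW"
# Without the new table entries, "mlModel" would come out as "ML_MODEL",
# which is not a valid GraphQL EntityType enum value.
assert entity_type_to_graphql("mlModel") == "MLMODEL"
```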
datahub/ingestion/sink/datahub_rest.py CHANGED
@@ -92,6 +92,7 @@ class DatahubRestSinkConfig(DatahubClientConfig):
  @dataclasses.dataclass
  class DataHubRestSinkReport(SinkReport):
      mode: Optional[RestSinkMode] = None
+     endpoint: Optional[RestSinkEndpoint] = None
      max_threads: Optional[int] = None
      gms_version: Optional[str] = None
      pending_requests: int = 0
@@ -142,6 +143,7 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):

          self.report.gms_version = gms_config.service_version
          self.report.mode = self.config.mode
+         self.report.endpoint = self.config.endpoint
          self.report.max_threads = self.config.max_threads
          logger.debug("Setting env variables to override config")
          logger.debug("Setting gms config")
datahub/ingestion/source/dbt/dbt_common.py CHANGED
@@ -120,6 +120,7 @@ logger = logging.getLogger(__name__)
  DBT_PLATFORM = "dbt"

  _DEFAULT_ACTOR = mce_builder.make_user_urn("unknown")
+ _DBT_MAX_COMPILED_CODE_LENGTH = 1 * 1024 * 1024  # 1MB


  @dataclass
@@ -1684,6 +1685,12 @@ class DBTSourceBase(StatefulIngestionSourceBase):
      def get_external_url(self, node: DBTNode) -> Optional[str]:
          pass

+     @staticmethod
+     def _truncate_code(code: str, max_length: int) -> str:
+         if len(code) > max_length:
+             return code[:max_length] + "..."
+         return code
+
      def _create_view_properties_aspect(
          self, node: DBTNode
      ) -> Optional[ViewPropertiesClass]:
@@ -1695,6 +1702,9 @@ class DBTSourceBase(StatefulIngestionSourceBase):
          compiled_code = try_format_query(
              node.compiled_code, platform=self.config.target_platform
          )
+         compiled_code = self._truncate_code(
+             compiled_code, _DBT_MAX_COMPILED_CODE_LENGTH
+         )

          materialized = node.materialization in {"table", "incremental", "snapshot"}
          view_properties = ViewPropertiesClass(
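
dbt compiled code can be arbitrarily large, and it is now capped before being attached to the view-properties aspect (the comment says 1MB, though 1 * 1024 * 1024 is strictly 1 MiB), which pairs with the new oversized-payload warnings in the emitter. The truncation helper is self-contained enough to check directly; this snippet restates it with illustrative inputs:

```python
_DBT_MAX_COMPILED_CODE_LENGTH = 1 * 1024 * 1024  # the new cap


def _truncate_code(code: str, max_length: int) -> str:
    if len(code) > max_length:
        return code[:max_length] + "..."
    return code


short_sql = "select 1"
long_sql = "x" * (_DBT_MAX_COMPILED_CODE_LENGTH + 100)

# Short code passes through untouched.
assert _truncate_code(short_sql, _DBT_MAX_COMPILED_CODE_LENGTH) == short_sql
# Truncated output keeps max_length characters plus the three-character "..." marker.
assert len(_truncate_code(long_sql, _DBT_MAX_COMPILED_CODE_LENGTH)) == _DBT_MAX_COMPILED_CODE_LENGTH + 3
```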
datahub/ingestion/source/hex/query_fetcher.py CHANGED
@@ -97,7 +97,7 @@ class HexQueryFetcher:
          if not query_urns or not entities_by_urn:
              self.report.warning(
                  title="No Queries found with Hex as origin",
-                 message="No lineage because of no Queries found with Hex as origin in the given time range; you may consider extending the time range to fetch more queries.",
+                 message="No lineage because of no Queries found with Hex as origin in the given time range. You may need to set use_queries_v2: true on your warehouse ingestion or you may consider extending the time range to fetch more queries.",
                  context=str(
                      dict(
                          workspace_name=self.workspace_name,
datahub/ingestion/source/sql/athena_properties_extractor.py CHANGED
@@ -99,10 +99,10 @@ class AthenaPropertiesExtractor:
      """A class to extract properties from Athena CREATE TABLE statements."""

      CREATE_TABLE_REGEXP = re.compile(
-         "(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
+         r"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
      )
      PARTITIONED_BY_REGEXP = re.compile(
-         "(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
+         r"(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
          re.MULTILINE | re.IGNORECASE,
      )

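The only change here is the r prefix on both patterns, which accounts for the two-byte size bump in the RECORD entry. In a plain string, escapes like \s and \( are invalid escape sequences that CPython passes through unchanged but flags with a DeprecationWarning (a SyntaxWarning on newer versions), so the compiled regex behavior is identical. A quick check that the raw-string pattern matches as before:

```python
import re

# Identical pattern to the release, now as a raw string.
CREATE_TABLE_REGEXP = re.compile(
    r"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
)

m = CREATE_TABLE_REGEXP.search("CREATE TABLE my_db.my_table (id int)")
assert m is not None
assert m.group(2) == "my_db.my_table"  # the table name captured by the lazy group
```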
datahub/ingestion/source/sql/vertica.py CHANGED
@@ -4,6 +4,7 @@ from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union

  import pydantic
+ import pytest
  from pydantic.class_validators import validator
  from vertica_sqlalchemy_dialect.base import VerticaInspector

@@ -55,6 +56,8 @@ from datahub.utilities import config_clean

  if TYPE_CHECKING:
      from datahub.ingestion.source.ge_data_profiler import GEProfilerRequest
+
+ pytestmark = pytest.mark.integration_batch_4
  logger: logging.Logger = logging.getLogger(__name__)

datahub/ingestion/source/sql_queries.py CHANGED
@@ -2,12 +2,13 @@ import json
  import logging
  import os
  from dataclasses import dataclass
- from datetime import datetime, timezone
+ from datetime import datetime
  from functools import partial
- from typing import Iterable, List, Optional, Union
+ from typing import ClassVar, Iterable, List, Optional, Union

- from pydantic import Field
+ from pydantic import BaseModel, Field, validator

+ from datahub.configuration.datetimes import parse_user_datetime
  from datahub.configuration.source_common import (
      EnvConfigMixin,
      PlatformInstanceConfigMixin,
@@ -35,7 +36,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
  from datahub.ingestion.graph.client import DataHubGraph
  from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
  from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
- from datahub.metadata.urns import CorpUserUrn
+ from datahub.metadata.urns import CorpUserUrn, DatasetUrn
  from datahub.sql_parsing.schema_resolver import SchemaResolver
  from datahub.sql_parsing.sql_parsing_aggregator import (
      KnownQueryLineageInfo,
@@ -208,19 +209,40 @@ class SqlQueriesSource(Source):
      def _add_query_to_aggregator(self, query_entry: "QueryEntry") -> None:
          """Add a query to the SQL parsing aggregator."""
          try:
-             # If we have explicit lineage, use it directly
-             if query_entry.upstream_tables or query_entry.downstream_tables:
+             # If we have both upstream and downstream tables, use explicit lineage
+             if query_entry.upstream_tables and query_entry.downstream_tables:
                  logger.debug("Using explicit lineage from query file")
                  for downstream_table in query_entry.downstream_tables:
                      known_lineage = KnownQueryLineageInfo(
                          query_text=query_entry.query,
-                         downstream=downstream_table,
-                         upstreams=query_entry.upstream_tables,
+                         downstream=str(downstream_table),
+                         upstreams=[str(urn) for urn in query_entry.upstream_tables],
                          timestamp=query_entry.timestamp,
                          session_id=query_entry.session_id,
                      )
                      self.aggregator.add_known_query_lineage(known_lineage)
              else:
+                 # Warn if only partial lineage information is provided
+                 # XOR: true if exactly one of upstream_tables or downstream_tables is provided
+                 if bool(query_entry.upstream_tables) ^ bool(
+                     query_entry.downstream_tables
+                 ):
+                     query_preview = (
+                         query_entry.query[:150] + "..."
+                         if len(query_entry.query) > 150
+                         else query_entry.query
+                     )
+                     missing_upstream = (
+                         "Missing upstream. " if not query_entry.upstream_tables else ""
+                     )
+                     missing_downstream = (
+                         "Missing downstream. "
+                         if not query_entry.downstream_tables
+                         else ""
+                     )
+                     logger.info(
+                         f"Only partial lineage information provided, falling back to SQL parsing for complete lineage detection. {missing_upstream}{missing_downstream}Query: {query_preview}"
+                     )
                  # No explicit lineage, rely on parsing
                  observed_query = ObservedQuery(
                      query=query_entry.query,
@@ -243,46 +265,66 @@
          )


- @dataclass
- class QueryEntry:
+ class QueryEntry(BaseModel):
      query: str
-     timestamp: Optional[datetime]
-     user: Optional[CorpUserUrn]
-     operation_type: Optional[str]
-     downstream_tables: List[str]
-     upstream_tables: List[str]
+     timestamp: Optional[datetime] = None
+     user: Optional[CorpUserUrn] = None
+     operation_type: Optional[str] = None
+     downstream_tables: List[DatasetUrn] = Field(default_factory=list)
+     upstream_tables: List[DatasetUrn] = Field(default_factory=list)
      session_id: Optional[str] = None

+     # Validation context for URN creation
+     _validation_context: ClassVar[Optional[SqlQueriesSourceConfig]] = None
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     @validator("timestamp", pre=True)
+     def parse_timestamp(cls, v):
+         return None if v is None else parse_user_datetime(str(v))
+
+     @validator("user", pre=True)
+     def parse_user(cls, v):
+         if v is None:
+             return None
+
+         return v if isinstance(v, CorpUserUrn) else CorpUserUrn(v)
+
+     @validator("downstream_tables", "upstream_tables", pre=True)
+     def parse_tables(cls, v):
+         if not v:
+             return []
+
+         result = []
+         for item in v:
+             if isinstance(item, DatasetUrn):
+                 result.append(item)
+             elif isinstance(item, str):
+                 # Skip empty/whitespace-only strings
+                 if item and item.strip():
+                     # Convert to URN using validation context
+                     assert cls._validation_context, (
+                         "Validation context must be set for URN creation"
+                     )
+                     urn_string = make_dataset_urn_with_platform_instance(
+                         name=item,
+                         platform=cls._validation_context.platform,
+                         platform_instance=cls._validation_context.platform_instance,
+                         env=cls._validation_context.env,
+                     )
+                     result.append(DatasetUrn.from_string(urn_string))
+
+         return result
+
      @classmethod
      def create(
          cls, entry_dict: dict, *, config: SqlQueriesSourceConfig
      ) -> "QueryEntry":
-         return cls(
-             query=entry_dict["query"],
-             timestamp=(
-                 datetime.fromtimestamp(entry_dict["timestamp"], tz=timezone.utc)
-                 if "timestamp" in entry_dict
-                 else None
-             ),
-             user=CorpUserUrn(entry_dict["user"]) if "user" in entry_dict else None,
-             operation_type=entry_dict.get("operation_type"),
-             downstream_tables=[
-                 make_dataset_urn_with_platform_instance(
-                     name=table,
-                     platform=config.platform,
-                     platform_instance=config.platform_instance,
-                     env=config.env,
-                 )
-                 for table in entry_dict.get("downstream_tables", [])
-             ],
-             upstream_tables=[
-                 make_dataset_urn_with_platform_instance(
-                     name=table,
-                     platform=config.platform,
-                     platform_instance=config.platform_instance,
-                     env=config.env,
-                 )
-                 for table in entry_dict.get("upstream_tables", [])
-             ],
-             session_id=entry_dict.get("session_id"),
-         )
+         """Create QueryEntry from dict with config context."""
+         # Set validation context for URN creation
+         cls._validation_context = config
+         try:
+             return cls.parse_obj(entry_dict)
+         finally:
+             cls._validation_context = None
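
QueryEntry moves from a plain dataclass with a hand-rolled create to a pydantic model whose pre-validators coerce raw strings into typed values, with the source config threaded through a ClassVar so the table validator can build dataset URNs. A hypothetical usage sketch under assumed config fields (the real SqlQueriesSourceConfig carries more options than shown here):

```python
from datahub.ingestion.source.sql_queries import QueryEntry, SqlQueriesSourceConfig

# Hypothetical entry, as one line of the query file might deserialize.
entry = {
    "query": "insert into sales.orders select * from staging.orders",
    "timestamp": "2024-01-15 00:00:00",  # string now accepted via parse_user_datetime
    "user": "jdoe",                      # coerced to CorpUserUrn by the pre-validator
    "downstream_tables": ["sales.orders"],
    "upstream_tables": ["staging.orders"],
}

# Illustrative config; field names beyond platform/query_file are assumptions.
config = SqlQueriesSourceConfig(platform="snowflake", query_file="queries.jsonl")

query_entry = QueryEntry.create(entry, config=config)

# Table names are resolved against the config's platform/env into DatasetUrn objects.
print(query_entry.downstream_tables[0])
# e.g. urn:li:dataset:(urn:li:dataPlatform:snowflake,sales.orders,PROD)
```

One design note: create is not thread-safe as written, since the class-level _validation_context is mutated and then cleared around parse_obj, so concurrent create calls with different configs could race.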