acryl-datahub 1.2.0.2rc3__py3-none-any.whl → 1.2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.2rc3.dist-info → acryl_datahub-1.2.0.3.dist-info}/METADATA +2384 -2384
- {acryl_datahub-1.2.0.2rc3.dist-info → acryl_datahub-1.2.0.3.dist-info}/RECORD +21 -21
- datahub/_version.py +1 -1
- datahub/emitter/rest_emitter.py +18 -5
- datahub/ingestion/graph/client.py +19 -3
- datahub/ingestion/sink/datahub_rest.py +2 -0
- datahub/ingestion/source/dbt/dbt_common.py +10 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/sql/athena_properties_extractor.py +2 -2
- datahub/ingestion/source/sql/vertica.py +3 -0
- datahub/ingestion/source/sql_queries.py +86 -44
- datahub/ingestion/source/unity/proxy.py +112 -22
- datahub/ingestion/source/unity/source.py +7 -10
- datahub/metadata/schema.avsc +9 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +9 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- {acryl_datahub-1.2.0.2rc3.dist-info → acryl_datahub-1.2.0.3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.2rc3.dist-info → acryl_datahub-1.2.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.2rc3.dist-info → acryl_datahub-1.2.0.3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.2rc3.dist-info → acryl_datahub-1.2.0.3.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.2.0.
|
|
1
|
+
acryl_datahub-1.2.0.3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=3vbyl4pDs10qjxTeqdbfjmpJqYoyU66BOXzc2ni_rKU,320
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -132,7 +132,7 @@ datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxga
|
|
|
132
132
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
133
133
|
datahub/emitter/request_helper.py,sha256=2Sij9VJqgA7xZI6I7IuxsA8ioakbz0FJ3gvazxU_z3M,5738
|
|
134
134
|
datahub/emitter/response_helper.py,sha256=qGm45n43CepW7j6kP9wTXuP-U-SZnn7hQdJTdVaoqhQ,7504
|
|
135
|
-
datahub/emitter/rest_emitter.py,sha256=
|
|
135
|
+
datahub/emitter/rest_emitter.py,sha256=za2b8C0f8Mpo8E7DVh3jNENYlNMTV0nwdC3FLm8n2DQ,39532
|
|
136
136
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
137
137
|
datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
|
|
138
138
|
datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
|
|
@@ -181,7 +181,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
|
|
|
181
181
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
182
182
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
|
|
183
183
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
184
|
-
datahub/ingestion/graph/client.py,sha256=
|
|
184
|
+
datahub/ingestion/graph/client.py,sha256=xUURT6KxwOhwuAbUznxrOzmGuXxHI-3MmDgJQHFpaGk,74671
|
|
185
185
|
datahub/ingestion/graph/config.py,sha256=rmkcqAL8fJoY9QyAeS0Xm8HvwHzV3pCjY-Om-50JJTI,1015
|
|
186
186
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
187
187
|
datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
|
|
@@ -201,7 +201,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
|
|
|
201
201
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
202
202
|
datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
|
|
203
203
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
204
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
204
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=QrtR-hJ6yljN1quXcjoUHdAmJueZclrFZFrhU7c4YJM,13563
|
|
205
205
|
datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
|
|
206
206
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
207
207
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -227,7 +227,7 @@ datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgM
|
|
|
227
227
|
datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
|
|
228
228
|
datahub/ingestion/source/salesforce.py,sha256=Pa_w1XszxFd8fyhpSWOfc2nOnevHwwstIvnRrQT4R9M,40584
|
|
229
229
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
230
|
-
datahub/ingestion/source/sql_queries.py,sha256=
|
|
230
|
+
datahub/ingestion/source/sql_queries.py,sha256=9ICUC6tpXpxvtwfK-9lytJzFcLe8MrNlciwy9DIFM-4,13764
|
|
231
231
|
datahub/ingestion/source/superset.py,sha256=oi7F2jlvkVr9ItJ_r1Jm4bYfXHYu4vPAFPMPaGJKB84,50608
|
|
232
232
|
datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
233
233
|
datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
|
|
@@ -301,7 +301,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
301
301
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
302
302
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
303
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=4gWOFSX0YU8EAJgO4J47NBE4QbNtJ-5nUe66vry-oGc,18160
|
|
304
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
304
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=3NcCYsJSDfC5j7ajC_Mr3MnA_sF9DTq1ka6ft3b0u6A,85997
|
|
305
305
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=WVI2ZYXOMxgFzJnJqsqmEGS-5xdfiVIDsCb78lvSeQ0,24930
|
|
306
306
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
|
|
307
307
|
datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -356,7 +356,7 @@ datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJ
|
|
|
356
356
|
datahub/ingestion/source/hex/hex.py,sha256=tUYNcvwKVoQuRWv4KhcDnMeOpICh4JwhD8oF988Tjg4,13199
|
|
357
357
|
datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
|
|
358
358
|
datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
|
|
359
|
-
datahub/ingestion/source/hex/query_fetcher.py,sha256=
|
|
359
|
+
datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
|
|
360
360
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
361
361
|
datahub/ingestion/source/iceberg/iceberg.py,sha256=BNDGooK9cmqpOjzkV1u4rpsduVPNWg_97Uca6aLurNU,35431
|
|
362
362
|
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
|
|
@@ -499,7 +499,7 @@ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=asZW8DztIB1TcGzOoZFmK6
|
|
|
499
499
|
datahub/ingestion/source/snowflake/stored_proc_lineage.py,sha256=rOb78iHiWiK8v8WdVs1xDwVut4Y0OHmszej6IopQfCo,5341
|
|
500
500
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
501
501
|
datahub/ingestion/source/sql/athena.py,sha256=TPKwL9oRiZlVnqIsOSBWUEwyvoW-1ssXvY4PfjxOR6g,28175
|
|
502
|
-
datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=
|
|
502
|
+
datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=OS2E2HD7xTn0MBy__pIvjKXMfGp02Zf93hQRAPMXE_Y,28533
|
|
503
503
|
datahub/ingestion/source/sql/clickhouse.py,sha256=zd5qE6XPw0AXtY_71-n0yz4ua69xP3oxMuIoabAuT3Q,25987
|
|
504
504
|
datahub/ingestion/source/sql/cockroachdb.py,sha256=WoOKCq7YjsuzSPm1SmKIYZ9CrvlSF8zWmP1fNHn4G3Q,1360
|
|
505
505
|
datahub/ingestion/source/sql/druid.py,sha256=_tzgTa5jhPUXk6WCmS7p10feCwJm6yUFcOgMZA-OcE8,2922
|
|
@@ -524,7 +524,7 @@ datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVH
|
|
|
524
524
|
datahub/ingestion/source/sql/teradata.py,sha256=xL_c_UEM_JT-xoMw5Nb5UvSNBUfTGol5CpOkgK5Bsjk,65412
|
|
525
525
|
datahub/ingestion/source/sql/trino.py,sha256=zIfQ6GvW8Sbw4sxqsTcnibT51STka_nzNYvmld6HfHw,18947
|
|
526
526
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=AB3Gtx4omAy_08zadHQpmUGmIGufkZ6o_ihWNnfvzYc,5783
|
|
527
|
-
datahub/ingestion/source/sql/vertica.py,sha256=
|
|
527
|
+
datahub/ingestion/source/sql/vertica.py,sha256=blnu1-H7vnSQD3ZD5QTotoQ2DQJWJeR0uxz_clxiPGo,33518
|
|
528
528
|
datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
|
|
529
529
|
datahub/ingestion/source/sql/mssql/job_models.py,sha256=nAo3rciu-w2-dXCz6_ekDEbGMEjCMEfh8WvSfXoF2l0,9359
|
|
530
530
|
datahub/ingestion/source/sql/mssql/source.py,sha256=Uise_u6yXKU__9B_U3D3yObWNIVDzrz2AgEDZOlk6bQ,43101
|
|
@@ -558,11 +558,11 @@ datahub/ingestion/source/unity/config.py,sha256=7QosoBthg9kirHfXev_vhefkobUxYnp1
|
|
|
558
558
|
datahub/ingestion/source/unity/connection_test.py,sha256=B143Wb28fS0V4GhygU9hzKqiArWBjsQO54IUCPf23dc,2586
|
|
559
559
|
datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
|
|
560
560
|
datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
|
|
561
|
-
datahub/ingestion/source/unity/proxy.py,sha256=
|
|
561
|
+
datahub/ingestion/source/unity/proxy.py,sha256=iZ2ftKOXkxpFr0_2bEYEm31ci9OZJWFYgna3DNLCXrQ,26706
|
|
562
562
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
563
563
|
datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
|
|
564
564
|
datahub/ingestion/source/unity/report.py,sha256=XFT9oQfvEB4RkTvWGgFOoQuLPUN_AIoPXZ79xeDhGHQ,2831
|
|
565
|
-
datahub/ingestion/source/unity/source.py,sha256=
|
|
565
|
+
datahub/ingestion/source/unity/source.py,sha256=udK1_WI7RO2Uzx9xis0mx264nHXDiMeP4Kah3-CFXis,49832
|
|
566
566
|
datahub/ingestion/source/unity/tag_entities.py,sha256=iWl6nRAWSye1hoFDx_Xh4aT53PN0sGzlX7n1-oTVUv8,11568
|
|
567
567
|
datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
|
|
568
568
|
datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -627,7 +627,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
|
|
|
627
627
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
628
628
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
629
629
|
datahub/metadata/_internal_schema_classes.py,sha256=Zh2volhvkUCENRavXDwPsmwfRe62k8_O6f5QT8_bh-g,1051205
|
|
630
|
-
datahub/metadata/schema.avsc,sha256=
|
|
630
|
+
datahub/metadata/schema.avsc,sha256=u5iUlz9AnFfJijjJ9xcZx3MoiBfxWSmhr7pZIOg0tMo,738363
|
|
631
631
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
632
632
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
633
633
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -781,7 +781,7 @@ datahub/metadata/schemas/DataHubUpgradeResult.avsc,sha256=VydVb4yqjIviR73-T6TooF
|
|
|
781
781
|
datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdPQYwvhhv9RQv6gEeg,11642
|
|
782
782
|
datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
|
|
783
783
|
datahub/metadata/schemas/DataJobInfo.avsc,sha256=Bc9qdDcXI0GQdEgNTpgHaBbnrppDKQ-1xR26diOSVIQ,7488
|
|
784
|
-
datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=
|
|
784
|
+
datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=BYKImZ8kQQHqWbSBMKXWD0tGi96yzUt8zJFW3_twVVM,15575
|
|
785
785
|
datahub/metadata/schemas/DataJobKey.avsc,sha256=S7egH8jWjKW52MG6Pg7plDoP15XfTTiMde5V6nR6ycE,1624
|
|
786
786
|
datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
|
|
787
787
|
datahub/metadata/schemas/DataPlatformInstance.avsc,sha256=SNd3v_YyyLaDflv8Rd5cQR9GrVuky_cDTkYM6FqJiM8,1058
|
|
@@ -884,7 +884,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
|
|
|
884
884
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
|
|
885
885
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
886
886
|
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
887
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=
|
|
887
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=oNK0N8WrBsM_AoZkdYAMJQlhYzbao_QWaAMOjqEvPBw,378228
|
|
888
888
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=soCmgrcEBE5yS-mQIm-RIefhb74ONj9Fqayxa0-59KE,13254
|
|
889
889
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=pT14vUmpj7VJ8hinQ0pcCUtRKx6RAGHWh1eJixkqaE8,12647
|
|
890
890
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -933,7 +933,7 @@ datahub/metadata/schemas/TestInfo.avsc,sha256=rye90gdY_lxZt_1gpa_Xum923CJgDU6i_e
|
|
|
933
933
|
datahub/metadata/schemas/TestKey.avsc,sha256=eL-S4Z8EuN1JEXV1t4fy3LwmdA2dJURasFcKygP2rLY,421
|
|
934
934
|
datahub/metadata/schemas/TestResults.avsc,sha256=uspC95AzRvz2_AgHVb5-fxELm5u8NmBTaFVJvGunmh0,5178
|
|
935
935
|
datahub/metadata/schemas/TrainingData.avsc,sha256=7p7sFBA_UyV5IbNU5qLgS3vVu70yevKCfJKSGmTzVTg,2069
|
|
936
|
-
datahub/metadata/schemas/UpstreamLineage.avsc,sha256=
|
|
936
|
+
datahub/metadata/schemas/UpstreamLineage.avsc,sha256=dtpI7KUv9kYyGZmIlKfR2zLwgqsHO5P20egvIeup1EU,11000
|
|
937
937
|
datahub/metadata/schemas/UsageAggregation.avsc,sha256=QaF6lyWGUq8IlRel2h4qIXOXCMxBhrwjoaUELsd-I6g,4538
|
|
938
938
|
datahub/metadata/schemas/VersionInfo.avsc,sha256=9gMcZ8tjuhgcZiq2gOAp_EOV9q9jvuOgfph6m6v_X7c,1189
|
|
939
939
|
datahub/metadata/schemas/VersionProperties.avsc,sha256=ME8V01JzG8lEsLXgYWnSYCehmpPcvv1UbE5Y8-8Ys9k,8022
|
|
@@ -1101,8 +1101,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1101
1101
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1102
1102
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1103
1103
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1104
|
-
acryl_datahub-1.2.0.
|
|
1105
|
-
acryl_datahub-1.2.0.
|
|
1106
|
-
acryl_datahub-1.2.0.
|
|
1107
|
-
acryl_datahub-1.2.0.
|
|
1108
|
-
acryl_datahub-1.2.0.
|
|
1104
|
+
acryl_datahub-1.2.0.3.dist-info/METADATA,sha256=_FyY3RlWzBgv9L5qGdq8iSnRgFNIWwv9QdcQbMW-BEY,182005
|
|
1105
|
+
acryl_datahub-1.2.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1106
|
+
acryl_datahub-1.2.0.3.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
|
|
1107
|
+
acryl_datahub-1.2.0.3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1108
|
+
acryl_datahub-1.2.0.3.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -95,7 +95,7 @@ TRACE_INITIAL_BACKOFF = 1.0 # Start with 1 second
|
|
|
95
95
|
TRACE_MAX_BACKOFF = 300.0 # Cap at 5 minutes
|
|
96
96
|
TRACE_BACKOFF_FACTOR = 2.0 # Double the wait time each attempt
|
|
97
97
|
|
|
98
|
-
# The limit is
|
|
98
|
+
# The limit is 16,000,000 bytes. We will use a max of 15mb to have some space
|
|
99
99
|
# for overhead like request headers.
|
|
100
100
|
# This applies to pretty much all calls to GMS.
|
|
101
101
|
INGEST_MAX_PAYLOAD_BYTES = int(
|
|
@@ -586,6 +586,11 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
586
586
|
"systemMetadata": system_metadata_obj,
|
|
587
587
|
}
|
|
588
588
|
payload = json.dumps(snapshot)
|
|
589
|
+
if len(payload) > INGEST_MAX_PAYLOAD_BYTES:
|
|
590
|
+
logger.warning(
|
|
591
|
+
f"MCE object has size {len(payload)} that exceeds the max payload size of {INGEST_MAX_PAYLOAD_BYTES}, "
|
|
592
|
+
"so this metadata will likely fail to be emitted."
|
|
593
|
+
)
|
|
589
594
|
|
|
590
595
|
self._emit_generic(url, payload)
|
|
591
596
|
|
|
@@ -764,16 +769,24 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
764
769
|
url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
|
|
765
770
|
|
|
766
771
|
mcp_objs = [pre_json_transform(mcp.to_obj()) for mcp in mcps]
|
|
772
|
+
if len(mcp_objs) == 0:
|
|
773
|
+
return 0
|
|
767
774
|
|
|
768
775
|
# As a safety mechanism, we need to make sure we don't exceed the max payload size for GMS.
|
|
769
776
|
# If we will exceed the limit, we need to break it up into chunks.
|
|
770
|
-
mcp_obj_chunks: List[List[str]] = []
|
|
771
|
-
current_chunk_size =
|
|
777
|
+
mcp_obj_chunks: List[List[str]] = [[]]
|
|
778
|
+
current_chunk_size = 0
|
|
772
779
|
for mcp_obj in mcp_objs:
|
|
780
|
+
mcp_identifier = f"{mcp_obj.get('entityUrn')}-{mcp_obj.get('aspectName')}"
|
|
773
781
|
mcp_obj_size = len(json.dumps(mcp_obj))
|
|
774
782
|
if _DATAHUB_EMITTER_TRACE:
|
|
775
783
|
logger.debug(
|
|
776
|
-
f"Iterating through object with size {mcp_obj_size}
|
|
784
|
+
f"Iterating through object ({mcp_identifier}) with size {mcp_obj_size}"
|
|
785
|
+
)
|
|
786
|
+
if mcp_obj_size > INGEST_MAX_PAYLOAD_BYTES:
|
|
787
|
+
logger.warning(
|
|
788
|
+
f"MCP object {mcp_identifier} has size {mcp_obj_size} that exceeds the max payload size of {INGEST_MAX_PAYLOAD_BYTES}, "
|
|
789
|
+
"so this metadata will likely fail to be emitted."
|
|
777
790
|
)
|
|
778
791
|
|
|
779
792
|
if (
|
|
@@ -786,7 +799,7 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
786
799
|
current_chunk_size = 0
|
|
787
800
|
mcp_obj_chunks[-1].append(mcp_obj)
|
|
788
801
|
current_chunk_size += mcp_obj_size
|
|
789
|
-
if len(mcp_obj_chunks) >
|
|
802
|
+
if len(mcp_obj_chunks) > 1 or _DATAHUB_EMITTER_TRACE:
|
|
790
803
|
logger.debug(
|
|
791
804
|
f"Decided to send {len(mcps)} MCP batch in {len(mcp_obj_chunks)} chunks"
|
|
792
805
|
)
|
|
@@ -76,7 +76,15 @@ from datahub.metadata.schema_classes import (
|
|
|
76
76
|
SystemMetadataClass,
|
|
77
77
|
TelemetryClientIdClass,
|
|
78
78
|
)
|
|
79
|
-
from datahub.metadata.urns import
|
|
79
|
+
from datahub.metadata.urns import (
|
|
80
|
+
CorpUserUrn,
|
|
81
|
+
MlFeatureTableUrn,
|
|
82
|
+
MlFeatureUrn,
|
|
83
|
+
MlModelGroupUrn,
|
|
84
|
+
MlModelUrn,
|
|
85
|
+
MlPrimaryKeyUrn,
|
|
86
|
+
Urn,
|
|
87
|
+
)
|
|
80
88
|
from datahub.telemetry.telemetry import telemetry_instance
|
|
81
89
|
from datahub.utilities.perf_timer import PerfTimer
|
|
82
90
|
from datahub.utilities.str_enum import StrEnum
|
|
@@ -118,8 +126,16 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
118
126
|
"""Convert the entity types into GraphQL "EntityType" enum values."""
|
|
119
127
|
|
|
120
128
|
# Hard-coded special cases.
|
|
121
|
-
|
|
122
|
-
|
|
129
|
+
special_cases = {
|
|
130
|
+
CorpUserUrn.ENTITY_TYPE: "CORP_USER",
|
|
131
|
+
MlModelUrn.ENTITY_TYPE: "MLMODEL",
|
|
132
|
+
MlModelGroupUrn.ENTITY_TYPE: "MLMODEL_GROUP",
|
|
133
|
+
MlFeatureTableUrn.ENTITY_TYPE: "MLFEATURE_TABLE",
|
|
134
|
+
MlFeatureUrn.ENTITY_TYPE: "MLFEATURE",
|
|
135
|
+
MlPrimaryKeyUrn.ENTITY_TYPE: "MLPRIMARY_KEY",
|
|
136
|
+
}
|
|
137
|
+
if entity_type in special_cases:
|
|
138
|
+
return special_cases[entity_type]
|
|
123
139
|
|
|
124
140
|
# Convert camelCase to UPPER_UNDERSCORE.
|
|
125
141
|
entity_type = (
|
|
@@ -92,6 +92,7 @@ class DatahubRestSinkConfig(DatahubClientConfig):
|
|
|
92
92
|
@dataclasses.dataclass
|
|
93
93
|
class DataHubRestSinkReport(SinkReport):
|
|
94
94
|
mode: Optional[RestSinkMode] = None
|
|
95
|
+
endpoint: Optional[RestSinkEndpoint] = None
|
|
95
96
|
max_threads: Optional[int] = None
|
|
96
97
|
gms_version: Optional[str] = None
|
|
97
98
|
pending_requests: int = 0
|
|
@@ -142,6 +143,7 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
|
|
|
142
143
|
|
|
143
144
|
self.report.gms_version = gms_config.service_version
|
|
144
145
|
self.report.mode = self.config.mode
|
|
146
|
+
self.report.endpoint = self.config.endpoint
|
|
145
147
|
self.report.max_threads = self.config.max_threads
|
|
146
148
|
logger.debug("Setting env variables to override config")
|
|
147
149
|
logger.debug("Setting gms config")
|
|
@@ -120,6 +120,7 @@ logger = logging.getLogger(__name__)
|
|
|
120
120
|
DBT_PLATFORM = "dbt"
|
|
121
121
|
|
|
122
122
|
_DEFAULT_ACTOR = mce_builder.make_user_urn("unknown")
|
|
123
|
+
_DBT_MAX_COMPILED_CODE_LENGTH = 1 * 1024 * 1024 # 1MB
|
|
123
124
|
|
|
124
125
|
|
|
125
126
|
@dataclass
|
|
@@ -1684,6 +1685,12 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1684
1685
|
def get_external_url(self, node: DBTNode) -> Optional[str]:
|
|
1685
1686
|
pass
|
|
1686
1687
|
|
|
1688
|
+
@staticmethod
|
|
1689
|
+
def _truncate_code(code: str, max_length: int) -> str:
|
|
1690
|
+
if len(code) > max_length:
|
|
1691
|
+
return code[:max_length] + "..."
|
|
1692
|
+
return code
|
|
1693
|
+
|
|
1687
1694
|
def _create_view_properties_aspect(
|
|
1688
1695
|
self, node: DBTNode
|
|
1689
1696
|
) -> Optional[ViewPropertiesClass]:
|
|
@@ -1695,6 +1702,9 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1695
1702
|
compiled_code = try_format_query(
|
|
1696
1703
|
node.compiled_code, platform=self.config.target_platform
|
|
1697
1704
|
)
|
|
1705
|
+
compiled_code = self._truncate_code(
|
|
1706
|
+
compiled_code, _DBT_MAX_COMPILED_CODE_LENGTH
|
|
1707
|
+
)
|
|
1698
1708
|
|
|
1699
1709
|
materialized = node.materialization in {"table", "incremental", "snapshot"}
|
|
1700
1710
|
view_properties = ViewPropertiesClass(
|
|
@@ -97,7 +97,7 @@ class HexQueryFetcher:
|
|
|
97
97
|
if not query_urns or not entities_by_urn:
|
|
98
98
|
self.report.warning(
|
|
99
99
|
title="No Queries found with Hex as origin",
|
|
100
|
-
message="No lineage because of no Queries found with Hex as origin in the given time range
|
|
100
|
+
message="No lineage because of no Queries found with Hex as origin in the given time range. You may need to set use_queries_v2: true on your warehouse ingestion or you may consider extending the time range to fetch more queries.",
|
|
101
101
|
context=str(
|
|
102
102
|
dict(
|
|
103
103
|
workspace_name=self.workspace_name,
|
|
@@ -99,10 +99,10 @@ class AthenaPropertiesExtractor:
|
|
|
99
99
|
"""A class to extract properties from Athena CREATE TABLE statements."""
|
|
100
100
|
|
|
101
101
|
CREATE_TABLE_REGEXP = re.compile(
|
|
102
|
-
"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
|
|
102
|
+
r"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
|
|
103
103
|
)
|
|
104
104
|
PARTITIONED_BY_REGEXP = re.compile(
|
|
105
|
-
"(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
|
|
105
|
+
r"(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
|
|
106
106
|
re.MULTILINE | re.IGNORECASE,
|
|
107
107
|
)
|
|
108
108
|
|
|
@@ -4,6 +4,7 @@ from dataclasses import dataclass
|
|
|
4
4
|
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union
|
|
5
5
|
|
|
6
6
|
import pydantic
|
|
7
|
+
import pytest
|
|
7
8
|
from pydantic.class_validators import validator
|
|
8
9
|
from vertica_sqlalchemy_dialect.base import VerticaInspector
|
|
9
10
|
|
|
@@ -55,6 +56,8 @@ from datahub.utilities import config_clean
|
|
|
55
56
|
|
|
56
57
|
if TYPE_CHECKING:
|
|
57
58
|
from datahub.ingestion.source.ge_data_profiler import GEProfilerRequest
|
|
59
|
+
|
|
60
|
+
pytestmark = pytest.mark.integration_batch_4
|
|
58
61
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
59
62
|
|
|
60
63
|
|
|
@@ -2,12 +2,13 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
-
from datetime import datetime
|
|
5
|
+
from datetime import datetime
|
|
6
6
|
from functools import partial
|
|
7
|
-
from typing import Iterable, List, Optional, Union
|
|
7
|
+
from typing import ClassVar, Iterable, List, Optional, Union
|
|
8
8
|
|
|
9
|
-
from pydantic import Field
|
|
9
|
+
from pydantic import BaseModel, Field, validator
|
|
10
10
|
|
|
11
|
+
from datahub.configuration.datetimes import parse_user_datetime
|
|
11
12
|
from datahub.configuration.source_common import (
|
|
12
13
|
EnvConfigMixin,
|
|
13
14
|
PlatformInstanceConfigMixin,
|
|
@@ -35,7 +36,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
35
36
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
36
37
|
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
|
|
37
38
|
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
38
|
-
from datahub.metadata.urns import CorpUserUrn
|
|
39
|
+
from datahub.metadata.urns import CorpUserUrn, DatasetUrn
|
|
39
40
|
from datahub.sql_parsing.schema_resolver import SchemaResolver
|
|
40
41
|
from datahub.sql_parsing.sql_parsing_aggregator import (
|
|
41
42
|
KnownQueryLineageInfo,
|
|
@@ -208,19 +209,40 @@ class SqlQueriesSource(Source):
|
|
|
208
209
|
def _add_query_to_aggregator(self, query_entry: "QueryEntry") -> None:
|
|
209
210
|
"""Add a query to the SQL parsing aggregator."""
|
|
210
211
|
try:
|
|
211
|
-
# If we have
|
|
212
|
-
if query_entry.upstream_tables
|
|
212
|
+
# If we have both upstream and downstream tables, use explicit lineage
|
|
213
|
+
if query_entry.upstream_tables and query_entry.downstream_tables:
|
|
213
214
|
logger.debug("Using explicit lineage from query file")
|
|
214
215
|
for downstream_table in query_entry.downstream_tables:
|
|
215
216
|
known_lineage = KnownQueryLineageInfo(
|
|
216
217
|
query_text=query_entry.query,
|
|
217
|
-
downstream=downstream_table,
|
|
218
|
-
upstreams=query_entry.upstream_tables,
|
|
218
|
+
downstream=str(downstream_table),
|
|
219
|
+
upstreams=[str(urn) for urn in query_entry.upstream_tables],
|
|
219
220
|
timestamp=query_entry.timestamp,
|
|
220
221
|
session_id=query_entry.session_id,
|
|
221
222
|
)
|
|
222
223
|
self.aggregator.add_known_query_lineage(known_lineage)
|
|
223
224
|
else:
|
|
225
|
+
# Warn if only partial lineage information is provided
|
|
226
|
+
# XOR: true if exactly one of upstream_tables or downstream_tables is provided
|
|
227
|
+
if bool(query_entry.upstream_tables) ^ bool(
|
|
228
|
+
query_entry.downstream_tables
|
|
229
|
+
):
|
|
230
|
+
query_preview = (
|
|
231
|
+
query_entry.query[:150] + "..."
|
|
232
|
+
if len(query_entry.query) > 150
|
|
233
|
+
else query_entry.query
|
|
234
|
+
)
|
|
235
|
+
missing_upstream = (
|
|
236
|
+
"Missing upstream. " if not query_entry.upstream_tables else ""
|
|
237
|
+
)
|
|
238
|
+
missing_downstream = (
|
|
239
|
+
"Missing downstream. "
|
|
240
|
+
if not query_entry.downstream_tables
|
|
241
|
+
else ""
|
|
242
|
+
)
|
|
243
|
+
logger.info(
|
|
244
|
+
f"Only partial lineage information provided, falling back to SQL parsing for complete lineage detection. {missing_upstream}{missing_downstream}Query: {query_preview}"
|
|
245
|
+
)
|
|
224
246
|
# No explicit lineage, rely on parsing
|
|
225
247
|
observed_query = ObservedQuery(
|
|
226
248
|
query=query_entry.query,
|
|
@@ -243,46 +265,66 @@ class SqlQueriesSource(Source):
|
|
|
243
265
|
)
|
|
244
266
|
|
|
245
267
|
|
|
246
|
-
|
|
247
|
-
class QueryEntry:
|
|
268
|
+
class QueryEntry(BaseModel):
|
|
248
269
|
query: str
|
|
249
|
-
timestamp: Optional[datetime]
|
|
250
|
-
user: Optional[CorpUserUrn]
|
|
251
|
-
operation_type: Optional[str]
|
|
252
|
-
downstream_tables: List[
|
|
253
|
-
upstream_tables: List[
|
|
270
|
+
timestamp: Optional[datetime] = None
|
|
271
|
+
user: Optional[CorpUserUrn] = None
|
|
272
|
+
operation_type: Optional[str] = None
|
|
273
|
+
downstream_tables: List[DatasetUrn] = Field(default_factory=list)
|
|
274
|
+
upstream_tables: List[DatasetUrn] = Field(default_factory=list)
|
|
254
275
|
session_id: Optional[str] = None
|
|
255
276
|
|
|
277
|
+
# Validation context for URN creation
|
|
278
|
+
_validation_context: ClassVar[Optional[SqlQueriesSourceConfig]] = None
|
|
279
|
+
|
|
280
|
+
class Config:
|
|
281
|
+
arbitrary_types_allowed = True
|
|
282
|
+
|
|
283
|
+
@validator("timestamp", pre=True)
|
|
284
|
+
def parse_timestamp(cls, v):
|
|
285
|
+
return None if v is None else parse_user_datetime(str(v))
|
|
286
|
+
|
|
287
|
+
@validator("user", pre=True)
|
|
288
|
+
def parse_user(cls, v):
|
|
289
|
+
if v is None:
|
|
290
|
+
return None
|
|
291
|
+
|
|
292
|
+
return v if isinstance(v, CorpUserUrn) else CorpUserUrn(v)
|
|
293
|
+
|
|
294
|
+
@validator("downstream_tables", "upstream_tables", pre=True)
|
|
295
|
+
def parse_tables(cls, v):
|
|
296
|
+
if not v:
|
|
297
|
+
return []
|
|
298
|
+
|
|
299
|
+
result = []
|
|
300
|
+
for item in v:
|
|
301
|
+
if isinstance(item, DatasetUrn):
|
|
302
|
+
result.append(item)
|
|
303
|
+
elif isinstance(item, str):
|
|
304
|
+
# Skip empty/whitespace-only strings
|
|
305
|
+
if item and item.strip():
|
|
306
|
+
# Convert to URN using validation context
|
|
307
|
+
assert cls._validation_context, (
|
|
308
|
+
"Validation context must be set for URN creation"
|
|
309
|
+
)
|
|
310
|
+
urn_string = make_dataset_urn_with_platform_instance(
|
|
311
|
+
name=item,
|
|
312
|
+
platform=cls._validation_context.platform,
|
|
313
|
+
platform_instance=cls._validation_context.platform_instance,
|
|
314
|
+
env=cls._validation_context.env,
|
|
315
|
+
)
|
|
316
|
+
result.append(DatasetUrn.from_string(urn_string))
|
|
317
|
+
|
|
318
|
+
return result
|
|
319
|
+
|
|
256
320
|
@classmethod
|
|
257
321
|
def create(
|
|
258
322
|
cls, entry_dict: dict, *, config: SqlQueriesSourceConfig
|
|
259
323
|
) -> "QueryEntry":
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
user=CorpUserUrn(entry_dict["user"]) if "user" in entry_dict else None,
|
|
268
|
-
operation_type=entry_dict.get("operation_type"),
|
|
269
|
-
downstream_tables=[
|
|
270
|
-
make_dataset_urn_with_platform_instance(
|
|
271
|
-
name=table,
|
|
272
|
-
platform=config.platform,
|
|
273
|
-
platform_instance=config.platform_instance,
|
|
274
|
-
env=config.env,
|
|
275
|
-
)
|
|
276
|
-
for table in entry_dict.get("downstream_tables", [])
|
|
277
|
-
],
|
|
278
|
-
upstream_tables=[
|
|
279
|
-
make_dataset_urn_with_platform_instance(
|
|
280
|
-
name=table,
|
|
281
|
-
platform=config.platform,
|
|
282
|
-
platform_instance=config.platform_instance,
|
|
283
|
-
env=config.env,
|
|
284
|
-
)
|
|
285
|
-
for table in entry_dict.get("upstream_tables", [])
|
|
286
|
-
],
|
|
287
|
-
session_id=entry_dict.get("session_id"),
|
|
288
|
-
)
|
|
324
|
+
"""Create QueryEntry from dict with config context."""
|
|
325
|
+
# Set validation context for URN creation
|
|
326
|
+
cls._validation_context = config
|
|
327
|
+
try:
|
|
328
|
+
return cls.parse_obj(entry_dict)
|
|
329
|
+
finally:
|
|
330
|
+
cls._validation_context = None
|