acryl-datahub 1.0.0.1rc7__py3-none-any.whl → 1.0.0.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/METADATA +2513 -2513
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/RECORD +17 -17
- datahub/_version.py +1 -1
- datahub/cli/ingest_cli.py +4 -4
- datahub/emitter/mcp_builder.py +4 -0
- datahub/ingestion/graph/client.py +104 -0
- datahub/ingestion/source/iceberg/iceberg.py +9 -9
- datahub/ingestion/source/mlflow.py +3 -7
- datahub/ingestion/source/powerbi/powerbi.py +14 -1
- datahub/ingestion/source/sql/trino.py +4 -3
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/vertexai/vertexai.py +7 -7
- datahub/utilities/ingest_utils.py +2 -2
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/RECORD CHANGED

@@ -1,7 +1,7 @@
-acryl_datahub-1.0.0.
+acryl_datahub-1.0.0.2rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=
+datahub/_version.py,sha256=vzyBMegu61oWM-Gce9R3y5zLfMrINPSGDEFO-MHhthA,323
 datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
 datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -72,7 +72,7 @@ datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
 datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
 datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
 datahub/cli/iceberg_cli.py,sha256=-XT3wpkr8b-HFMafYk7lSon3Lys6XjTQA8U1b698ByM,23003
-datahub/cli/ingest_cli.py,sha256=
+datahub/cli/ingest_cli.py,sha256=Welutg0LOjuEiBOnQdNTnEZFDyKZiiWxqyQK-Go8dL8,20540
 datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
 datahub/cli/lite_cli.py,sha256=XKMejSuYUToKBvgN3YmmnxjRcaG5WPw23gJuQK8pgRc,13099
 datahub/cli/migrate.py,sha256=3orGfLNsdh1Q7gkPaCaf2bBWM5b3Ih4fGFw3poe0wiA,17937
@@ -122,7 +122,7 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
 datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
 datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
 datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
-datahub/emitter/mcp_builder.py,sha256=
+datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
 datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
 datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
 datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
@@ -171,7 +171,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
 datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
 datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
 datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/graph/client.py,sha256=
+datahub/ingestion/graph/client.py,sha256=DUOy3fzwUMT0wJ2GAcLiYa5bzelwfZjBmzhbm95IIys,69918
 datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
 datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
 datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
 datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
 datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
 datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
-datahub/ingestion/source/mlflow.py,sha256=
+datahub/ingestion/source/mlflow.py,sha256=6uN1fjyubs9rjAsdtkSRMKf7h3_89UvFfWDqkgdvPdY,32422
 datahub/ingestion/source/mode.py,sha256=20vWflnRIMWqK8q2Mt2PorMBLnzRAA4bMjcWEyqhTio,64506
 datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
 datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
@@ -333,7 +333,7 @@ datahub/ingestion/source/hex/hex.py,sha256=DPpsi5e-sdUgbS0Okyvx1mvc00Adu47zA65oF
 datahub/ingestion/source/hex/mapper.py,sha256=6dsGvvhPAOAbAG1ayxLwipgJGt1q7YanWYfMX3rZeiM,12603
 datahub/ingestion/source/hex/model.py,sha256=hmMfOLEGZcKjwy2DW29OPf_9_Q_TesgnUTCen2br_fA,1471
 datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/iceberg/iceberg.py,sha256=
+datahub/ingestion/source/iceberg/iceberg.py,sha256=PhLLXWgBdfZ3hL7LgLvDr6aTK-QKmiZCFNz5jD-mxZM,30773
 datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
 datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
 datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -378,7 +378,7 @@ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 datahub/ingestion/source/powerbi/config.py,sha256=5rG62dspGF9jIo8l6HLpB6ECv5n-t1un2ZyGiisD784,24219
 datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
 datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
-datahub/ingestion/source/powerbi/powerbi.py,sha256=
+datahub/ingestion/source/powerbi/powerbi.py,sha256=f0vwzVdKAU7Qp7gRHgTOPq45ThUmsbFXCwcIDaS1S34,56464
 datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
 datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
@@ -488,7 +488,7 @@ datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F
 datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
 datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
 datahub/ingestion/source/sql/teradata.py,sha256=9WdrxDy02lRJi9IZgsAATFsmxcQnIw5Gr6yCqHJQy5k,33507
-datahub/ingestion/source/sql/trino.py,sha256=
+datahub/ingestion/source/sql/trino.py,sha256=gSLDyETKavSVR8l9wdebrfoc41cqAWz6ApqIicW0BF8,17892
 datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
 datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
 datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
@@ -504,7 +504,7 @@ datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-Lj
 datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
 datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
 datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
-datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256
+datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Lr2HYGx_b2FQ8A36s7s11tl-4-mGIM13bfy5JbQ3LtM,14890
 datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
 datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
 datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
@@ -535,7 +535,7 @@ datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e
 datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
 datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
 datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/vertexai/vertexai.py,sha256=
+datahub/ingestion/source/vertexai/vertexai.py,sha256=ipq2Zb2lXTZkUg9r78kvuIIhX7mc-5hr-o83395IWpo,43589
 datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
 datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=fE2l_xXvKONqb4jabl4LtKRBZDnP3koMLJV520wEAMg,2555
 datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -970,7 +970,7 @@ datahub/utilities/file_backed_collections.py,sha256=zW-Xy6zO1E6R-FRJKjlimMKr-emW
 datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
 datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
 datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
-datahub/utilities/ingest_utils.py,sha256=
+datahub/utilities/ingest_utils.py,sha256=OgETzX_9g6DcYlxGH0_xgAmAlWLSROr25ydDL-mBhKM,3137
 datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
 datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
 datahub/utilities/lossy_collections.py,sha256=5rdtfK2pjwvOrrzLf_KGFOMiVvLLmoXj5EVQXTFSR3E,5704
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
+acryl_datahub-1.0.0.2rc1.dist-info/METADATA,sha256=IE26ZK9HREmhmiMf2zQds-JatSIyAh9gcaVjGyOAGLE,176849
+acryl_datahub-1.0.0.2rc1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+acryl_datahub-1.0.0.2rc1.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+acryl_datahub-1.0.0.2rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.0.0.2rc1.dist-info/RECORD,,
datahub/_version.py CHANGED

datahub/cli/ingest_cli.py CHANGED

@@ -216,9 +216,9 @@ def run(
 @click.option(
     "--executor-id",
     type=str,
-    default="default",
     help="Executor id to route execution requests to. Do not use this unless you have configured a custom executor.",
     required=False,
+    default=None,
 )
 @click.option(
     "--cli-version",
@@ -239,7 +239,7 @@ def run(
     type=str,
     help="Timezone for the schedule in 'America/New_York' format. Uses UTC by default.",
     required=False,
-    default=
+    default=None,
 )
 @click.option(
     "--debug", type=bool, help="Should we debug.", required=False, default=False
@@ -255,10 +255,10 @@ def deploy(
     name: Optional[str],
     config: str,
     urn: Optional[str],
-    executor_id: str,
+    executor_id: Optional[str],
     cli_version: Optional[str],
     schedule: Optional[str],
-    time_zone: str,
+    time_zone: Optional[str],
     extra_pip: Optional[str],
     debug: bool = False,
 ) -> None:
datahub/emitter/mcp_builder.py CHANGED

datahub/ingestion/graph/client.py CHANGED

@@ -27,6 +27,7 @@ from pydantic import BaseModel
 from requests.models import HTTPError
 from typing_extensions import deprecated
 
+from datahub._codegen.aspect import _Aspect
 from datahub.cli import config_utils
 from datahub.configuration.common import ConfigModel, GraphError, OperationalError
 from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
@@ -1697,6 +1698,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
 
         return res["runAssertionsForAsset"]
 
+    @deprecated("Use get_entities instead which returns typed aspects")
     def get_entities_v2(
         self,
         entity_name: str,
@@ -1736,6 +1738,108 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
                 retval[entity_urn][aspect_key] = aspect_value
         return retval
 
+    def get_entities(
+        self,
+        entity_name: str,
+        urns: List[str],
+        aspects: Optional[List[str]] = None,
+        with_system_metadata: bool = False,
+    ) -> Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]]:
+        """
+        Get entities using the OpenAPI v3 endpoint, deserializing aspects into typed objects.
+
+        Args:
+            entity_name: The entity type name
+            urns: List of entity URNs to fetch
+            aspects: Optional list of aspect names to fetch. If None, all aspects will be fetched.
+            with_system_metadata: If True, return system metadata along with each aspect.
+
+        Returns:
+            A dictionary mapping URNs to a dictionary of aspect name to tuples of
+            (typed aspect object, system metadata). If with_system_metadata is False,
+            the system metadata in the tuple will be None.
+        """
+        aspects = aspects or []
+
+        request_payload = []
+        for urn in urns:
+            entity_request: Dict[str, Any] = {"urn": urn}
+            for aspect_name in aspects:
+                entity_request[aspect_name] = {}
+            request_payload.append(entity_request)
+
+        headers: Dict[str, Any] = {
+            "Accept": "application/json",
+            "Content-Type": "application/json",
+        }
+
+        url = f"{self.config.server}/openapi/v3/entity/{entity_name}/batchGet"
+        if with_system_metadata:
+            url += "?systemMetadata=true"
+
+        response = self._session.post(
+            url, data=json.dumps(request_payload), headers=headers
+        )
+        response.raise_for_status()
+        entities = response.json()
+
+        result: Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]] = {}
+
+        for entity in entities:
+            entity_urn = entity.get("urn")
+            if entity_urn is None:
+                logger.warning(
+                    f"Missing URN in entity response: {entity}, skipping deserialization"
+                )
+                continue
+
+            entity_aspects: Dict[
+                str, Tuple[_Aspect, Optional[SystemMetadataClass]]
+            ] = {}
+
+            for aspect_name, aspect_obj in entity.items():
+                if aspect_name == "urn":
+                    continue
+
+                aspect_class = ASPECT_NAME_MAP.get(aspect_name)
+                if aspect_class is None:
+                    logger.warning(
+                        f"Unknown aspect type {aspect_name}, skipping deserialization"
+                    )
+                    continue
+
+                aspect_value = aspect_obj.get("value")
+                if aspect_value is None:
+                    logger.warning(
+                        f"Unknown aspect value for aspect {aspect_name}, skipping deserialization"
+                    )
+                    continue
+
+                try:
+                    post_json_obj = post_json_transform(aspect_value)
+                    typed_aspect = aspect_class.from_obj(post_json_obj)
+                    assert isinstance(typed_aspect, aspect_class) and isinstance(
+                        typed_aspect, _Aspect
+                    )
+
+                    system_metadata = None
+                    if with_system_metadata:
+                        system_metadata_obj = aspect_obj.get("systemMetadata")
+                        if system_metadata_obj:
+                            system_metadata = SystemMetadataClass.from_obj(
+                                system_metadata_obj
+                            )
+
+                    entity_aspects[aspect_name] = (typed_aspect, system_metadata)
+                except Exception as e:
+                    logger.error(f"Error deserializing aspect {aspect_name}: {e}")
+                    raise
+
+            if entity_aspects:
+                result[entity_urn] = entity_aspects
+
+        return result
+
     def upsert_custom_assertion(
         self,
         urn: Optional[str],
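A usage sketch for the new `get_entities` method, following the signature added above; the server URL and dataset URN are placeholders, and this assumes a reachable DataHub instance:

from datahub.ingestion.graph.client import DataHubGraph
from datahub.ingestion.graph.config import DatahubClientConfig

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

entities = graph.get_entities(
    entity_name="dataset",
    urns=["urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"],
    aspects=["status", "datasetProperties"],
    with_system_metadata=True,
)

for urn, aspect_map in entities.items():
    for aspect_name, (aspect, system_metadata) in aspect_map.items():
        # Each aspect arrives as a typed object (e.g. StatusClass), unlike
        # the raw JSON dictionaries returned by the deprecated get_entities_v2.
        run_id = system_metadata.runId if system_metadata else None
        print(urn, aspect_name, type(aspect).__name__, run_id)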
datahub/ingestion/source/iceberg/iceberg.py CHANGED

@@ -425,23 +425,21 @@ class IcebergSource(StatefulIngestionSourceBase):
     def _get_dataset_properties_aspect(
         self, dataset_name: str, table: Table
     ) -> DatasetPropertiesClass:
-
+        created: Optional[TimeStampClass] = None
         custom_properties = table.metadata.properties.copy()
         custom_properties["location"] = table.metadata.location
         custom_properties["format-version"] = str(table.metadata.format_version)
         custom_properties["partition-spec"] = str(self._get_partition_aspect(table))
+        last_modified: Optional[int] = table.metadata.last_updated_ms
         if table.current_snapshot():
             custom_properties["snapshot-id"] = str(table.current_snapshot().snapshot_id)
             custom_properties["manifest-list"] = table.current_snapshot().manifest_list
-
-            int(table.current_snapshot().timestamp_ms)
-            )
+            if not last_modified:
+                last_modified = int(table.current_snapshot().timestamp_ms)
         if "created-at" in custom_properties:
             try:
                 dt = dateutil_parser.isoparse(custom_properties["created-at"])
-
-                int(dt.timestamp() * 1000)
-                )
+                created = TimeStampClass(int(dt.timestamp() * 1000))
             except Exception as ex:
                 LOGGER.warning(
                     f"Exception while trying to parse creation date {custom_properties['created-at']}, ignoring: {ex}"
@@ -451,8 +449,10 @@ class IcebergSource(StatefulIngestionSourceBase):
             name=table.name()[-1],
             description=table.metadata.properties.get("comment", None),
             customProperties=custom_properties,
-            lastModified=
-
+            lastModified=TimeStampClass(last_modified)
+            if last_modified is not None
+            else None,
+            created=created,
             qualifiedName=dataset_name,
         )
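The effect of the change above is that the Iceberg source now emits `created` and `lastModified` as `TimeStampClass` values on the dataset properties aspect instead of computing and discarding them. A small sketch with hypothetical millisecond timestamps standing in for `table.metadata.last_updated_ms` and a parsed "created-at" property:

from typing import Optional

from datahub.metadata.schema_classes import DatasetPropertiesClass, TimeStampClass

# Hypothetical values; in the source these come from the Iceberg table metadata.
last_modified: Optional[int] = 1714000000000
created: Optional[TimeStampClass] = TimeStampClass(1713000000000)

props = DatasetPropertiesClass(
    name="orders",
    qualifiedName="db.orders",
    lastModified=TimeStampClass(last_modified) if last_modified is not None else None,
    created=created,
)
print(props.lastModified.time if props.lastModified else None)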
datahub/ingestion/source/mlflow.py CHANGED

@@ -16,7 +16,7 @@ from datahub.api.entities.dataprocess.dataprocess_instance import (
 )
 from datahub.configuration.source_common import EnvConfigMixin
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import
+from datahub.emitter.mcp_builder import ExperimentKey
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -77,10 +77,6 @@ from datahub.sdk.dataset import Dataset
 T = TypeVar("T")
 
 
-class ContainerKeyWithId(ContainerKey):
-    id: str
-
-
 class MLflowConfig(StatefulIngestionConfigBase, EnvConfigMixin):
     tracking_uri: Optional[str] = Field(
         default=None,
@@ -252,7 +248,7 @@ class MLflowSource(StatefulIngestionSourceBase):
         self, experiment: Experiment
     ) -> Iterable[MetadataWorkUnit]:
         experiment_container = Container(
-            container_key=
+            container_key=ExperimentKey(
                 platform=str(DataPlatformUrn(platform_name=self.platform)),
                 id=experiment.name,
             ),
@@ -470,7 +466,7 @@ class MLflowSource(StatefulIngestionSourceBase):
     def _get_run_workunits(
         self, experiment: Experiment, run: Run
     ) -> Iterable[MetadataWorkUnit]:
-        experiment_key =
+        experiment_key = ExperimentKey(
             platform=str(DataPlatformUrn(self.platform)), id=experiment.name
         )
 
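Both the MLflow and Vertex AI sources now share `ExperimentKey` from `mcp_builder` instead of each defining a local `ContainerKeyWithId`. A sketch of building the key, assuming it is a `ContainerKey` subclass with an `id` field as the call sites imply; the platform and experiment name are placeholders:

from datahub.emitter.mcp_builder import ExperimentKey
from datahub.metadata.urns import DataPlatformUrn

key = ExperimentKey(
    platform=str(DataPlatformUrn(platform_name="mlflow")),
    id="my-experiment",  # hypothetical experiment name
)
# ContainerKey subclasses derive a stable GUID-based container URN from
# their fields, so the same experiment always maps to the same container.
print(key.as_urn())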
datahub/ingestion/source/powerbi/powerbi.py CHANGED

@@ -94,7 +94,7 @@ from datahub.metadata.schema_classes import (
     UpstreamLineageClass,
     ViewPropertiesClass,
 )
-from datahub.metadata.urns import ChartUrn
+from datahub.metadata.urns import ChartUrn, DatasetUrn
 from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
 from datahub.utilities.dedup_list import deduplicate_list
 from datahub.utilities.urns.urn_iter import lowercase_dataset_urn
@@ -1083,6 +1083,7 @@ class Mapper:
         report: powerbi_data_classes.Report,
         chart_mcps: List[MetadataChangeProposalWrapper],
         user_mcps: List[MetadataChangeProposalWrapper],
+        dataset_edges: List[EdgeClass],
     ) -> List[MetadataChangeProposalWrapper]:
         """
         Map PowerBi report to Datahub dashboard
@@ -1104,6 +1105,7 @@ class Mapper:
             charts=chart_urn_list,
             lastModified=ChangeAuditStamps(),
             dashboardUrl=report.webUrl,
+            datasetEdges=dataset_edges,
         )
 
         info_mcp = self.new_mcp(
@@ -1197,12 +1199,23 @@ class Mapper:
         ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
         chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)
 
+        # collect all upstream datasets; using a set to retain unique urns
+        dataset_urns = {
+            dataset.entityUrn
+            for dataset in ds_mcps
+            if dataset.entityType == DatasetUrn.ENTITY_TYPE and dataset.entityUrn
+        }
+        dataset_edges = [
+            EdgeClass(destinationUrn=dataset_urn) for dataset_urn in dataset_urns
+        ]
+
         # Let's convert report to datahub dashboard
         report_mcps = self.report_to_dashboard(
             workspace=workspace,
             report=report,
             chart_mcps=chart_mcps,
             user_mcps=user_mcps,
+            dataset_edges=dataset_edges,
         )
 
         # Now add MCPs in sequence
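With this change the dashboard aspect emitted for a Power BI report carries `datasetEdges` pointing at its upstream datasets, giving report-level lineage in addition to chart-level inputs. A sketch of the aspect construction with a placeholder URN and title:

from datahub.metadata.schema_classes import (
    ChangeAuditStampsClass,
    DashboardInfoClass,
    EdgeClass,
)

dataset_urns = {
    "urn:li:dataset:(urn:li:dataPlatform:powerbi,workspace.model.table,PROD)",
}

info = DashboardInfoClass(
    title="Sales Report",
    description="",
    lastModified=ChangeAuditStampsClass(),
    charts=[],
    # One edge per unique upstream dataset, mirroring the mapper logic above.
    datasetEdges=[EdgeClass(destinationUrn=urn) for urn in sorted(dataset_urns)],
)
print([edge.destinationUrn for edge in info.datasetEdges or []])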
datahub/ingestion/source/sql/trino.py CHANGED

@@ -128,9 +128,10 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k
     if catalog_name is None:
         raise exc.NoSuchTableError("catalog is required in connection")
     connector_name = get_catalog_connector_name(connection.engine, catalog_name)
-    if
-
-
+    if (
+        connector_name is not None
+        and connector_name in PROPERTIES_TABLE_SUPPORTED_CONNECTORS
+    ):
         properties_table = self._get_full_table(f"{table_name}$properties", schema)
         query = f"SELECT * FROM {properties_table}"
         row = connection.execute(sql.text(query)).fetchone()
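The reworked condition checks that `get_catalog_connector_name` actually returned a connector before the membership test, so a missing catalog never triggers a "<table>$properties" query. A standalone sketch of the guard; the set contents here are illustrative, not the constant's actual value:

from typing import Optional

# Illustrative stand-in for the module-level constant in trino.py.
PROPERTIES_TABLE_SUPPORTED_CONNECTORS = {"hive", "iceberg"}


def supports_properties_table(connector_name: Optional[str]) -> bool:
    # The explicit None check keeps an unresolved connector from ever
    # reaching the membership test (and the follow-up metadata query).
    return (
        connector_name is not None
        and connector_name in PROPERTIES_TABLE_SUPPORTED_CONNECTORS
    )


assert not supports_properties_table(None)
assert supports_properties_table("hive")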
datahub/ingestion/source/state/stale_entity_removal_handler.py CHANGED

@@ -45,7 +45,6 @@ class StatefulStaleMetadataRemovalConfig(StatefulIngestionConfig):
         description="Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the 'fail_safe_threshold'.",
         le=100.0,
         ge=0.0,
-        hidden_from_docs=True,
     )
 
 
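Dropping `hidden_from_docs=True` surfaces `fail_safe_threshold` in the generated config documentation; it remains a bounded percentage. A sketch of setting it directly, assuming the model's other fields keep their defaults:

from datahub.ingestion.source.state.stale_entity_removal_handler import (
    StatefulStaleMetadataRemovalConfig,
)

# Abort stale-entity deletion if more than 20% of previously-seen entities
# would be removed; pydantic enforces the 0.0-100.0 bounds (ge/le above).
config = StatefulStaleMetadataRemovalConfig(fail_safe_threshold=20.0)
print(config.fail_safe_threshold)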
datahub/ingestion/source/vertexai/vertexai.py CHANGED

@@ -22,7 +22,11 @@ from google.oauth2 import service_account
 
 import datahub.emitter.mce_builder as builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import
+from datahub.emitter.mcp_builder import (
+    ExperimentKey,
+    ProjectIdKey,
+    gen_containers,
+)
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -96,10 +100,6 @@ class ModelMetadata:
     endpoints: Optional[List[Endpoint]] = None
 
 
-class ContainerKeyWithId(ContainerKey):
-    id: str
-
-
 @platform_name("Vertex AI", id="vertexai")
 @config_class(VertexAIConfig)
 @support_status(SupportStatus.TESTING)
@@ -173,7 +173,7 @@ class VertexAISource(Source):
     ) -> Iterable[MetadataWorkUnit]:
         yield from gen_containers(
             parent_container_key=self._get_project_container(),
-            container_key=
+            container_key=ExperimentKey(
                 platform=self.platform,
                 id=self._make_vertexai_experiment_name(experiment.name),
             ),
@@ -309,7 +309,7 @@ class VertexAISource(Source):
     def _gen_experiment_run_mcps(
         self, experiment: Experiment, run: ExperimentRun
    ) -> Iterable[MetadataChangeProposalWrapper]:
-        experiment_key =
+        experiment_key = ExperimentKey(
             platform=self.platform,
             id=self._make_vertexai_experiment_name(experiment.name),
         )
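The expanded import list mirrors how the source nests an experiment container under the project container. A sketch under the assumption that `ProjectIdKey` keeps its `project_id` field and `gen_containers` its usual (container_key, name, sub_types, parent_container_key) signature; the project and experiment names are placeholders:

from datahub.emitter.mcp_builder import (
    ExperimentKey,
    ProjectIdKey,
    gen_containers,
)

project_key = ProjectIdKey(project_id="my-gcp-project", platform="vertexai")
experiment_key = ExperimentKey(platform="vertexai", id="my-experiment")

# Emits the container aspects for the experiment and links it to the project.
workunits = list(
    gen_containers(
        container_key=experiment_key,
        name="my-experiment",
        sub_types=["Experiment"],
        parent_container_key=project_key,
    )
)
print(len(workunits))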
datahub/utilities/ingest_utils.py CHANGED

@@ -32,10 +32,10 @@ def deploy_source_vars(
     name: Optional[str],
     config: str,
     urn: Optional[str],
-    executor_id: str,
+    executor_id: Optional[str],
     cli_version: Optional[str],
     schedule: Optional[str],
-    time_zone: str,
+    time_zone: Optional[str],
     extra_pip: Optional[str],
     debug: bool = False,
 ) -> dict:
{acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/WHEEL: File without changes
{acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/entry_points.txt: File without changes
{acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/licenses/LICENSE: File without changes
{acryl_datahub-1.0.0.1rc7.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/top_level.txt: File without changes