acryl-datahub 1.0.0.1rc7__py3-none-any.whl → 1.0.0.2rc1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

This release of acryl-datahub has been flagged as potentially problematic.

@@ -1,7 +1,7 @@
-acryl_datahub-1.0.0.1rc7.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+acryl_datahub-1.0.0.2rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=AZj-rwp4edRcZvS9Mq4fxTeV64QHFW-6zysNAtjc2qg,323
+datahub/_version.py,sha256=vzyBMegu61oWM-Gce9R3y5zLfMrINPSGDEFO-MHhthA,323
 datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
 datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -72,7 +72,7 @@ datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
 datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
 datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
 datahub/cli/iceberg_cli.py,sha256=-XT3wpkr8b-HFMafYk7lSon3Lys6XjTQA8U1b698ByM,23003
-datahub/cli/ingest_cli.py,sha256=Xvdb0v-r_8rhlmZo161E043BMbLuyvNq1LLJWHdlXT0,20526
+datahub/cli/ingest_cli.py,sha256=Welutg0LOjuEiBOnQdNTnEZFDyKZiiWxqyQK-Go8dL8,20540
 datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
 datahub/cli/lite_cli.py,sha256=XKMejSuYUToKBvgN3YmmnxjRcaG5WPw23gJuQK8pgRc,13099
 datahub/cli/migrate.py,sha256=3orGfLNsdh1Q7gkPaCaf2bBWM5b3Ih4fGFw3poe0wiA,17937
@@ -122,7 +122,7 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
 datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
 datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
 datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
-datahub/emitter/mcp_builder.py,sha256=JyAC8obvkf6ZpINJ8I2p-Ofr52-tuoQBDbxp-bhjyrM,11871
+datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
 datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
 datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
 datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
@@ -171,7 +171,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
 datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
 datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
 datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/graph/client.py,sha256=Qtjf5YrQeQzcTb0qxr6-y4MSEKSJm8f0hO6BoeRA_yI,65916
+datahub/ingestion/graph/client.py,sha256=DUOy3fzwUMT0wJ2GAcLiYa5bzelwfZjBmzhbm95IIys,69918
 datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
 datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
 datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
 datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
 datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
 datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
-datahub/ingestion/source/mlflow.py,sha256=pWhBEIXxI19-yTFeZU72Pgc3uY9nK0-w2TEpG803HNI,32485
+datahub/ingestion/source/mlflow.py,sha256=6uN1fjyubs9rjAsdtkSRMKf7h3_89UvFfWDqkgdvPdY,32422
 datahub/ingestion/source/mode.py,sha256=20vWflnRIMWqK8q2Mt2PorMBLnzRAA4bMjcWEyqhTio,64506
 datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
 datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
@@ -333,7 +333,7 @@ datahub/ingestion/source/hex/hex.py,sha256=DPpsi5e-sdUgbS0Okyvx1mvc00Adu47zA65oF
 datahub/ingestion/source/hex/mapper.py,sha256=6dsGvvhPAOAbAG1ayxLwipgJGt1q7YanWYfMX3rZeiM,12603
 datahub/ingestion/source/hex/model.py,sha256=hmMfOLEGZcKjwy2DW29OPf_9_Q_TesgnUTCen2br_fA,1471
 datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/iceberg/iceberg.py,sha256=i9o0ia2vQUGqoagN7GgsoaUlhjj9xKBNP-3ia2cMgHY,30762
+datahub/ingestion/source/iceberg/iceberg.py,sha256=PhLLXWgBdfZ3hL7LgLvDr6aTK-QKmiZCFNz5jD-mxZM,30773
 datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
 datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
 datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -378,7 +378,7 @@ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 datahub/ingestion/source/powerbi/config.py,sha256=5rG62dspGF9jIo8l6HLpB6ECv5n-t1un2ZyGiisD784,24219
 datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
 datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
-datahub/ingestion/source/powerbi/powerbi.py,sha256=a5itVuGmg-0xAQK5a-cXB5UxpR3rLJx0o2x_lz-8ox8,55955
+datahub/ingestion/source/powerbi/powerbi.py,sha256=f0vwzVdKAU7Qp7gRHgTOPq45ThUmsbFXCwcIDaS1S34,56464
 datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
 datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
@@ -488,7 +488,7 @@ datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F
 datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
 datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
 datahub/ingestion/source/sql/teradata.py,sha256=9WdrxDy02lRJi9IZgsAATFsmxcQnIw5Gr6yCqHJQy5k,33507
-datahub/ingestion/source/sql/trino.py,sha256=8viVOu67mhDnsO3LuPSRi1WDR5MLdOXu7HOo1vtHVmo,17882
+datahub/ingestion/source/sql/trino.py,sha256=gSLDyETKavSVR8l9wdebrfoc41cqAWz6ApqIicW0BF8,17892
 datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
 datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
 datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
@@ -504,7 +504,7 @@ datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-Lj
 datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
 datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
 datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
-datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=-KZjprFEO2tFtt2j236uRV1GVZEL5Q7Mt7TCZWfcxs8,14921
+datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Lr2HYGx_b2FQ8A36s7s11tl-4-mGIM13bfy5JbQ3LtM,14890
 datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
 datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
 datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
@@ -535,7 +535,7 @@ datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e
 datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
 datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
 datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/vertexai/vertexai.py,sha256=TXTa-Cm1C7xkbTNXNmKr_hi2FDJ9VG-ahrxk9yuxaTg,43635
+datahub/ingestion/source/vertexai/vertexai.py,sha256=ipq2Zb2lXTZkUg9r78kvuIIhX7mc-5hr-o83395IWpo,43589
 datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
 datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=fE2l_xXvKONqb4jabl4LtKRBZDnP3koMLJV520wEAMg,2555
 datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -970,7 +970,7 @@ datahub/utilities/file_backed_collections.py,sha256=zW-Xy6zO1E6R-FRJKjlimMKr-emW
 datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
 datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
 datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
-datahub/utilities/ingest_utils.py,sha256=znIuvFkCdOAOg1dkF-mJn03A2YYFPHlDPZsfCPxKkaQ,3117
+datahub/utilities/ingest_utils.py,sha256=OgETzX_9g6DcYlxGH0_xgAmAlWLSROr25ydDL-mBhKM,3137
 datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
 datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
 datahub/utilities/lossy_collections.py,sha256=5rdtfK2pjwvOrrzLf_KGFOMiVvLLmoXj5EVQXTFSR3E,5704
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.0.0.1rc7.dist-info/METADATA,sha256=QkeMAnAXXez9FFTnJVpxhOJMNDadF0gfVQ3uz3Fh4i8,176849
-acryl_datahub-1.0.0.1rc7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-acryl_datahub-1.0.0.1rc7.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
-acryl_datahub-1.0.0.1rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
-acryl_datahub-1.0.0.1rc7.dist-info/RECORD,,
+acryl_datahub-1.0.0.2rc1.dist-info/METADATA,sha256=IE26ZK9HREmhmiMf2zQds-JatSIyAh9gcaVjGyOAGLE,176849
+acryl_datahub-1.0.0.2rc1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+acryl_datahub-1.0.0.2rc1.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+acryl_datahub-1.0.0.2rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.0.0.2rc1.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
 # Published at https://pypi.org/project/acryl-datahub/.
 __package_name__ = "acryl-datahub"
-__version__ = "1.0.0.1rc7"
+__version__ = "1.0.0.2rc1"


 def is_dev_mode() -> bool:
datahub/cli/ingest_cli.py CHANGED
@@ -216,9 +216,9 @@ def run(
 @click.option(
     "--executor-id",
     type=str,
-    default="default",
     help="Executor id to route execution requests to. Do not use this unless you have configured a custom executor.",
     required=False,
+    default=None,
 )
 @click.option(
     "--cli-version",
@@ -239,7 +239,7 @@ def run(
     type=str,
     help="Timezone for the schedule in 'America/New_York' format. Uses UTC by default.",
     required=False,
-    default="UTC",
+    default=None,
 )
 @click.option(
     "--debug", type=bool, help="Should we debug.", required=False, default=False
@@ -255,10 +255,10 @@ def deploy(
     name: Optional[str],
     config: str,
     urn: Optional[str],
-    executor_id: str,
+    executor_id: Optional[str],
     cli_version: Optional[str],
     schedule: Optional[str],
-    time_zone: str,
+    time_zone: Optional[str],
     extra_pip: Optional[str],
     debug: bool = False,
 ) -> None:
datahub/emitter/mcp_builder.py CHANGED
@@ -137,6 +137,10 @@ class ProjectIdKey(ContainerKey):
     project_id: str


+class ExperimentKey(ContainerKey):
+    id: str
+
+
 class MetastoreKey(ContainerKey):
     metastore: str

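The new ExperimentKey replaces the identical ContainerKeyWithId classes that the MLflow and Vertex AI sources each declared locally (both removed further down). A minimal usage sketch, with illustrative field values; as_urn() is the usual ContainerKey helper for deriving the container URN:

from datahub.emitter.mcp_builder import ExperimentKey

# Equal key fields always hash to the same container guid, so the two
# sources now emit experiment containers of one shared key type.
key = ExperimentKey(
    platform="urn:li:dataPlatform:mlflow",  # illustrative value
    id="price-forecasting-experiment",      # illustrative value
)
print(key.as_urn())  # urn:li:container:<guid derived from the key fields>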
datahub/ingestion/graph/client.py CHANGED
@@ -27,6 +27,7 @@ from pydantic import BaseModel
 from requests.models import HTTPError
 from typing_extensions import deprecated

+from datahub._codegen.aspect import _Aspect
 from datahub.cli import config_utils
 from datahub.configuration.common import ConfigModel, GraphError, OperationalError
 from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
@@ -1697,6 +1698,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):

         return res["runAssertionsForAsset"]

+    @deprecated("Use get_entities instead which returns typed aspects")
     def get_entities_v2(
         self,
         entity_name: str,
@@ -1736,6 +1738,108 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
                 retval[entity_urn][aspect_key] = aspect_value
         return retval

+    def get_entities(
+        self,
+        entity_name: str,
+        urns: List[str],
+        aspects: Optional[List[str]] = None,
+        with_system_metadata: bool = False,
+    ) -> Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]]:
+        """
+        Get entities using the OpenAPI v3 endpoint, deserializing aspects into typed objects.
+
+        Args:
+            entity_name: The entity type name
+            urns: List of entity URNs to fetch
+            aspects: Optional list of aspect names to fetch. If None, all aspects will be fetched.
+            with_system_metadata: If True, return system metadata along with each aspect.
+
+        Returns:
+            A dictionary mapping URNs to a dictionary of aspect name to tuples of
+            (typed aspect object, system metadata). If with_system_metadata is False,
+            the system metadata in the tuple will be None.
+        """
+        aspects = aspects or []
+
+        request_payload = []
+        for urn in urns:
+            entity_request: Dict[str, Any] = {"urn": urn}
+            for aspect_name in aspects:
+                entity_request[aspect_name] = {}
+            request_payload.append(entity_request)
+
+        headers: Dict[str, Any] = {
+            "Accept": "application/json",
+            "Content-Type": "application/json",
+        }
+
+        url = f"{self.config.server}/openapi/v3/entity/{entity_name}/batchGet"
+        if with_system_metadata:
+            url += "?systemMetadata=true"
+
+        response = self._session.post(
+            url, data=json.dumps(request_payload), headers=headers
+        )
+        response.raise_for_status()
+        entities = response.json()
+
+        result: Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]] = {}
+
+        for entity in entities:
+            entity_urn = entity.get("urn")
+            if entity_urn is None:
+                logger.warning(
+                    f"Missing URN in entity response: {entity}, skipping deserialization"
+                )
+                continue
+
+            entity_aspects: Dict[
+                str, Tuple[_Aspect, Optional[SystemMetadataClass]]
+            ] = {}
+
+            for aspect_name, aspect_obj in entity.items():
+                if aspect_name == "urn":
+                    continue
+
+                aspect_class = ASPECT_NAME_MAP.get(aspect_name)
+                if aspect_class is None:
+                    logger.warning(
+                        f"Unknown aspect type {aspect_name}, skipping deserialization"
+                    )
+                    continue
+
+                aspect_value = aspect_obj.get("value")
+                if aspect_value is None:
+                    logger.warning(
+                        f"Unknown aspect value for aspect {aspect_name}, skipping deserialization"
+                    )
+                    continue
+
+                try:
+                    post_json_obj = post_json_transform(aspect_value)
+                    typed_aspect = aspect_class.from_obj(post_json_obj)
+                    assert isinstance(typed_aspect, aspect_class) and isinstance(
+                        typed_aspect, _Aspect
+                    )
+
+                    system_metadata = None
+                    if with_system_metadata:
+                        system_metadata_obj = aspect_obj.get("systemMetadata")
+                        if system_metadata_obj:
+                            system_metadata = SystemMetadataClass.from_obj(
+                                system_metadata_obj
+                            )
+
+                    entity_aspects[aspect_name] = (typed_aspect, system_metadata)
+                except Exception as e:
+                    logger.error(f"Error deserializing aspect {aspect_name}: {e}")
+                    raise
+
+            if entity_aspects:
+                result[entity_urn] = entity_aspects
+
+        return result
+
     def upsert_custom_assertion(
         self,
         urn: Optional[str],
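A hedged usage sketch for the new typed batch-get; the server URL, URN, and aspect names below are placeholders, not values from this diff:

from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))
entities = graph.get_entities(
    entity_name="dataset",
    urns=["urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)"],
    aspects=["status", "datasetProperties"],  # omit to fetch all aspects
    with_system_metadata=True,
)
for urn, aspect_map in entities.items():
    for aspect_name, (aspect, system_metadata) in aspect_map.items():
        # each aspect is a typed _Aspect subclass, e.g. DatasetPropertiesClass
        print(urn, aspect_name, type(aspect).__name__)

Unlike the now-deprecated get_entities_v2, the returned aspects are typed objects rather than raw JSON dictionaries.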
datahub/ingestion/source/iceberg/iceberg.py CHANGED
@@ -425,23 +425,21 @@ class IcebergSource(StatefulIngestionSourceBase):
     def _get_dataset_properties_aspect(
         self, dataset_name: str, table: Table
     ) -> DatasetPropertiesClass:
-        additional_properties = {}
+        created: Optional[TimeStampClass] = None
         custom_properties = table.metadata.properties.copy()
         custom_properties["location"] = table.metadata.location
         custom_properties["format-version"] = str(table.metadata.format_version)
         custom_properties["partition-spec"] = str(self._get_partition_aspect(table))
+        last_modified: Optional[int] = table.metadata.last_updated_ms
         if table.current_snapshot():
            custom_properties["snapshot-id"] = str(table.current_snapshot().snapshot_id)
            custom_properties["manifest-list"] = table.current_snapshot().manifest_list
-            additional_properties["lastModified"] = TimeStampClass(
-                int(table.current_snapshot().timestamp_ms)
-            )
+            if not last_modified:
+                last_modified = int(table.current_snapshot().timestamp_ms)
         if "created-at" in custom_properties:
             try:
                 dt = dateutil_parser.isoparse(custom_properties["created-at"])
-                additional_properties["created"] = TimeStampClass(
-                    int(dt.timestamp() * 1000)
-                )
+                created = TimeStampClass(int(dt.timestamp() * 1000))
             except Exception as ex:
                 LOGGER.warning(
                     f"Exception while trying to parse creation date {custom_properties['created-at']}, ignoring: {ex}"
@@ -451,8 +449,10 @@ class IcebergSource(StatefulIngestionSourceBase):
             name=table.name()[-1],
             description=table.metadata.properties.get("comment", None),
             customProperties=custom_properties,
-            lastModified=additional_properties.get("lastModified"),
-            created=additional_properties.get("created"),
+            lastModified=TimeStampClass(last_modified)
+            if last_modified is not None
+            else None,
+            created=created,
             qualifiedName=dataset_name,
         )

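The net effect of the Iceberg change is a precedence rule for the dataset's lastModified timestamp: the table metadata's last_updated_ms wins, and the current snapshot's timestamp_ms is only a fallback. A self-contained sketch of that rule (the helper name is hypothetical; timestamps are epoch milliseconds):

from typing import Optional

def resolve_last_modified(
    metadata_last_updated_ms: Optional[int],
    snapshot_timestamp_ms: Optional[int],
) -> Optional[int]:
    # Prefer the table metadata timestamp, mirroring the diff above.
    if metadata_last_updated_ms:
        return metadata_last_updated_ms
    return snapshot_timestamp_ms

assert resolve_last_modified(1700000000000, 1600000000000) == 1700000000000
assert resolve_last_modified(None, 1600000000000) == 1600000000000
assert resolve_last_modified(None, None) is None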
datahub/ingestion/source/mlflow.py CHANGED
@@ -16,7 +16,7 @@ from datahub.api.entities.dataprocess.dataprocess_instance import (
 )
 from datahub.configuration.source_common import EnvConfigMixin
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import ContainerKey
+from datahub.emitter.mcp_builder import ExperimentKey
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -77,10 +77,6 @@ from datahub.sdk.dataset import Dataset
 T = TypeVar("T")


-class ContainerKeyWithId(ContainerKey):
-    id: str
-
-
 class MLflowConfig(StatefulIngestionConfigBase, EnvConfigMixin):
     tracking_uri: Optional[str] = Field(
         default=None,
@@ -252,7 +248,7 @@ class MLflowSource(StatefulIngestionSourceBase):
         self, experiment: Experiment
     ) -> Iterable[MetadataWorkUnit]:
         experiment_container = Container(
-            container_key=ContainerKeyWithId(
+            container_key=ExperimentKey(
                 platform=str(DataPlatformUrn(platform_name=self.platform)),
                 id=experiment.name,
             ),
@@ -470,7 +466,7 @@ class MLflowSource(StatefulIngestionSourceBase):
     def _get_run_workunits(
         self, experiment: Experiment, run: Run
     ) -> Iterable[MetadataWorkUnit]:
-        experiment_key = ContainerKeyWithId(
+        experiment_key = ExperimentKey(
             platform=str(DataPlatformUrn(self.platform)), id=experiment.name
         )

datahub/ingestion/source/powerbi/powerbi.py CHANGED
@@ -94,7 +94,7 @@ from datahub.metadata.schema_classes import (
     UpstreamLineageClass,
     ViewPropertiesClass,
 )
-from datahub.metadata.urns import ChartUrn
+from datahub.metadata.urns import ChartUrn, DatasetUrn
 from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
 from datahub.utilities.dedup_list import deduplicate_list
 from datahub.utilities.urns.urn_iter import lowercase_dataset_urn
@@ -1083,6 +1083,7 @@ class Mapper:
         report: powerbi_data_classes.Report,
         chart_mcps: List[MetadataChangeProposalWrapper],
         user_mcps: List[MetadataChangeProposalWrapper],
+        dataset_edges: List[EdgeClass],
     ) -> List[MetadataChangeProposalWrapper]:
         """
         Map PowerBi report to Datahub dashboard
@@ -1104,6 +1105,7 @@ class Mapper:
             charts=chart_urn_list,
             lastModified=ChangeAuditStamps(),
             dashboardUrl=report.webUrl,
+            datasetEdges=dataset_edges,
         )

         info_mcp = self.new_mcp(
@@ -1197,12 +1199,23 @@ class Mapper:
         ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
         chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)

+        # collect all upstream datasets; using a set to retain unique urns
+        dataset_urns = {
+            dataset.entityUrn
+            for dataset in ds_mcps
+            if dataset.entityType == DatasetUrn.ENTITY_TYPE and dataset.entityUrn
+        }
+        dataset_edges = [
+            EdgeClass(destinationUrn=dataset_urn) for dataset_urn in dataset_urns
+        ]
+
         # Let's convert report to datahub dashboard
         report_mcps = self.report_to_dashboard(
             workspace=workspace,
             report=report,
             chart_mcps=chart_mcps,
             user_mcps=user_mcps,
+            dataset_edges=dataset_edges,
         )

         # Now add MCPs in sequence
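With this change, each PowerBI report's dashboard aspect carries explicit upstream dataset lineage via datasetEdges. A hedged, self-contained sketch of the aspect being produced (the title and URN are placeholders; class and field names follow the diff):

from datahub.metadata.schema_classes import (
    ChangeAuditStampsClass,
    DashboardInfoClass,
    EdgeClass,
)

dataset_urns = {
    "urn:li:dataset:(urn:li:dataPlatform:powerbi,workspace.model.table,PROD)",
}
info = DashboardInfoClass(
    title="Sales report",
    description="",
    charts=[],
    lastModified=ChangeAuditStampsClass(),
    datasetEdges=[EdgeClass(destinationUrn=urn) for urn in sorted(dataset_urns)],
)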
datahub/ingestion/source/sql/trino.py CHANGED
@@ -128,9 +128,10 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k
     if catalog_name is None:
         raise exc.NoSuchTableError("catalog is required in connection")
     connector_name = get_catalog_connector_name(connection.engine, catalog_name)
-    if connector_name is None:
-        return {}
-    if connector_name in PROPERTIES_TABLE_SUPPORTED_CONNECTORS:
+    if (
+        connector_name is not None
+        and connector_name in PROPERTIES_TABLE_SUPPORTED_CONNECTORS
+    ):
         properties_table = self._get_full_table(f"{table_name}$properties", schema)
         query = f"SELECT * FROM {properties_table}"
         row = connection.execute(sql.text(query)).fetchone()
datahub/ingestion/source/state/stale_entity_removal_handler.py CHANGED
@@ -45,7 +45,6 @@ class StatefulStaleMetadataRemovalConfig(StatefulIngestionConfig):
         description="Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the 'fail_safe_threshold'.",
         le=100.0,
         ge=0.0,
-        hidden_from_docs=True,
     )


datahub/ingestion/source/vertexai/vertexai.py CHANGED
@@ -22,7 +22,11 @@ from google.oauth2 import service_account

 import datahub.emitter.mce_builder as builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import ContainerKey, ProjectIdKey, gen_containers
+from datahub.emitter.mcp_builder import (
+    ExperimentKey,
+    ProjectIdKey,
+    gen_containers,
+)
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -96,10 +100,6 @@ class ModelMetadata:
     endpoints: Optional[List[Endpoint]] = None


-class ContainerKeyWithId(ContainerKey):
-    id: str
-
-
 @platform_name("Vertex AI", id="vertexai")
 @config_class(VertexAIConfig)
 @support_status(SupportStatus.TESTING)
@@ -173,7 +173,7 @@ class VertexAISource(Source):
     ) -> Iterable[MetadataWorkUnit]:
         yield from gen_containers(
             parent_container_key=self._get_project_container(),
-            container_key=ContainerKeyWithId(
+            container_key=ExperimentKey(
                 platform=self.platform,
                 id=self._make_vertexai_experiment_name(experiment.name),
             ),
@@ -309,7 +309,7 @@ class VertexAISource(Source):
     def _gen_experiment_run_mcps(
         self, experiment: Experiment, run: ExperimentRun
     ) -> Iterable[MetadataChangeProposalWrapper]:
-        experiment_key = ContainerKeyWithId(
+        experiment_key = ExperimentKey(
             platform=self.platform,
             id=self._make_vertexai_experiment_name(experiment.name),
         )
datahub/utilities/ingest_utils.py CHANGED
@@ -32,10 +32,10 @@ def deploy_source_vars(
     name: Optional[str],
     config: str,
     urn: Optional[str],
-    executor_id: str,
+    executor_id: Optional[str],
     cli_version: Optional[str],
     schedule: Optional[str],
-    time_zone: str,
+    time_zone: Optional[str],
     extra_pip: Optional[str],
     debug: bool = False,
 ) -> dict:
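Both the CLI options and this helper now accept None, presumably deferring executor and timezone resolution to the backend instead of hard-coding "default" and "UTC" on the client side. A hedged call sketch using only names visible in this diff (argument values are placeholders):

from datahub.utilities.ingest_utils import deploy_source_vars

variables = deploy_source_vars(
    name="nightly-ingestion",  # placeholder
    config="recipe.yaml",      # placeholder path
    urn=None,
    executor_id=None,  # previously forced to "default"
    cli_version=None,
    schedule="0 2 * * *",
    time_zone=None,    # previously forced to "UTC"
    extra_pip=None,
    debug=False,
)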