acryl-datahub 1.0.0.1rc7__py3-none-any.whl → 1.0.0.2rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. See the advisory on the package registry page for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.0.0.1rc7.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.0.0.2rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=AZj-rwp4edRcZvS9Mq4fxTeV64QHFW-6zysNAtjc2qg,323
4
+ datahub/_version.py,sha256=lFv-ImaIXKL_EDY2GlHJHg9iVkj13C_xihZRNnxH3M8,323
5
5
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
6
6
  datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -72,7 +72,7 @@ datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
72
72
  datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
73
73
  datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
74
74
  datahub/cli/iceberg_cli.py,sha256=-XT3wpkr8b-HFMafYk7lSon3Lys6XjTQA8U1b698ByM,23003
75
- datahub/cli/ingest_cli.py,sha256=Xvdb0v-r_8rhlmZo161E043BMbLuyvNq1LLJWHdlXT0,20526
75
+ datahub/cli/ingest_cli.py,sha256=Welutg0LOjuEiBOnQdNTnEZFDyKZiiWxqyQK-Go8dL8,20540
76
76
  datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
77
77
  datahub/cli/lite_cli.py,sha256=XKMejSuYUToKBvgN3YmmnxjRcaG5WPw23gJuQK8pgRc,13099
78
78
  datahub/cli/migrate.py,sha256=3orGfLNsdh1Q7gkPaCaf2bBWM5b3Ih4fGFw3poe0wiA,17937
@@ -122,7 +122,7 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
122
122
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
123
123
  datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
124
124
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
125
- datahub/emitter/mcp_builder.py,sha256=JyAC8obvkf6ZpINJ8I2p-Ofr52-tuoQBDbxp-bhjyrM,11871
125
+ datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
126
126
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
127
127
  datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
128
128
  datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
@@ -171,7 +171,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
171
171
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
172
172
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
173
173
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
174
- datahub/ingestion/graph/client.py,sha256=Qtjf5YrQeQzcTb0qxr6-y4MSEKSJm8f0hO6BoeRA_yI,65916
174
+ datahub/ingestion/graph/client.py,sha256=DUOy3fzwUMT0wJ2GAcLiYa5bzelwfZjBmzhbm95IIys,69918
175
175
  datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
176
176
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
177
177
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
205
205
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
206
206
  datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
207
207
  datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
208
- datahub/ingestion/source/mlflow.py,sha256=pWhBEIXxI19-yTFeZU72Pgc3uY9nK0-w2TEpG803HNI,32485
208
+ datahub/ingestion/source/mlflow.py,sha256=6uN1fjyubs9rjAsdtkSRMKf7h3_89UvFfWDqkgdvPdY,32422
209
209
  datahub/ingestion/source/mode.py,sha256=20vWflnRIMWqK8q2Mt2PorMBLnzRAA4bMjcWEyqhTio,64506
210
210
  datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
211
211
  datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
@@ -217,7 +217,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
217
217
  datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
218
218
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
219
219
  datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
220
- datahub/ingestion/source/superset.py,sha256=FRZ7cCURW6NHUOKaFicdAZq2caXektvO9rJE4tO9scU,40336
220
+ datahub/ingestion/source/superset.py,sha256=bMfvm9HgUoS3T7BjHsDrrOodc8iBRrJRQYv2D66bABo,41194
221
221
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
222
  datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
223
223
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
@@ -328,12 +328,13 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
328
328
  datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
329
329
  datahub/ingestion/source/hex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
330
  datahub/ingestion/source/hex/api.py,sha256=JfFPD8O4z16fwZE_BdX5aCQztEq-tbzxJJ7aofH4DE4,12274
331
- datahub/ingestion/source/hex/constants.py,sha256=NuBjxgJpIt598Cyn_9IcZ158PqBdn5vNjw8T92sTQck,115
332
- datahub/ingestion/source/hex/hex.py,sha256=DPpsi5e-sdUgbS0Okyvx1mvc00Adu47zA65oFnRP74A,6510
333
- datahub/ingestion/source/hex/mapper.py,sha256=6dsGvvhPAOAbAG1ayxLwipgJGt1q7YanWYfMX3rZeiM,12603
334
- datahub/ingestion/source/hex/model.py,sha256=hmMfOLEGZcKjwy2DW29OPf_9_Q_TesgnUTCen2br_fA,1471
331
+ datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJX1atiiDZyKtg,271
332
+ datahub/ingestion/source/hex/hex.py,sha256=PIRl8fPkKtlHV7cqR4H8RKVYdTLgEFXHFzc3QAqJLhE,12733
333
+ datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
334
+ datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
335
+ datahub/ingestion/source/hex/query_fetcher.py,sha256=5r065vL7XohcgZ_fj-1h6o8cxrPin37IeYsC99GU6LA,12287
335
336
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
- datahub/ingestion/source/iceberg/iceberg.py,sha256=i9o0ia2vQUGqoagN7GgsoaUlhjj9xKBNP-3ia2cMgHY,30762
337
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=PhLLXWgBdfZ3hL7LgLvDr6aTK-QKmiZCFNz5jD-mxZM,30773
337
338
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
338
339
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
339
340
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -378,7 +379,7 @@ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
378
379
  datahub/ingestion/source/powerbi/config.py,sha256=5rG62dspGF9jIo8l6HLpB6ECv5n-t1un2ZyGiisD784,24219
379
380
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
380
381
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
381
- datahub/ingestion/source/powerbi/powerbi.py,sha256=a5itVuGmg-0xAQK5a-cXB5UxpR3rLJx0o2x_lz-8ox8,55955
382
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=f0vwzVdKAU7Qp7gRHgTOPq45ThUmsbFXCwcIDaS1S34,56464
382
383
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
383
384
  datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
384
385
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
@@ -488,7 +489,7 @@ datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F
488
489
  datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
489
490
  datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
490
491
  datahub/ingestion/source/sql/teradata.py,sha256=9WdrxDy02lRJi9IZgsAATFsmxcQnIw5Gr6yCqHJQy5k,33507
491
- datahub/ingestion/source/sql/trino.py,sha256=8viVOu67mhDnsO3LuPSRi1WDR5MLdOXu7HOo1vtHVmo,17882
492
+ datahub/ingestion/source/sql/trino.py,sha256=gSLDyETKavSVR8l9wdebrfoc41cqAWz6ApqIicW0BF8,17892
492
493
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
493
494
  datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
494
495
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
@@ -504,7 +505,7 @@ datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-Lj
504
505
  datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
505
506
  datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
506
507
  datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
507
- datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=-KZjprFEO2tFtt2j236uRV1GVZEL5Q7Mt7TCZWfcxs8,14921
508
+ datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Lr2HYGx_b2FQ8A36s7s11tl-4-mGIM13bfy5JbQ3LtM,14890
508
509
  datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
509
510
  datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
510
511
  datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
@@ -535,7 +536,7 @@ datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e
535
536
  datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
536
537
  datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
537
538
  datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
538
- datahub/ingestion/source/vertexai/vertexai.py,sha256=TXTa-Cm1C7xkbTNXNmKr_hi2FDJ9VG-ahrxk9yuxaTg,43635
539
+ datahub/ingestion/source/vertexai/vertexai.py,sha256=ipq2Zb2lXTZkUg9r78kvuIIhX7mc-5hr-o83395IWpo,43589
539
540
  datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
540
541
  datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=fE2l_xXvKONqb4jabl4LtKRBZDnP3koMLJV520wEAMg,2555
541
542
  datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -970,7 +971,7 @@ datahub/utilities/file_backed_collections.py,sha256=zW-Xy6zO1E6R-FRJKjlimMKr-emW
970
971
  datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
971
972
  datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
972
973
  datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
973
- datahub/utilities/ingest_utils.py,sha256=znIuvFkCdOAOg1dkF-mJn03A2YYFPHlDPZsfCPxKkaQ,3117
974
+ datahub/utilities/ingest_utils.py,sha256=OgETzX_9g6DcYlxGH0_xgAmAlWLSROr25ydDL-mBhKM,3137
974
975
  datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
975
976
  datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
976
977
  datahub/utilities/lossy_collections.py,sha256=5rdtfK2pjwvOrrzLf_KGFOMiVvLLmoXj5EVQXTFSR3E,5704
@@ -1043,8 +1044,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1043
1044
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1044
1045
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1045
1046
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1046
- acryl_datahub-1.0.0.1rc7.dist-info/METADATA,sha256=QkeMAnAXXez9FFTnJVpxhOJMNDadF0gfVQ3uz3Fh4i8,176849
1047
- acryl_datahub-1.0.0.1rc7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1048
- acryl_datahub-1.0.0.1rc7.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1049
- acryl_datahub-1.0.0.1rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1050
- acryl_datahub-1.0.0.1rc7.dist-info/RECORD,,
1047
+ acryl_datahub-1.0.0.2rc2.dist-info/METADATA,sha256=VuKbVh0Lt8z7Jik8lZ39CF56PZHqn_oIwn2LBmYzrVc,176849
1048
+ acryl_datahub-1.0.0.2rc2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1049
+ acryl_datahub-1.0.0.2rc2.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1050
+ acryl_datahub-1.0.0.2rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1051
+ acryl_datahub-1.0.0.2rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0.1rc7"
3
+ __version__ = "1.0.0.2rc2"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/cli/ingest_cli.py CHANGED
@@ -216,9 +216,9 @@ def run(
216
216
  @click.option(
217
217
  "--executor-id",
218
218
  type=str,
219
- default="default",
220
219
  help="Executor id to route execution requests to. Do not use this unless you have configured a custom executor.",
221
220
  required=False,
221
+ default=None,
222
222
  )
223
223
  @click.option(
224
224
  "--cli-version",
@@ -239,7 +239,7 @@ def run(
239
239
  type=str,
240
240
  help="Timezone for the schedule in 'America/New_York' format. Uses UTC by default.",
241
241
  required=False,
242
- default="UTC",
242
+ default=None,
243
243
  )
244
244
  @click.option(
245
245
  "--debug", type=bool, help="Should we debug.", required=False, default=False
@@ -255,10 +255,10 @@ def deploy(
255
255
  name: Optional[str],
256
256
  config: str,
257
257
  urn: Optional[str],
258
- executor_id: str,
258
+ executor_id: Optional[str],
259
259
  cli_version: Optional[str],
260
260
  schedule: Optional[str],
261
- time_zone: str,
261
+ time_zone: Optional[str],
262
262
  extra_pip: Optional[str],
263
263
  debug: bool = False,
264
264
  ) -> None:
@@ -137,6 +137,10 @@ class ProjectIdKey(ContainerKey):
137
137
  project_id: str
138
138
 
139
139
 
140
+ class ExperimentKey(ContainerKey):
141
+ id: str
142
+
143
+
140
144
  class MetastoreKey(ContainerKey):
141
145
  metastore: str
142
146
 
@@ -27,6 +27,7 @@ from pydantic import BaseModel
27
27
  from requests.models import HTTPError
28
28
  from typing_extensions import deprecated
29
29
 
30
+ from datahub._codegen.aspect import _Aspect
30
31
  from datahub.cli import config_utils
31
32
  from datahub.configuration.common import ConfigModel, GraphError, OperationalError
32
33
  from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
@@ -1697,6 +1698,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1697
1698
 
1698
1699
  return res["runAssertionsForAsset"]
1699
1700
 
1701
+ @deprecated("Use get_entities instead which returns typed aspects")
1700
1702
  def get_entities_v2(
1701
1703
  self,
1702
1704
  entity_name: str,
@@ -1736,6 +1738,108 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1736
1738
  retval[entity_urn][aspect_key] = aspect_value
1737
1739
  return retval
1738
1740
 
1741
+ def get_entities(
1742
+ self,
1743
+ entity_name: str,
1744
+ urns: List[str],
1745
+ aspects: Optional[List[str]] = None,
1746
+ with_system_metadata: bool = False,
1747
+ ) -> Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]]:
1748
+ """
1749
+ Get entities using the OpenAPI v3 endpoint, deserializing aspects into typed objects.
1750
+
1751
+ Args:
1752
+ entity_name: The entity type name
1753
+ urns: List of entity URNs to fetch
1754
+ aspects: Optional list of aspect names to fetch. If None, all aspects will be fetched.
1755
+ with_system_metadata: If True, return system metadata along with each aspect.
1756
+
1757
+ Returns:
1758
+ A dictionary mapping URNs to a dictionary of aspect name to tuples of
1759
+ (typed aspect object, system metadata). If with_system_metadata is False,
1760
+ the system metadata in the tuple will be None.
1761
+ """
1762
+ aspects = aspects or []
1763
+
1764
+ request_payload = []
1765
+ for urn in urns:
1766
+ entity_request: Dict[str, Any] = {"urn": urn}
1767
+ for aspect_name in aspects:
1768
+ entity_request[aspect_name] = {}
1769
+ request_payload.append(entity_request)
1770
+
1771
+ headers: Dict[str, Any] = {
1772
+ "Accept": "application/json",
1773
+ "Content-Type": "application/json",
1774
+ }
1775
+
1776
+ url = f"{self.config.server}/openapi/v3/entity/{entity_name}/batchGet"
1777
+ if with_system_metadata:
1778
+ url += "?systemMetadata=true"
1779
+
1780
+ response = self._session.post(
1781
+ url, data=json.dumps(request_payload), headers=headers
1782
+ )
1783
+ response.raise_for_status()
1784
+ entities = response.json()
1785
+
1786
+ result: Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]] = {}
1787
+
1788
+ for entity in entities:
1789
+ entity_urn = entity.get("urn")
1790
+ if entity_urn is None:
1791
+ logger.warning(
1792
+ f"Missing URN in entity response: {entity}, skipping deserialization"
1793
+ )
1794
+ continue
1795
+
1796
+ entity_aspects: Dict[
1797
+ str, Tuple[_Aspect, Optional[SystemMetadataClass]]
1798
+ ] = {}
1799
+
1800
+ for aspect_name, aspect_obj in entity.items():
1801
+ if aspect_name == "urn":
1802
+ continue
1803
+
1804
+ aspect_class = ASPECT_NAME_MAP.get(aspect_name)
1805
+ if aspect_class is None:
1806
+ logger.warning(
1807
+ f"Unknown aspect type {aspect_name}, skipping deserialization"
1808
+ )
1809
+ continue
1810
+
1811
+ aspect_value = aspect_obj.get("value")
1812
+ if aspect_value is None:
1813
+ logger.warning(
1814
+ f"Unknown aspect value for aspect {aspect_name}, skipping deserialization"
1815
+ )
1816
+ continue
1817
+
1818
+ try:
1819
+ post_json_obj = post_json_transform(aspect_value)
1820
+ typed_aspect = aspect_class.from_obj(post_json_obj)
1821
+ assert isinstance(typed_aspect, aspect_class) and isinstance(
1822
+ typed_aspect, _Aspect
1823
+ )
1824
+
1825
+ system_metadata = None
1826
+ if with_system_metadata:
1827
+ system_metadata_obj = aspect_obj.get("systemMetadata")
1828
+ if system_metadata_obj:
1829
+ system_metadata = SystemMetadataClass.from_obj(
1830
+ system_metadata_obj
1831
+ )
1832
+
1833
+ entity_aspects[aspect_name] = (typed_aspect, system_metadata)
1834
+ except Exception as e:
1835
+ logger.error(f"Error deserializing aspect {aspect_name}: {e}")
1836
+ raise
1837
+
1838
+ if entity_aspects:
1839
+ result[entity_urn] = entity_aspects
1840
+
1841
+ return result
1842
+
1739
1843
  def upsert_custom_assertion(
1740
1844
  self,
1741
1845
  urn: Optional[str],
@@ -1,3 +1,8 @@
1
+ from datahub.metadata.urns import DataPlatformUrn
2
+
1
3
  HEX_PLATFORM_NAME = "hex"
4
+ HEX_PLATFORM_URN = DataPlatformUrn(platform_name=HEX_PLATFORM_NAME)
2
5
  HEX_API_BASE_URL_DEFAULT = "https://app.hex.tech/api/v1"
3
6
  HEX_API_PAGE_SIZE_DEFAULT = 100
7
+
8
+ DATAHUB_API_PAGE_SIZE_DEFAULT = 100
@@ -1,9 +1,12 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime, timedelta, timezone
1
3
  from typing import Any, Dict, Iterable, List, Optional
2
4
 
3
- from pydantic import Field, SecretStr
5
+ from pydantic import Field, SecretStr, root_validator
4
6
  from typing_extensions import assert_never
5
7
 
6
8
  from datahub.configuration.common import AllowDenyPattern
9
+ from datahub.configuration.datetimes import parse_user_datetime
7
10
  from datahub.configuration.source_common import (
8
11
  EnvConfigMixin,
9
12
  PlatformInstanceConfigMixin,
@@ -21,12 +24,17 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor
21
24
  from datahub.ingestion.api.workunit import MetadataWorkUnit
22
25
  from datahub.ingestion.source.hex.api import HexApi, HexApiReport
23
26
  from datahub.ingestion.source.hex.constants import (
27
+ DATAHUB_API_PAGE_SIZE_DEFAULT,
24
28
  HEX_API_BASE_URL_DEFAULT,
25
29
  HEX_API_PAGE_SIZE_DEFAULT,
26
30
  HEX_PLATFORM_NAME,
27
31
  )
28
32
  from datahub.ingestion.source.hex.mapper import Mapper
29
33
  from datahub.ingestion.source.hex.model import Component, Project
34
+ from datahub.ingestion.source.hex.query_fetcher import (
35
+ HexQueryFetcher,
36
+ HexQueryFetcherReport,
37
+ )
30
38
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
31
39
  StaleEntityRemovalHandler,
32
40
  StaleEntityRemovalSourceReport,
@@ -34,9 +42,10 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
34
42
  )
35
43
  from datahub.ingestion.source.state.stateful_ingestion_base import (
36
44
  StatefulIngestionConfigBase,
37
- StatefulIngestionReport,
38
45
  StatefulIngestionSourceBase,
39
46
  )
47
+ from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
48
+ from datahub.sdk.main_client import DataHubClient
40
49
 
41
50
 
42
51
  class HexSourceConfig(
@@ -93,9 +102,73 @@ class HexSourceConfig(
93
102
  default=True,
94
103
  description="Set ownership identity from owner/creator email",
95
104
  )
105
+ include_lineage: bool = Field(
106
+ default=True,
107
+ description='Include Hex lineage, being fetched from DataHub. See "Limitations" section in the docs for more details about the limitations of this feature.',
108
+ )
109
+ lineage_start_time: Optional[datetime] = Field(
110
+ default=None,
111
+ description="Earliest date of lineage to consider. Default: 1 day before lineage end time. You can specify absolute time like '2023-01-01' or relative time like '-7 days' or '-7d'.",
112
+ )
113
+ lineage_end_time: Optional[datetime] = Field(
114
+ default=None,
115
+ description="Latest date of lineage to consider. Default: Current time in UTC. You can specify absolute time like '2023-01-01' or relative time like '-1 day' or '-1d'.",
116
+ )
117
+ datahub_page_size: int = Field(
118
+ default=DATAHUB_API_PAGE_SIZE_DEFAULT,
119
+ description="Number of items to fetch per DataHub API call.",
120
+ )
121
+
122
+ @root_validator(pre=True)
123
+ def validate_lineage_times(cls, data: Dict[str, Any]) -> Dict[str, Any]:
124
+ # lineage_end_time default = now
125
+ if "lineage_end_time" not in data or data["lineage_end_time"] is None:
126
+ data["lineage_end_time"] = datetime.now(tz=timezone.utc)
127
+ # if string is given, parse it
128
+ if isinstance(data["lineage_end_time"], str):
129
+ data["lineage_end_time"] = parse_user_datetime(data["lineage_end_time"])
130
+ # if no timezone is given, assume UTC
131
+ if data["lineage_end_time"].tzinfo is None:
132
+ data["lineage_end_time"] = data["lineage_end_time"].replace(
133
+ tzinfo=timezone.utc
134
+ )
135
+ # at this point, we ensure there is a non null datetime with UTC timezone for lineage_end_time
136
+ assert (
137
+ data["lineage_end_time"]
138
+ and isinstance(data["lineage_end_time"], datetime)
139
+ and data["lineage_end_time"].tzinfo is not None
140
+ and data["lineage_end_time"].tzinfo == timezone.utc
141
+ )
142
+
143
+ # lineage_start_time default = lineage_end_time - 1 day
144
+ if "lineage_start_time" not in data or data["lineage_start_time"] is None:
145
+ data["lineage_start_time"] = data["lineage_end_time"] - timedelta(days=1)
146
+ # if string is given, parse it
147
+ if isinstance(data["lineage_start_time"], str):
148
+ data["lineage_start_time"] = parse_user_datetime(data["lineage_start_time"])
149
+ # if no timezone is given, assume UTC
150
+ if data["lineage_start_time"].tzinfo is None:
151
+ data["lineage_start_time"] = data["lineage_start_time"].replace(
152
+ tzinfo=timezone.utc
153
+ )
154
+ # at this point, we ensure there is a non null datetime with UTC timezone for lineage_start_time
155
+ assert (
156
+ data["lineage_start_time"]
157
+ and isinstance(data["lineage_start_time"], datetime)
158
+ and data["lineage_start_time"].tzinfo is not None
159
+ and data["lineage_start_time"].tzinfo == timezone.utc
160
+ )
161
+
162
+ return data
96
163
 
97
164
 
98
- class HexReport(StaleEntityRemovalSourceReport, HexApiReport):
165
+ @dataclass
166
+ class HexReport(
167
+ StaleEntityRemovalSourceReport,
168
+ HexApiReport,
169
+ IngestionStageReport,
170
+ HexQueryFetcherReport,
171
+ ):
99
172
  pass
100
173
 
101
174
 
@@ -110,7 +183,7 @@ class HexSource(StatefulIngestionSourceBase):
110
183
  def __init__(self, config: HexSourceConfig, ctx: PipelineContext):
111
184
  super().__init__(config, ctx)
112
185
  self.source_config = config
113
- self.report = HexReport()
186
+ self.report: HexReport = HexReport()
114
187
  self.platform = HEX_PLATFORM_NAME
115
188
  self.hex_api = HexApi(
116
189
  report=self.report,
@@ -129,6 +202,28 @@ class HexSource(StatefulIngestionSourceBase):
129
202
  categories_as_tags=self.source_config.categories_as_tags,
130
203
  set_ownership_from_email=self.source_config.set_ownership_from_email,
131
204
  )
205
+ self.project_registry: Dict[str, Project] = {}
206
+ self.component_registry: Dict[str, Component] = {}
207
+
208
+ self.datahub_client: Optional[DataHubClient] = None
209
+ self.query_fetcher: Optional[HexQueryFetcher] = None
210
+ if self.source_config.include_lineage:
211
+ graph = ctx.require_graph("Lineage")
212
+ assert self.source_config.lineage_start_time and isinstance(
213
+ self.source_config.lineage_start_time, datetime
214
+ )
215
+ assert self.source_config.lineage_end_time and isinstance(
216
+ self.source_config.lineage_end_time, datetime
217
+ )
218
+ self.datahub_client = DataHubClient(graph=graph)
219
+ self.query_fetcher = HexQueryFetcher(
220
+ datahub_client=self.datahub_client,
221
+ workspace_name=self.source_config.workspace_name,
222
+ start_datetime=self.source_config.lineage_start_time,
223
+ end_datetime=self.source_config.lineage_end_time,
224
+ report=self.report,
225
+ page_size=self.source_config.datahub_page_size,
226
+ )
132
227
 
133
228
  @classmethod
134
229
  def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "HexSource":
@@ -143,25 +238,58 @@ class HexSource(StatefulIngestionSourceBase):
143
238
  ).workunit_processor,
144
239
  ]
145
240
 
146
- def get_report(self) -> StatefulIngestionReport:
241
+ def get_report(self) -> HexReport:
147
242
  return self.report
148
243
 
149
244
  def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
150
- yield from self.mapper.map_workspace()
151
-
152
- for project_or_component in self.hex_api.fetch_projects():
153
- if isinstance(project_or_component, Project):
154
- if self.source_config.project_title_pattern.allowed(
155
- project_or_component.title
156
- ):
157
- yield from self.mapper.map_project(project=project_or_component)
158
- elif isinstance(project_or_component, Component):
159
- if (
160
- self.source_config.include_components
161
- and self.source_config.component_title_pattern.allowed(
245
+ with self.report.new_stage("Fetch Hex assets from Hex API"):
246
+ for project_or_component in self.hex_api.fetch_projects():
247
+ if isinstance(project_or_component, Project):
248
+ if self.source_config.project_title_pattern.allowed(
162
249
  project_or_component.title
163
- )
164
- ):
165
- yield from self.mapper.map_component(component=project_or_component)
166
- else:
167
- assert_never(project_or_component)
250
+ ):
251
+ self.project_registry[project_or_component.id] = (
252
+ project_or_component
253
+ )
254
+ elif isinstance(project_or_component, Component):
255
+ if (
256
+ self.source_config.include_components
257
+ and self.source_config.component_title_pattern.allowed(
258
+ project_or_component.title
259
+ )
260
+ ):
261
+ self.component_registry[project_or_component.id] = (
262
+ project_or_component
263
+ )
264
+ else:
265
+ assert_never(project_or_component)
266
+
267
+ if self.source_config.include_lineage:
268
+ assert self.datahub_client and self.query_fetcher
269
+
270
+ with self.report.new_stage(
271
+ "Fetch Hex lineage from existing Queries in DataHub"
272
+ ):
273
+ for query_metadata in self.query_fetcher.fetch():
274
+ project = self.project_registry.get(query_metadata.hex_project_id)
275
+ if project:
276
+ project.upstream_datasets.extend(
277
+ query_metadata.dataset_subjects
278
+ )
279
+ project.upstream_schema_fields.extend(
280
+ query_metadata.schema_field_subjects
281
+ )
282
+ else:
283
+ self.report.report_warning(
284
+ title="Missing project for lineage",
285
+ message="Lineage missed because missed project, likely due to filter patterns or deleted project.",
286
+ context=str(query_metadata),
287
+ )
288
+
289
+ with self.report.new_stage("Emit"):
290
+ yield from self.mapper.map_workspace()
291
+
292
+ for project in self.project_registry.values():
293
+ yield from self.mapper.map_project(project=project)
294
+ for component in self.component_registry.values():
295
+ yield from self.mapper.map_component(component=component)
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from datetime import datetime
3
- from typing import Iterable, List, Optional, Tuple
3
+ from typing import Iterable, List, Optional, Tuple, Union
4
4
 
5
5
  from datahub._codegen.aspect import (
6
6
  _Aspect, # TODO: is there a better import than this one?
@@ -46,6 +46,7 @@ from datahub.metadata.schema_classes import (
46
46
  DashboardInfoClass,
47
47
  DashboardUsageStatisticsClass,
48
48
  DataPlatformInstanceClass,
49
+ EdgeClass,
49
50
  GlobalTagsClass,
50
51
  OwnerClass,
51
52
  OwnershipClass,
@@ -53,7 +54,14 @@ from datahub.metadata.schema_classes import (
53
54
  TagAssociationClass,
54
55
  TimeWindowSizeClass,
55
56
  )
56
- from datahub.metadata.urns import ContainerUrn, CorpUserUrn, DashboardUrn, Urn
57
+ from datahub.metadata.urns import (
58
+ ContainerUrn,
59
+ CorpUserUrn,
60
+ DashboardUrn,
61
+ DatasetUrn,
62
+ SchemaFieldUrn,
63
+ Urn,
64
+ )
57
65
 
58
66
  logger = logging.getLogger(__name__)
59
67
 
@@ -116,6 +124,8 @@ class Mapper:
116
124
  ),
117
125
  externalUrl=f"{self._base_url}/{self._workspace_name}/hex/{project.id}",
118
126
  customProperties=dict(id=project.id),
127
+ datasetEdges=self._dataset_edges(project.upstream_datasets),
128
+ # TODO: support schema field upstream, maybe InputFields?
119
129
  )
120
130
 
121
131
  subtypes = SubTypesClass(
@@ -343,6 +353,22 @@ class Mapper:
343
353
  else None,
344
354
  )
345
355
 
356
+ def _dataset_edges(
357
+ self, upstream: List[Union[DatasetUrn, SchemaFieldUrn]]
358
+ ) -> Optional[List[EdgeClass]]:
359
+ # TBC: is there support for CLL in Dashboards? for the moment, skip SchemaFieldUrns
360
+ return (
361
+ [
362
+ EdgeClass(
363
+ destinationUrn=upstream_urn.urn(),
364
+ )
365
+ for upstream_urn in upstream
366
+ if isinstance(upstream_urn, DatasetUrn)
367
+ ]
368
+ if upstream
369
+ else None
370
+ )
371
+
346
372
  def _yield_mcps(
347
373
  self, entity_urn: Urn, aspects: List[Optional[_Aspect]]
348
374
  ) -> Iterable[MetadataWorkUnit]:
@@ -1,6 +1,8 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, field
2
2
  from datetime import datetime
3
- from typing import List, Optional
3
+ from typing import List, Optional, Union
4
+
5
+ from datahub.metadata.urns import DatasetUrn, SchemaFieldUrn
4
6
 
5
7
 
6
8
  @dataclass
@@ -51,6 +53,12 @@ class Project:
51
53
  creator: Optional[Owner] = None
52
54
  owner: Optional[Owner] = None
53
55
  analytics: Optional[Analytics] = None
56
+ upstream_datasets: List[Union[DatasetUrn, SchemaFieldUrn]] = field(
57
+ default_factory=list
58
+ )
59
+ upstream_schema_fields: List[Union[DatasetUrn, SchemaFieldUrn]] = field(
60
+ default_factory=list
61
+ )
54
62
 
55
63
 
56
64
  @dataclass