acryl-datahub 1.0.0rc13__py3-none-any.whl → 1.0.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (45)
  1. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/METADATA +2540 -2540
  2. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/RECORD +45 -45
  3. datahub/_version.py +1 -1
  4. datahub/configuration/common.py +1 -1
  5. datahub/emitter/rest_emitter.py +165 -10
  6. datahub/ingestion/glossary/classification_mixin.py +1 -5
  7. datahub/ingestion/graph/client.py +6 -3
  8. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  9. datahub/ingestion/run/pipeline.py +2 -4
  10. datahub/ingestion/sink/datahub_rest.py +4 -0
  11. datahub/ingestion/source/common/subtypes.py +5 -0
  12. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  13. datahub/ingestion/source/dbt/dbt_common.py +2 -4
  14. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  15. datahub/ingestion/source/dremio/dremio_api.py +1 -5
  16. datahub/ingestion/source/dremio/dremio_aspects.py +1 -4
  17. datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
  18. datahub/ingestion/source/ge_data_profiler.py +1 -1
  19. datahub/ingestion/source/kafka_connect/common.py +1 -6
  20. datahub/ingestion/source/mlflow.py +338 -31
  21. datahub/ingestion/source/redshift/lineage.py +2 -2
  22. datahub/ingestion/source/redshift/lineage_v2.py +19 -7
  23. datahub/ingestion/source/redshift/profile.py +1 -1
  24. datahub/ingestion/source/redshift/query.py +14 -6
  25. datahub/ingestion/source/redshift/redshift.py +9 -5
  26. datahub/ingestion/source/redshift/redshift_schema.py +27 -7
  27. datahub/ingestion/source/sql/athena.py +6 -12
  28. datahub/ingestion/source/sql/hive.py +2 -6
  29. datahub/ingestion/source/sql/hive_metastore.py +2 -1
  30. datahub/ingestion/source/sql/sql_common.py +3 -9
  31. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  32. datahub/ingestion/source/superset.py +1 -3
  33. datahub/ingestion/source/tableau/tableau_common.py +1 -1
  34. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  35. datahub/lite/duckdb_lite.py +1 -3
  36. datahub/metadata/_schema_classes.py +31 -1
  37. datahub/metadata/schema.avsc +56 -4
  38. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  39. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  40. datahub/sdk/dataset.py +2 -2
  41. datahub/sql_parsing/sqlglot_utils.py +1 -4
  42. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/LICENSE +0 -0
  43. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/WHEEL +0 -0
  44. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/entry_points.txt +0 -0
  45. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=W5gCw-PvGPeNClWZ5wYkU1EO0af_2kWCyO3nFe4JtkE,322
3
+ datahub/_version.py,sha256=AldGgLwSkxqcsAUDu1_LgQwm__ULIRB4DHXCPzMQPxg,322
4
4
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
5
5
  datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
6
6
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -93,7 +93,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=Rb06XJKxqda5RSUo188B90Wn
93
93
  datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
94
94
  datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
95
95
  datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
96
- datahub/configuration/common.py,sha256=PAcEm2aBZ-GS7SAmPR_o8XGtTGnKZk-d-tb4_Y9A4hE,10394
96
+ datahub/configuration/common.py,sha256=bt_kiy2blqHbxbG-aM_8RNAZoIKMfaMzOZhtknnyLXg,10410
97
97
  datahub/configuration/config_loader.py,sha256=hRzPFxkz-w9IqkpSa5vwCzSra1p49DyfeJNeyqGa8-4,6827
98
98
  datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
99
99
  datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
@@ -124,7 +124,7 @@ datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
124
124
  datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
125
125
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
126
126
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
127
- datahub/emitter/rest_emitter.py,sha256=zPQNTtZsY75gh7MabexNag-M4nATcumka_An0nNI3j0,17889
127
+ datahub/emitter/rest_emitter.py,sha256=yJ_QCVe4K-ILXQOhS7CiTHG5Gw2xu4H9mscAnOvfUY4,23633
128
128
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
129
129
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
130
130
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -164,23 +164,23 @@ datahub/ingestion/fs/http_fs.py,sha256=NBIKp4vl7mW0YfVfkfpO3R6DBGqSC7f6EE_da0yz2
164
164
  datahub/ingestion/fs/local_fs.py,sha256=oWf-PZsl5sI-9eHWGeKlfKYagbQaSZ9fGfNbxcFji14,885
165
165
  datahub/ingestion/fs/s3_fs.py,sha256=B113EdaCelb80uF0n2rsLFettWB41RqFxa9X_XKRzZg,3190
166
166
  datahub/ingestion/glossary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
- datahub/ingestion/glossary/classification_mixin.py,sha256=Noy-9nGYPigEWZK9XKbXWDEDeVxAYYzdCFFqFPHjM2E,13782
167
+ datahub/ingestion/glossary/classification_mixin.py,sha256=znNNYnMwQW0eNR1OsoOASonfpMpQ7y0u3AmMJwpgl4Y,13645
168
168
  datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGot6z9Cir5Vuc,2981
169
169
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
170
170
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
171
171
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
172
- datahub/ingestion/graph/client.py,sha256=qaY7xA7aTykcICMv-0Q26Im-WreObvoQjceeUgqqwmk,65470
172
+ datahub/ingestion/graph/client.py,sha256=DpGenZBQ5yziaDRNzKDSlMnE2GWoJe_yk2MdyU3UnLM,65551
173
173
  datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
174
174
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
175
175
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
176
176
  datahub/ingestion/graph/filters.py,sha256=TL9JDVhpzKLfKf0m9vvzp3XCg3hecElaYRh0rajYfM8,6922
177
177
  datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
178
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=nPNA1ClFpJKcHZjramkOrTsa7A5OMVRSxBzq6Xgmlew,9753
178
+ datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=iEulcZMLBQuUfe9MAYyobMekvMcNm4dqVcS_C_2KfrI,9736
179
179
  datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
180
180
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
181
181
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
182
182
  datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
183
- datahub/ingestion/run/pipeline.py,sha256=n4plNyjt5MPCBLfDf1QP2tVXMXF37PbusBKVb6DTvkM,29977
183
+ datahub/ingestion/run/pipeline.py,sha256=pzd6LV1weecVgjMpWyM4gRzJ7FU7fhn0E5Vui8bw4fE,29938
184
184
  datahub/ingestion/run/pipeline_config.py,sha256=EDwqlid4h_qyqyeTRCEqb1RiFA4py_T-Poz1eIKmzT4,4101
185
185
  datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
186
186
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -188,7 +188,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
188
188
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
189
189
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
190
190
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
191
- datahub/ingestion/sink/datahub_rest.py,sha256=ME8OygJgd7AowrokJLmdjYHxIQEy5jXWS0yKwOLR934,12592
191
+ datahub/ingestion/sink/datahub_rest.py,sha256=KLUFteqGPmMvKaMbZG055uBYNyNUDkt_ziuJcjaNl1o,12781
192
192
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
193
193
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
194
194
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -198,12 +198,12 @@ datahub/ingestion/source/demo_data.py,sha256=PbtCHlZx3wrKlOPPgkWhDQuPm7ZfIx2neXJ
198
198
  datahub/ingestion/source/elastic_search.py,sha256=2dwIcSbYMaq_RoSnxLGz4Q_20oJ8AGgMKunVIBIgYM8,23406
199
199
  datahub/ingestion/source/feast.py,sha256=lsk0jc_0gKiiBNgmrmT8o8bvBOSLFPQMNrvWEcOye2w,18802
200
200
  datahub/ingestion/source/file.py,sha256=h6CRH7hrKcFxu1SmZDjqJcJUSrc031u5oJUl2clnPO4,15976
201
- datahub/ingestion/source/ge_data_profiler.py,sha256=C93ZZrtIRVL6pDpQ3fn7ZbbJiZmHTml7AlAPdMxwXIM,64628
201
+ datahub/ingestion/source/ge_data_profiler.py,sha256=uPHHgOr-fHXsdGonVa8Lc8ZE1yo0TyVWKhktwAuA3fI,64642
202
202
  datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0GX0az6HYqNUZRnIu_fQ,10866
203
203
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
204
204
  datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
205
205
  datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
206
- datahub/ingestion/source/mlflow.py,sha256=cqQivSyrptm15vn--xbT7eTRHJJVKMmQpoVqfzuDIDU,12858
206
+ datahub/ingestion/source/mlflow.py,sha256=n7jk-IsWYbuIQdW2C2rTr8DAihZiY5gYnSdCxfJ6c6Q,24301
207
207
  datahub/ingestion/source/mode.py,sha256=6WJKukK4VbNZwc5UM200iMlP_Chiwx8y2jFoclWgy0U,64044
208
208
  datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
209
209
  datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
@@ -215,7 +215,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
215
215
  datahub/ingestion/source/salesforce.py,sha256=d56tfYqg1rGDvMkLznmBJII55B1Zs8XTaQrrW-wHdLo,32679
216
216
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
217
217
  datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
218
- datahub/ingestion/source/superset.py,sha256=zPUeVMCEhFXFY2PFOpgmZvhzELdXzKPGYvbs6gAZfWs,31019
218
+ datahub/ingestion/source/superset.py,sha256=WrpCiZEC17cmFGcfUTTqUdnKASq7ZpT0ih-4xqB9qt4,30976
219
219
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
220
220
  datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
221
221
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
@@ -268,11 +268,11 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
268
268
  datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
269
269
  datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
270
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
271
- datahub/ingestion/source/common/subtypes.py,sha256=EiYSjBHiRvGjRB5wjKEfS5b_k9tQCFWMP1ADw_1p-CY,2525
271
+ datahub/ingestion/source/common/subtypes.py,sha256=LCJefUZ9o8yyhNXOy_HJefBOt93Cmn9r3m4VtCiK4iM,2643
272
272
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
273
273
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
274
274
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
275
- datahub/ingestion/source/data_lake_common/path_spec.py,sha256=u3u2eMe70V5vur-j8mYtupZdoeA2hSeK262Whdsc2YU,23506
275
+ datahub/ingestion/source/data_lake_common/path_spec.py,sha256=U--s2M78CJDyA7dUwOtWhZxeGxNC6a6fIp_mv_hn7KY,23469
276
276
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
277
277
  datahub/ingestion/source/datahub/config.py,sha256=JohcVz2pYnHbmJd0SGcIDH7Lp-K6MIJlswkid0vTQO4,4762
278
278
  datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
@@ -283,17 +283,17 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
283
283
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
284
284
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
285
  datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
286
- datahub/ingestion/source/dbt/dbt_common.py,sha256=HoV2nERnHbCCLPe5oNKoTCgHnnMJW0jW5tOaU9M1TYU,80756
286
+ datahub/ingestion/source/dbt/dbt_common.py,sha256=p2uYqWTwIFFomxdKDvoWPUOto9sZa42dKzvQpRvDqEY,80702
287
287
  datahub/ingestion/source/dbt/dbt_core.py,sha256=izfsJhPyv5e14H-5BXWhEeN1P6hdZvcjmutEptVxY4U,22987
288
- datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
288
+ datahub/ingestion/source/dbt/dbt_tests.py,sha256=ZbQdOEZzTe-AqV38AzP1d2KMsfqK87OU6CNUDWgb_Ag,9804
289
289
  datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
290
290
  datahub/ingestion/source/delta_lake/config.py,sha256=km8WCmjjyRrMy9kJ7JxZZIdS1pKIhKznWQGMYUsF_4s,3522
291
291
  datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWMeRaneSpQBlWmlCKAa1wGUl1sfk,1525
292
292
  datahub/ingestion/source/delta_lake/report.py,sha256=uR4e4QA_jv8lL3CV-wE5t43H8pUqrGmx_ItLqN9flPI,587
293
293
  datahub/ingestion/source/delta_lake/source.py,sha256=1OxdbH_KcC6WFbf78XueKphnmCcIGizUepQ-LQK_hbk,13968
294
294
  datahub/ingestion/source/dremio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
295
- datahub/ingestion/source/dremio/dremio_api.py,sha256=e45ctef1PSmQSvWzNB-rNYBHJebngXvDRcgLdil2IJs,33468
296
- datahub/ingestion/source/dremio/dremio_aspects.py,sha256=QeQRGaR6tvE-XrK6SVeYEDsaF8wEg_acpVckfYUPDdk,18316
295
+ datahub/ingestion/source/dremio/dremio_api.py,sha256=h4rjnRacggFXzIQVVsKFNgTUixUZh2gPHH4_7rSGx2g,33413
296
+ datahub/ingestion/source/dremio/dremio_aspects.py,sha256=oWV2_mSpq3Bh42YJ1QVbAyp-Uihf2WIT6VsHGsGTgzk,18248
297
297
  datahub/ingestion/source/dremio/dremio_config.py,sha256=5SP66ewGYN0OnyWgpU33EZOmtICsclTtBX5DSYLwl3c,5782
298
298
  datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=cAFnutqBxl_xKwyktPNQbZRao07cg01zOvT-w7lTZTI,3072
299
299
  datahub/ingestion/source/dremio/dremio_entities.py,sha256=3H3vIvj5ab4d8gmB9-rbZfwRgW87gT1DdjWiMjNgqJ4,15069
@@ -303,7 +303,7 @@ datahub/ingestion/source/dremio/dremio_source.py,sha256=XMx3EP0ciIaQjMffNljp8w-G
303
303
  datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
304
304
  datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
305
305
  datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
306
- datahub/ingestion/source/dynamodb/dynamodb.py,sha256=hyWUFWjyfSqjs9ljM-GcS0IVmaeIufUJu4pkOvZwKoI,22599
306
+ datahub/ingestion/source/dynamodb/dynamodb.py,sha256=vPDp0Au4qpkKJVDXdOb6lwUtNKDTeDKP0yHIcQxXK5k,22632
307
307
  datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
308
308
  datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
309
309
  datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
@@ -333,7 +333,7 @@ datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
333
333
  datahub/ingestion/source/kafka/kafka.py,sha256=mboUWQmlumEwcXwY2POeK1L8tdk5-CABakZ-MWbvdNQ,26579
334
334
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
335
335
  datahub/ingestion/source/kafka_connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
- datahub/ingestion/source/kafka_connect/common.py,sha256=6F9pPD_9uX6RcVLNy2Xpv_ipiqIZaLvsgdrj5o22pfA,7127
336
+ datahub/ingestion/source/kafka_connect/common.py,sha256=lH64n1v_rJamWGfidBeuQJj8W1_IvOBpXQLR2YZaEvQ,7057
337
337
  datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256=AVAgBvgH7kM9I2ke3mwr8CfIL1J2SdVHH_86rnCFwrM,17727
338
338
  datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=rNxolagqwQWQmVp4mDr1C-1TB6Drxc2b1dM9JSjNnuA,12905
339
339
  datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=UKcKGEtQGtVcaAuGQiNXQ0REtlBYMpZpMr1juJ-N1QM,21087
@@ -399,13 +399,13 @@ datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
399
399
  datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUhaMGec8FnUbSl6Y,8997
400
400
  datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
401
401
  datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
402
- datahub/ingestion/source/redshift/lineage.py,sha256=Gk2dNuRBEipZkY5W1sArlfRbFR7mBKutCFHHTrn3yX4,44096
403
- datahub/ingestion/source/redshift/lineage_v2.py,sha256=H6Qky5dLeZEICdDWyH-My78NoKlXpExHg3m-6d5lbgo,16891
404
- datahub/ingestion/source/redshift/profile.py,sha256=jqFQUSg_qzSYi1yIAq24NFwHW8yIcSDSSh-vgJ4nl6M,4287
405
- datahub/ingestion/source/redshift/query.py,sha256=6Fw3I8qFLflySDu6WY5D9NjXnRnDIw0yxKisSpaHh0A,47526
406
- datahub/ingestion/source/redshift/redshift.py,sha256=IZqeQws3mvDdu9K-ixPGZNalDcRRRse-l_TTwQI7B-4,43407
402
+ datahub/ingestion/source/redshift/lineage.py,sha256=IPF8vHy2MFyhK-hu2-lxV2-kcnNAEzltPLnnIvwIBMY,44100
403
+ datahub/ingestion/source/redshift/lineage_v2.py,sha256=ZMxPmmZ-O-Fid6VqnaUt6FyLSPHY8LXESYLj8fTZy1g,17523
404
+ datahub/ingestion/source/redshift/profile.py,sha256=dq7m9YG3TvEMbplwVIutUpzbXLPH8KIj9SuWNo7PWWE,4323
405
+ datahub/ingestion/source/redshift/query.py,sha256=rkWEpxW7HVCtcMQLQ5hAYenE_4q4884B4lL67OULbuo,47814
406
+ datahub/ingestion/source/redshift/redshift.py,sha256=whMujnJxwNT2ZXnOVRrZQiy317hlsvbARzabKmI3oN8,43536
407
407
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
408
- datahub/ingestion/source/redshift/redshift_schema.py,sha256=WTc-j4_PYlFgaJZ3hEorGIBWKruTX57E7V_5JaUe8mU,24045
408
+ datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
409
409
  datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
410
410
  datahub/ingestion/source/redshift/usage.py,sha256=eSdB1MYZeQokkQOwl9LPdpo-oCBJSwxJBotSpJ9XjBc,17473
411
411
  datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
@@ -456,19 +456,19 @@ datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCn
456
456
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=EmYb2FEcdLwei92atRBQ3iKH7av4YBZCIFTgPmLo0Ng,13092
457
457
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=nAbudDVh9A0kqao3jnIdgBlFNhNk1WIxoU1cofeXkFQ,33905
458
458
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
459
- datahub/ingestion/source/sql/athena.py,sha256=5hcx_cEVoRvA5IPLFCdIZxKeHwm6wryPU1urlFhW5NQ,24005
459
+ datahub/ingestion/source/sql/athena.py,sha256=X4SC1kwKJ3a3T0wGQIlOQxzJKE-LEr-U55QKpRUH4j8,23861
460
460
  datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
461
461
  datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2qkzjH6CpTOgkx4,1443
462
462
  datahub/ingestion/source/sql/druid.py,sha256=IjGZdntb5hubkIzzT9qDRDpyfbckEg2GwRncvC5mDSs,2722
463
463
  datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
464
- datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnXYZT7vZE,30306
465
- datahub/ingestion/source/sql/hive_metastore.py,sha256=fH7bAcljapYqmF8cQE7humoufFe2RVFRYOcyavMg9yo,36103
464
+ datahub/ingestion/source/sql/hive.py,sha256=tfRgzatF4cDb3F7gNXF9zEjFOFrcI318K6yGgykW_EQ,30212
465
+ datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
466
466
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
467
467
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
468
468
  datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
469
469
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
470
470
  datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
471
- datahub/ingestion/source/sql/sql_common.py,sha256=r75Cd06Qwe2fqTDRZKWnIf7kpnR0BSxZ9PYBOgY0I6k,48785
471
+ datahub/ingestion/source/sql/sql_common.py,sha256=jsweel_-vesNtcPonnfS11OUrlcZnS3wGt5r0dYTPnM,48637
472
472
  datahub/ingestion/source/sql/sql_config.py,sha256=CBXkCpzBAGrWAXJFte_i5TmpzcsMJwEjGHpfzd6vAow,8964
473
473
  datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
474
474
  datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
@@ -492,7 +492,7 @@ datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-Lj
492
492
  datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
493
493
  datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
494
494
  datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
495
- datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Id8W9LI4FMi2M4Vu67rkyrffm7sF-zjADaoHhJjNxYQ,15000
495
+ datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=-KZjprFEO2tFtt2j236uRV1GVZEL5Q7Mt7TCZWfcxs8,14921
496
496
  datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
497
497
  datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
498
498
  datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
@@ -502,7 +502,7 @@ datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py
502
502
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
503
503
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
504
504
  datahub/ingestion/source/tableau/tableau.py,sha256=AtQYzVWBLwrEjwgfBlBIv7aJJwZVloQSgJqt9ML6IrU,154137
505
- datahub/ingestion/source/tableau/tableau_common.py,sha256=fGuctx_y6WrDUZDWCOSaav3VhxY1DzTUt-zk3hgu-_c,26964
505
+ datahub/ingestion/source/tableau/tableau_common.py,sha256=OhBJDdX-cT93BIDVYPHAxNJYr0dvpT_udqY_GR9kk_E,26945
506
506
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
507
507
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
508
508
  datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
@@ -510,7 +510,7 @@ datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
510
510
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
511
511
  datahub/ingestion/source/unity/config.py,sha256=IE20ybsTd082ilDrnfPXl1GmjRxbWipTTt_E_3JE7FI,14770
512
512
  datahub/ingestion/source/unity/connection_test.py,sha256=B143Wb28fS0V4GhygU9hzKqiArWBjsQO54IUCPf23dc,2586
513
- datahub/ingestion/source/unity/ge_profiler.py,sha256=rCwcXK-n_5tGQb_f-3BTO5LWOGH57flzEmrtCKFT_T8,8348
513
+ datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
514
514
  datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
515
515
  datahub/ingestion/source/unity/proxy.py,sha256=qYgjw0COscvUk8TvgWwZKgYvkYyA3j4yc826IwfhIZg,18428
516
516
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
@@ -569,15 +569,15 @@ datahub/integrations/assertion/snowflake/metric_sql_generator.py,sha256=7lCSZJ9P
569
569
  datahub/integrations/great_expectations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
570
570
  datahub/integrations/great_expectations/action.py,sha256=78ywIwsfmxXbQQ0emou15ziasdr852dDk9qqSolaHac,100
571
571
  datahub/lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
572
- datahub/lite/duckdb_lite.py,sha256=lPjFUNvEAJzfQaELR9mhED1zB0kM4yQQfDwFhWRIlHE,32560
572
+ datahub/lite/duckdb_lite.py,sha256=O_LSh7lm7xuqFCyhU0tkxHq8ndqORLz9aZ5T-1k18SQ,32518
573
573
  datahub/lite/duckdb_lite_config.py,sha256=PGY5Hab_xbbqoA1hf7OKySBJ2JQJaLNKl-4CO39ad3g,157
574
574
  datahub/lite/lite_local.py,sha256=jsAwvnMJz_aR_a7y1ju2_ER3J3PS8wtEhutA9GmSihA,2858
575
575
  datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw,286
576
576
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
577
577
  datahub/lite/lite_util.py,sha256=Cm6trMTeo0X1fv4nSsW9lC0jqce7Jt-05GhOtIGzsVc,4559
578
578
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
579
- datahub/metadata/_schema_classes.py,sha256=uafVvWsnAqPranXzeC9CrSAu7I1-XJOogtiBPhxmn-k,993397
580
- datahub/metadata/schema.avsc,sha256=uPWX2Rx9A12b-p4ef4zrsjbtQPSIH8w67l3B6pq6zE0,741459
579
+ datahub/metadata/_schema_classes.py,sha256=WMINRH1eF7TmnGXSrUCVw5mxplZf5wXGy8QCAm4pxTk,994687
580
+ datahub/metadata/schema.avsc,sha256=88IHgp2lvp9_uY4XY4xH2LmgNReNfUOnbi6bofQzjs0,743287
581
581
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
582
582
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
583
583
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -723,9 +723,9 @@ datahub/metadata/schemas/DataPlatformInstanceKey.avsc,sha256=nHFRKlg98lfqYyHZEAX
723
723
  datahub/metadata/schemas/DataPlatformInstanceProperties.avsc,sha256=4-UrBTtVAR0rKQ4OPt4MVZeFtolXzIajGtyh3KC8-MQ,1623
724
724
  datahub/metadata/schemas/DataPlatformKey.avsc,sha256=5Z2adruXKzSucmgCba768UXdsGsYBH9t9DvFF9L9mxo,461
725
725
  datahub/metadata/schemas/DataProcessInfo.avsc,sha256=n4Zuk4kpHrHI2BdINhG-OucdCefb2GEsDv5mXQtSWIw,1558
726
- datahub/metadata/schemas/DataProcessInstanceInput.avsc,sha256=BEryvFSjDHOAO0RHxfci2OBxLSI1EdJ95Ox6E9WiBVU,891
726
+ datahub/metadata/schemas/DataProcessInstanceInput.avsc,sha256=qyo5BGB7s2HLcc9crHuIxB0yo5budfrF58zh3Uk6Yrw,6293
727
727
  datahub/metadata/schemas/DataProcessInstanceKey.avsc,sha256=YSEVtSWql1IZ9AG37HmJZ4118pgi8kVCygI_GqFf3YA,945
728
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=YVazoJv-BPE05EFuEvwykAPE6Y52EvwYdzF9KSYT7-s,928
728
+ datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=O33dSMbj_l8SmtCC-MRT1Edl3xIIl4yB1KdVxhJ6Yi0,6348
729
729
  datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5IVetgD8mW2k--F6CwmYXM3KOE6edU8,3836
730
730
  datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
731
731
  datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
@@ -880,7 +880,7 @@ datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1
880
880
  datahub/sdk/_shared.py,sha256=pHVKEJ50BoLw0fLLAm9zYsynNDN_bPI26qlj8nk2iyY,19582
881
881
  datahub/sdk/_utils.py,sha256=aGE665Su8SGtj2CRDiTaXNYrJ8ADBsS0m4DmaXw79b8,1027
882
882
  datahub/sdk/container.py,sha256=yw_vw9Jl1wOYNwMHxQHLz5ZvVQVDWWHi9CWBR3hOCd8,7547
883
- datahub/sdk/dataset.py,sha256=zZTZPXi6ltc6PxCbT2O6pHxuzaqMjbZQCRIguyCW8WA,25135
883
+ datahub/sdk/dataset.py,sha256=Izfkn6gIhTAF-A0r8nx8lseiLP-z9s0ljc-l7KD5IsM,25107
884
884
  datahub/sdk/entity.py,sha256=uk0SDZ5kGNDpt1qUYXi8M036-0dgpm42RQgCUSppMwk,3952
885
885
  datahub/sdk/entity_client.py,sha256=a9-n2IMxBfbY8t8qnYNkMj1VzvmvFwylLJ8bVyFX3iU,4276
886
886
  datahub/sdk/main_client.py,sha256=wVCYTdl_ZhKAjacWs0NP4Lo8AEOqOozbkJPt7J6Ya_g,2769
@@ -917,7 +917,7 @@ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJE
917
917
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
918
918
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
919
919
  datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
920
- datahub/sql_parsing/sqlglot_utils.py,sha256=6W6MQ5Yh0xXT9_h0jd19yoGWMdXicyRBDD_FwV7nj04,14701
920
+ datahub/sql_parsing/sqlglot_utils.py,sha256=HP6awSU4ijmwjmTvGA_d0X_RO9O3rbGdkbVAWEhAcck,14667
921
921
  datahub/sql_parsing/tool_meta_extractor.py,sha256=qEPq8RFWyK0tmSPNlluvd5cxgwbd2v6m9ViSY4hm2QM,6822
922
922
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
923
923
  datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
@@ -1022,9 +1022,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1022
1022
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1023
1023
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1024
1024
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1025
- acryl_datahub-1.0.0rc13.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1026
- acryl_datahub-1.0.0rc13.dist-info/METADATA,sha256=WIJxT5ufCOmAm3SU_GD0eN2OnkHcEc1_RZfGGEC1S7U,175337
1027
- acryl_datahub-1.0.0rc13.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
1028
- acryl_datahub-1.0.0rc13.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1029
- acryl_datahub-1.0.0rc13.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1030
- acryl_datahub-1.0.0rc13.dist-info/RECORD,,
1025
+ acryl_datahub-1.0.0rc15.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1026
+ acryl_datahub-1.0.0rc15.dist-info/METADATA,sha256=BglMEDZs9iZQ-OliHN5v7-w5gD4lw_dNN3BWHnlpznA,175337
1027
+ acryl_datahub-1.0.0rc15.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
1028
+ acryl_datahub-1.0.0rc15.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1029
+ acryl_datahub-1.0.0rc15.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1030
+ acryl_datahub-1.0.0rc15.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0rc13"
3
+ __version__ = "1.0.0rc15"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -130,7 +130,7 @@ class PermissiveConfigModel(ConfigModel):
130
130
  # It is usually used for argument bags that are passed through to third-party libraries.
131
131
 
132
132
  class Config:
133
- if PYDANTIC_VERSION_2:
133
+ if PYDANTIC_VERSION_2: # noqa: SIM108
134
134
  extra = "allow"
135
135
  else:
136
136
  extra = Extra.allow
@@ -4,6 +4,9 @@ import functools
4
4
  import json
5
5
  import logging
6
6
  import os
7
+ from collections import defaultdict
8
+ from dataclasses import dataclass
9
+ from enum import auto
7
10
  from json.decoder import JSONDecodeError
8
11
  from typing import (
9
12
  TYPE_CHECKING,
@@ -17,6 +20,7 @@ from typing import (
17
20
  Union,
18
21
  )
19
22
 
23
+ import pydantic
20
24
  import requests
21
25
  from deprecated import deprecated
22
26
  from requests.adapters import HTTPAdapter, Retry
@@ -27,10 +31,12 @@ from datahub.cli import config_utils
27
31
  from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url, get_or_else
28
32
  from datahub.cli.env_utils import get_boolean_env_variable
29
33
  from datahub.configuration.common import (
34
+ ConfigEnum,
30
35
  ConfigModel,
31
36
  ConfigurationError,
32
37
  OperationalError,
33
38
  )
39
+ from datahub.emitter.aspect import JSON_CONTENT_TYPE
34
40
  from datahub.emitter.generic_emitter import Emitter
35
41
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
36
42
  from datahub.emitter.request_helper import make_curl_command
@@ -77,6 +83,17 @@ BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
77
83
  )
78
84
 
79
85
 
86
+ class RestSinkEndpoint(ConfigEnum):
87
+ RESTLI = auto()
88
+ OPENAPI = auto()
89
+
90
+
91
+ DEFAULT_REST_SINK_ENDPOINT = pydantic.parse_obj_as(
92
+ RestSinkEndpoint,
93
+ os.getenv("DATAHUB_REST_SINK_DEFAULT_ENDPOINT", RestSinkEndpoint.RESTLI),
94
+ )
95
+
96
+
80
97
  class RequestsSessionConfig(ConfigModel):
81
98
  timeout: Union[float, Tuple[float, float], None] = _DEFAULT_TIMEOUT_SEC
82
99
 
@@ -143,10 +160,31 @@ class RequestsSessionConfig(ConfigModel):
143
160
  return session
144
161
 
145
162
 
163
+ @dataclass
164
+ class _Chunk:
165
+ items: List[str]
166
+ total_bytes: int = 0
167
+
168
+ def add_item(self, item: str) -> bool:
169
+ item_bytes = len(item.encode())
170
+ if not self.items: # Always add at least one item even if over byte limit
171
+ self.items.append(item)
172
+ self.total_bytes += item_bytes
173
+ return True
174
+ self.items.append(item)
175
+ self.total_bytes += item_bytes
176
+ return True
177
+
178
+ @staticmethod
179
+ def join(chunk: "_Chunk") -> str:
180
+ return "[" + ",".join(chunk.items) + "]"
181
+
182
+
146
183
  class DataHubRestEmitter(Closeable, Emitter):
147
184
  _gms_server: str
148
185
  _token: Optional[str]
149
186
  _session: requests.Session
187
+ _openapi_ingestion: bool
150
188
 
151
189
  def __init__(
152
190
  self,
@@ -162,6 +200,7 @@ class DataHubRestEmitter(Closeable, Emitter):
162
200
  ca_certificate_path: Optional[str] = None,
163
201
  client_certificate_path: Optional[str] = None,
164
202
  disable_ssl_verification: bool = False,
203
+ openapi_ingestion: bool = False,
165
204
  ):
166
205
  if not gms_server:
167
206
  raise ConfigurationError("gms server is required")
@@ -174,9 +213,13 @@ class DataHubRestEmitter(Closeable, Emitter):
174
213
  self._gms_server = fixup_gms_url(gms_server)
175
214
  self._token = token
176
215
  self.server_config: Dict[str, Any] = {}
177
-
216
+ self._openapi_ingestion = openapi_ingestion
178
217
  self._session = requests.Session()
179
218
 
219
+ logger.debug(
220
+ f"Using {'OpenAPI' if self._openapi_ingestion else 'Restli'} for ingestion."
221
+ )
222
+
180
223
  headers = {
181
224
  "X-RestLi-Protocol-Version": "2.0.0",
182
225
  "X-DataHub-Py-Cli-Version": nice_version_name(),
@@ -264,6 +307,43 @@ class DataHubRestEmitter(Closeable, Emitter):
264
307
 
265
308
  return DataHubGraph.from_emitter(self)
266
309
 
310
+ def _to_openapi_request(
311
+ self,
312
+ mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
313
+ async_flag: Optional[bool] = None,
314
+ async_default: bool = False,
315
+ ) -> Optional[Tuple[str, List[Dict[str, Any]]]]:
316
+ if mcp.aspect and mcp.aspectName:
317
+ resolved_async_flag = (
318
+ async_flag if async_flag is not None else async_default
319
+ )
320
+ url = f"{self._gms_server}/openapi/v3/entity/{mcp.entityType}?async={'true' if resolved_async_flag else 'false'}"
321
+
322
+ if isinstance(mcp, MetadataChangeProposalWrapper):
323
+ aspect_value = pre_json_transform(
324
+ mcp.to_obj(simplified_structure=True)
325
+ )["aspect"]["json"]
326
+ else:
327
+ obj = mcp.aspect.to_obj()
328
+ if obj.get("value") and obj.get("contentType") == JSON_CONTENT_TYPE:
329
+ obj = json.loads(obj["value"])
330
+ aspect_value = pre_json_transform(obj)
331
+ return (
332
+ url,
333
+ [
334
+ {
335
+ "urn": mcp.entityUrn,
336
+ mcp.aspectName: {
337
+ "value": aspect_value,
338
+ "systemMetadata": mcp.systemMetadata.to_obj()
339
+ if mcp.systemMetadata
340
+ else None,
341
+ },
342
+ }
343
+ ],
344
+ )
345
+ return None
346
+
267
347
  def emit(
268
348
  self,
269
349
  item: Union[
@@ -317,18 +397,24 @@ class DataHubRestEmitter(Closeable, Emitter):
317
397
  mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
318
398
  async_flag: Optional[bool] = None,
319
399
  ) -> None:
320
- url = f"{self._gms_server}/aspects?action=ingestProposal"
321
400
  ensure_has_system_metadata(mcp)
322
401
 
323
- mcp_obj = pre_json_transform(mcp.to_obj())
324
- payload_dict = {"proposal": mcp_obj}
402
+ if self._openapi_ingestion:
403
+ request = self._to_openapi_request(mcp, async_flag, async_default=False)
404
+ if request:
405
+ self._emit_generic(request[0], payload=request[1])
406
+ else:
407
+ url = f"{self._gms_server}/aspects?action=ingestProposal"
325
408
 
326
- if async_flag is not None:
327
- payload_dict["async"] = "true" if async_flag else "false"
409
+ mcp_obj = pre_json_transform(mcp.to_obj())
410
+ payload_dict = {"proposal": mcp_obj}
328
411
 
329
- payload = json.dumps(payload_dict)
412
+ if async_flag is not None:
413
+ payload_dict["async"] = "true" if async_flag else "false"
330
414
 
331
- self._emit_generic(url, payload)
415
+ payload = json.dumps(payload_dict)
416
+
417
+ self._emit_generic(url, payload)
332
418
 
333
419
  def emit_mcps(
334
420
  self,
@@ -337,10 +423,75 @@ class DataHubRestEmitter(Closeable, Emitter):
337
423
  ) -> int:
338
424
  if _DATAHUB_EMITTER_TRACE:
339
425
  logger.debug(f"Attempting to emit MCP batch of size {len(mcps)}")
340
- url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
426
+
341
427
  for mcp in mcps:
342
428
  ensure_has_system_metadata(mcp)
343
429
 
430
+ if self._openapi_ingestion:
431
+ return self._emit_openapi_mcps(mcps, async_flag)
432
+ else:
433
+ return self._emit_restli_mcps(mcps, async_flag)
434
+
435
+ def _emit_openapi_mcps(
436
+ self,
437
+ mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
438
+ async_flag: Optional[bool] = None,
439
+ ) -> int:
440
+ """
441
+ 1. Grouping MCPs by their entity URL
442
+ 2. Breaking down large batches into smaller chunks based on both:
443
+ * Total byte size (INGEST_MAX_PAYLOAD_BYTES)
444
+ * Maximum number of items (BATCH_INGEST_MAX_PAYLOAD_LENGTH)
445
+
446
+ The Chunk class encapsulates both the items and their byte size tracking
447
+ Serializing the items only once with json.dumps(request[1]) and reusing that
448
+ The chunking logic handles edge cases (always accepting at least one item per chunk)
449
+ The joining logic is efficient with a simple string concatenation
450
+
451
+ :param mcps: metadata change proposals to transmit
452
+ :param async_flag: the mode
453
+ :return: number of requests
454
+ """
455
+ # group by entity url
456
+ batches: Dict[str, List[_Chunk]] = defaultdict(
457
+ lambda: [_Chunk(items=[])]
458
+ ) # Initialize with one empty Chunk
459
+
460
+ for mcp in mcps:
461
+ request = self._to_openapi_request(mcp, async_flag, async_default=True)
462
+ if request:
463
+ current_chunk = batches[request[0]][-1] # Get the last chunk
464
+ # Only serialize once
465
+ serialized_item = json.dumps(request[1][0])
466
+ item_bytes = len(serialized_item.encode())
467
+
468
+ # If adding this item would exceed max_bytes, create a new chunk
469
+ # Unless the chunk is empty (always add at least one item)
470
+ if current_chunk.items and (
471
+ current_chunk.total_bytes + item_bytes > INGEST_MAX_PAYLOAD_BYTES
472
+ or len(current_chunk.items) >= BATCH_INGEST_MAX_PAYLOAD_LENGTH
473
+ ):
474
+ new_chunk = _Chunk(items=[])
475
+ batches[request[0]].append(new_chunk)
476
+ current_chunk = new_chunk
477
+
478
+ current_chunk.add_item(serialized_item)
479
+
480
+ responses = []
481
+ for url, chunks in batches.items():
482
+ for chunk in chunks:
483
+ response = self._emit_generic(url, payload=_Chunk.join(chunk))
484
+ responses.append(response)
485
+
486
+ return len(responses)
487
+
488
+ def _emit_restli_mcps(
489
+ self,
490
+ mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
491
+ async_flag: Optional[bool] = None,
492
+ ) -> int:
493
+ url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
494
+
344
495
  mcp_objs = [pre_json_transform(mcp.to_obj()) for mcp in mcps]
345
496
 
346
497
  # As a safety mechanism, we need to make sure we don't exceed the max payload size for GMS.
@@ -392,7 +543,10 @@ class DataHubRestEmitter(Closeable, Emitter):
392
543
  payload = json.dumps(snapshot)
393
544
  self._emit_generic(url, payload)
394
545
 
395
- def _emit_generic(self, url: str, payload: str) -> None:
546
+ def _emit_generic(self, url: str, payload: Union[str, Any]) -> requests.Response:
547
+ if not isinstance(payload, str):
548
+ payload = json.dumps(payload)
549
+
396
550
  curl_command = make_curl_command(self._session, "POST", url, payload)
397
551
  payload_size = len(payload)
398
552
  if payload_size > INGEST_MAX_PAYLOAD_BYTES:
@@ -408,6 +562,7 @@ class DataHubRestEmitter(Closeable, Emitter):
408
562
  try:
409
563
  response = self._session.post(url, data=payload)
410
564
  response.raise_for_status()
565
+ return response
411
566
  except HTTPError as e:
412
567
  try:
413
568
  info: Dict = response.json()
@@ -279,11 +279,7 @@ class ClassificationHandler:
279
279
  "Dataset_Name": dataset_name,
280
280
  }
281
281
  ),
282
- values=(
283
- sample_data[schema_field.fieldPath]
284
- if schema_field.fieldPath in sample_data
285
- else []
286
- ),
282
+ values=sample_data.get(schema_field.fieldPath, []),
287
283
  )
288
284
  )
289
285
 
@@ -32,7 +32,11 @@ from datahub.configuration.common import ConfigModel, GraphError, OperationalErr
32
32
  from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
33
33
  from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
34
34
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
35
- from datahub.emitter.rest_emitter import DatahubRestEmitter
35
+ from datahub.emitter.rest_emitter import (
36
+ DEFAULT_REST_SINK_ENDPOINT,
37
+ DatahubRestEmitter,
38
+ RestSinkEndpoint,
39
+ )
36
40
  from datahub.emitter.serialization_helper import post_json_transform
37
41
  from datahub.ingestion.graph.config import (
38
42
  DatahubClientConfig as DatahubClientConfig,
@@ -141,6 +145,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
141
145
  ca_certificate_path=self.config.ca_certificate_path,
142
146
  client_certificate_path=self.config.client_certificate_path,
143
147
  disable_ssl_verification=self.config.disable_ssl_verification,
148
+ openapi_ingestion=DEFAULT_REST_SINK_ENDPOINT == RestSinkEndpoint.OPENAPI,
144
149
  )
145
150
 
146
151
  self.server_id = _MISSING_SERVER_ID
@@ -782,9 +787,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
782
787
  results: Dict = self._post_generic(url, search_body)
783
788
  num_entities = results["value"]["numEntities"]
784
789
  logger.debug(f"Matched {num_entities} containers")
785
- entities_yielded: int = 0
786
790
  for x in results["value"]["entities"]:
787
- entities_yielded += 1
788
791
  logger.debug(f"yielding {x['entity']}")
789
792
  yield x["entity"]
790
793
 
@@ -163,7 +163,7 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
163
163
  key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
164
164
  for key, value in obj.items()
165
165
  }
166
- elif isinstance(obj, list) or isinstance(obj, set):
166
+ elif isinstance(obj, (list, set)):
167
167
  return [
168
168
  DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
169
169
  for element in obj