acryl-datahub 1.0.0rc12__py3-none-any.whl → 1.0.0rc14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc12.dist-info → acryl_datahub-1.0.0rc14.dist-info}/METADATA +2482 -2482
- {acryl_datahub-1.0.0rc12.dist-info → acryl_datahub-1.0.0rc14.dist-info}/RECORD +44 -44
- {acryl_datahub-1.0.0rc12.dist-info → acryl_datahub-1.0.0rc14.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/configuration/common.py +1 -1
- datahub/emitter/rest_emitter.py +165 -10
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +6 -3
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/run/pipeline.py +2 -4
- datahub/ingestion/sink/datahub_rest.py +4 -0
- datahub/ingestion/source/common/subtypes.py +5 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +2 -4
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/dremio/dremio_api.py +1 -5
- datahub/ingestion/source/dremio/dremio_aspects.py +1 -4
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
- datahub/ingestion/source/kafka_connect/common.py +1 -6
- datahub/ingestion/source/mlflow.py +338 -31
- datahub/ingestion/source/mode.py +6 -1
- datahub/ingestion/source/redshift/lineage.py +2 -2
- datahub/ingestion/source/redshift/lineage_v2.py +19 -7
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +14 -6
- datahub/ingestion/source/redshift/redshift.py +9 -5
- datahub/ingestion/source/redshift/redshift_schema.py +27 -7
- datahub/ingestion/source/sql/athena.py +6 -12
- datahub/ingestion/source/sql/hive.py +2 -6
- datahub/ingestion/source/sql/hive_metastore.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +3 -9
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +4 -6
- datahub/ingestion/source/tableau/tableau_common.py +1 -1
- datahub/lite/duckdb_lite.py +1 -3
- datahub/metadata/_schema_classes.py +31 -1
- datahub/metadata/schema.avsc +56 -4
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/sdk/dataset.py +2 -2
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- {acryl_datahub-1.0.0rc12.dist-info → acryl_datahub-1.0.0rc14.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc12.dist-info → acryl_datahub-1.0.0rc14.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc12.dist-info → acryl_datahub-1.0.0rc14.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=C2TzTmMmCl7vDOyuiPr7OIocmpZ-RmEJ2l0zU6ccz44,322
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
5
|
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
6
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -93,7 +93,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=Rb06XJKxqda5RSUo188B90Wn
|
|
|
93
93
|
datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
|
|
94
94
|
datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
|
|
95
95
|
datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
|
|
96
|
-
datahub/configuration/common.py,sha256=
|
|
96
|
+
datahub/configuration/common.py,sha256=bt_kiy2blqHbxbG-aM_8RNAZoIKMfaMzOZhtknnyLXg,10410
|
|
97
97
|
datahub/configuration/config_loader.py,sha256=hRzPFxkz-w9IqkpSa5vwCzSra1p49DyfeJNeyqGa8-4,6827
|
|
98
98
|
datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
|
|
99
99
|
datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
|
|
@@ -124,7 +124,7 @@ datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
|
124
124
|
datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
|
|
125
125
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
126
126
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
127
|
-
datahub/emitter/rest_emitter.py,sha256=
|
|
127
|
+
datahub/emitter/rest_emitter.py,sha256=yJ_QCVe4K-ILXQOhS7CiTHG5Gw2xu4H9mscAnOvfUY4,23633
|
|
128
128
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
129
129
|
datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
|
|
130
130
|
datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
|
|
@@ -164,23 +164,23 @@ datahub/ingestion/fs/http_fs.py,sha256=NBIKp4vl7mW0YfVfkfpO3R6DBGqSC7f6EE_da0yz2
|
|
|
164
164
|
datahub/ingestion/fs/local_fs.py,sha256=oWf-PZsl5sI-9eHWGeKlfKYagbQaSZ9fGfNbxcFji14,885
|
|
165
165
|
datahub/ingestion/fs/s3_fs.py,sha256=B113EdaCelb80uF0n2rsLFettWB41RqFxa9X_XKRzZg,3190
|
|
166
166
|
datahub/ingestion/glossary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
|
-
datahub/ingestion/glossary/classification_mixin.py,sha256=
|
|
167
|
+
datahub/ingestion/glossary/classification_mixin.py,sha256=znNNYnMwQW0eNR1OsoOASonfpMpQ7y0u3AmMJwpgl4Y,13645
|
|
168
168
|
datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGot6z9Cir5Vuc,2981
|
|
169
169
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
170
170
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
|
|
171
171
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
172
|
-
datahub/ingestion/graph/client.py,sha256=
|
|
172
|
+
datahub/ingestion/graph/client.py,sha256=DpGenZBQ5yziaDRNzKDSlMnE2GWoJe_yk2MdyU3UnLM,65551
|
|
173
173
|
datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
|
|
174
174
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
175
175
|
datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
|
|
176
176
|
datahub/ingestion/graph/filters.py,sha256=TL9JDVhpzKLfKf0m9vvzp3XCg3hecElaYRh0rajYfM8,6922
|
|
177
177
|
datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
178
|
-
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=
|
|
178
|
+
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=iEulcZMLBQuUfe9MAYyobMekvMcNm4dqVcS_C_2KfrI,9736
|
|
179
179
|
datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
|
|
180
180
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
181
181
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
182
|
datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
|
|
183
|
-
datahub/ingestion/run/pipeline.py,sha256=
|
|
183
|
+
datahub/ingestion/run/pipeline.py,sha256=pzd6LV1weecVgjMpWyM4gRzJ7FU7fhn0E5Vui8bw4fE,29938
|
|
184
184
|
datahub/ingestion/run/pipeline_config.py,sha256=EDwqlid4h_qyqyeTRCEqb1RiFA4py_T-Poz1eIKmzT4,4101
|
|
185
185
|
datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
|
|
186
186
|
datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -188,7 +188,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
|
|
|
188
188
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
189
189
|
datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
|
|
190
190
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
191
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
191
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=KLUFteqGPmMvKaMbZG055uBYNyNUDkt_ziuJcjaNl1o,12781
|
|
192
192
|
datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
|
|
193
193
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
194
194
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -203,8 +203,8 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
|
|
|
203
203
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
204
204
|
datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
|
|
205
205
|
datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
|
|
206
|
-
datahub/ingestion/source/mlflow.py,sha256=
|
|
207
|
-
datahub/ingestion/source/mode.py,sha256=
|
|
206
|
+
datahub/ingestion/source/mlflow.py,sha256=n7jk-IsWYbuIQdW2C2rTr8DAihZiY5gYnSdCxfJ6c6Q,24301
|
|
207
|
+
datahub/ingestion/source/mode.py,sha256=6WJKukK4VbNZwc5UM200iMlP_Chiwx8y2jFoclWgy0U,64044
|
|
208
208
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
209
209
|
datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
|
|
210
210
|
datahub/ingestion/source/openapi.py,sha256=39ep3etbWh8NBPjTXXwH3mieC5P6bMVAjhvK7UvcTis,17372
|
|
@@ -215,7 +215,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
|
|
|
215
215
|
datahub/ingestion/source/salesforce.py,sha256=d56tfYqg1rGDvMkLznmBJII55B1Zs8XTaQrrW-wHdLo,32679
|
|
216
216
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
217
217
|
datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
|
|
218
|
-
datahub/ingestion/source/superset.py,sha256=
|
|
218
|
+
datahub/ingestion/source/superset.py,sha256=WrpCiZEC17cmFGcfUTTqUdnKASq7ZpT0ih-4xqB9qt4,30976
|
|
219
219
|
datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
220
220
|
datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
|
|
221
221
|
datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
|
|
@@ -268,11 +268,11 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
|
|
|
268
268
|
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
|
|
269
269
|
datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
270
270
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
271
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
271
|
+
datahub/ingestion/source/common/subtypes.py,sha256=LCJefUZ9o8yyhNXOy_HJefBOt93Cmn9r3m4VtCiK4iM,2643
|
|
272
272
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
273
273
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
274
274
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
275
|
-
datahub/ingestion/source/data_lake_common/path_spec.py,sha256=
|
|
275
|
+
datahub/ingestion/source/data_lake_common/path_spec.py,sha256=U--s2M78CJDyA7dUwOtWhZxeGxNC6a6fIp_mv_hn7KY,23469
|
|
276
276
|
datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
277
277
|
datahub/ingestion/source/datahub/config.py,sha256=JohcVz2pYnHbmJd0SGcIDH7Lp-K6MIJlswkid0vTQO4,4762
|
|
278
278
|
datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
|
|
@@ -283,17 +283,17 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
283
283
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
284
284
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
285
285
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
|
|
286
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
286
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=p2uYqWTwIFFomxdKDvoWPUOto9sZa42dKzvQpRvDqEY,80702
|
|
287
287
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=izfsJhPyv5e14H-5BXWhEeN1P6hdZvcjmutEptVxY4U,22987
|
|
288
|
-
datahub/ingestion/source/dbt/dbt_tests.py,sha256=
|
|
288
|
+
datahub/ingestion/source/dbt/dbt_tests.py,sha256=ZbQdOEZzTe-AqV38AzP1d2KMsfqK87OU6CNUDWgb_Ag,9804
|
|
289
289
|
datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
|
|
290
290
|
datahub/ingestion/source/delta_lake/config.py,sha256=km8WCmjjyRrMy9kJ7JxZZIdS1pKIhKznWQGMYUsF_4s,3522
|
|
291
291
|
datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWMeRaneSpQBlWmlCKAa1wGUl1sfk,1525
|
|
292
292
|
datahub/ingestion/source/delta_lake/report.py,sha256=uR4e4QA_jv8lL3CV-wE5t43H8pUqrGmx_ItLqN9flPI,587
|
|
293
293
|
datahub/ingestion/source/delta_lake/source.py,sha256=1OxdbH_KcC6WFbf78XueKphnmCcIGizUepQ-LQK_hbk,13968
|
|
294
294
|
datahub/ingestion/source/dremio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
295
|
-
datahub/ingestion/source/dremio/dremio_api.py,sha256=
|
|
296
|
-
datahub/ingestion/source/dremio/dremio_aspects.py,sha256=
|
|
295
|
+
datahub/ingestion/source/dremio/dremio_api.py,sha256=h4rjnRacggFXzIQVVsKFNgTUixUZh2gPHH4_7rSGx2g,33413
|
|
296
|
+
datahub/ingestion/source/dremio/dremio_aspects.py,sha256=oWV2_mSpq3Bh42YJ1QVbAyp-Uihf2WIT6VsHGsGTgzk,18248
|
|
297
297
|
datahub/ingestion/source/dremio/dremio_config.py,sha256=5SP66ewGYN0OnyWgpU33EZOmtICsclTtBX5DSYLwl3c,5782
|
|
298
298
|
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=cAFnutqBxl_xKwyktPNQbZRao07cg01zOvT-w7lTZTI,3072
|
|
299
299
|
datahub/ingestion/source/dremio/dremio_entities.py,sha256=3H3vIvj5ab4d8gmB9-rbZfwRgW87gT1DdjWiMjNgqJ4,15069
|
|
@@ -303,7 +303,7 @@ datahub/ingestion/source/dremio/dremio_source.py,sha256=XMx3EP0ciIaQjMffNljp8w-G
|
|
|
303
303
|
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
|
|
304
304
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
305
305
|
datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
|
|
306
|
-
datahub/ingestion/source/dynamodb/dynamodb.py,sha256=
|
|
306
|
+
datahub/ingestion/source/dynamodb/dynamodb.py,sha256=vPDp0Au4qpkKJVDXdOb6lwUtNKDTeDKP0yHIcQxXK5k,22632
|
|
307
307
|
datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
308
308
|
datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
|
|
309
309
|
datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
|
|
@@ -333,7 +333,7 @@ datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
333
333
|
datahub/ingestion/source/kafka/kafka.py,sha256=mboUWQmlumEwcXwY2POeK1L8tdk5-CABakZ-MWbvdNQ,26579
|
|
334
334
|
datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
|
|
335
335
|
datahub/ingestion/source/kafka_connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
|
-
datahub/ingestion/source/kafka_connect/common.py,sha256=
|
|
336
|
+
datahub/ingestion/source/kafka_connect/common.py,sha256=lH64n1v_rJamWGfidBeuQJj8W1_IvOBpXQLR2YZaEvQ,7057
|
|
337
337
|
datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256=AVAgBvgH7kM9I2ke3mwr8CfIL1J2SdVHH_86rnCFwrM,17727
|
|
338
338
|
datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=rNxolagqwQWQmVp4mDr1C-1TB6Drxc2b1dM9JSjNnuA,12905
|
|
339
339
|
datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=UKcKGEtQGtVcaAuGQiNXQ0REtlBYMpZpMr1juJ-N1QM,21087
|
|
@@ -399,13 +399,13 @@ datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
399
399
|
datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUhaMGec8FnUbSl6Y,8997
|
|
400
400
|
datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
|
|
401
401
|
datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
|
|
402
|
-
datahub/ingestion/source/redshift/lineage.py,sha256=
|
|
403
|
-
datahub/ingestion/source/redshift/lineage_v2.py,sha256=
|
|
404
|
-
datahub/ingestion/source/redshift/profile.py,sha256=
|
|
405
|
-
datahub/ingestion/source/redshift/query.py,sha256=
|
|
406
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
402
|
+
datahub/ingestion/source/redshift/lineage.py,sha256=IPF8vHy2MFyhK-hu2-lxV2-kcnNAEzltPLnnIvwIBMY,44100
|
|
403
|
+
datahub/ingestion/source/redshift/lineage_v2.py,sha256=ZMxPmmZ-O-Fid6VqnaUt6FyLSPHY8LXESYLj8fTZy1g,17523
|
|
404
|
+
datahub/ingestion/source/redshift/profile.py,sha256=dq7m9YG3TvEMbplwVIutUpzbXLPH8KIj9SuWNo7PWWE,4323
|
|
405
|
+
datahub/ingestion/source/redshift/query.py,sha256=rkWEpxW7HVCtcMQLQ5hAYenE_4q4884B4lL67OULbuo,47814
|
|
406
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=whMujnJxwNT2ZXnOVRrZQiy317hlsvbARzabKmI3oN8,43536
|
|
407
407
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
408
|
-
datahub/ingestion/source/redshift/redshift_schema.py,sha256=
|
|
408
|
+
datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
|
|
409
409
|
datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
|
|
410
410
|
datahub/ingestion/source/redshift/usage.py,sha256=eSdB1MYZeQokkQOwl9LPdpo-oCBJSwxJBotSpJ9XjBc,17473
|
|
411
411
|
datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
|
|
@@ -456,19 +456,19 @@ datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCn
|
|
|
456
456
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=EmYb2FEcdLwei92atRBQ3iKH7av4YBZCIFTgPmLo0Ng,13092
|
|
457
457
|
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=nAbudDVh9A0kqao3jnIdgBlFNhNk1WIxoU1cofeXkFQ,33905
|
|
458
458
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
459
|
-
datahub/ingestion/source/sql/athena.py,sha256=
|
|
459
|
+
datahub/ingestion/source/sql/athena.py,sha256=X4SC1kwKJ3a3T0wGQIlOQxzJKE-LEr-U55QKpRUH4j8,23861
|
|
460
460
|
datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
|
|
461
461
|
datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2qkzjH6CpTOgkx4,1443
|
|
462
462
|
datahub/ingestion/source/sql/druid.py,sha256=IjGZdntb5hubkIzzT9qDRDpyfbckEg2GwRncvC5mDSs,2722
|
|
463
463
|
datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
|
|
464
|
-
datahub/ingestion/source/sql/hive.py,sha256=
|
|
465
|
-
datahub/ingestion/source/sql/hive_metastore.py,sha256=
|
|
464
|
+
datahub/ingestion/source/sql/hive.py,sha256=tfRgzatF4cDb3F7gNXF9zEjFOFrcI318K6yGgykW_EQ,30212
|
|
465
|
+
datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
|
|
466
466
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
467
467
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
468
468
|
datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
|
|
469
469
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
470
470
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
471
|
-
datahub/ingestion/source/sql/sql_common.py,sha256=
|
|
471
|
+
datahub/ingestion/source/sql/sql_common.py,sha256=jsweel_-vesNtcPonnfS11OUrlcZnS3wGt5r0dYTPnM,48637
|
|
472
472
|
datahub/ingestion/source/sql/sql_config.py,sha256=CBXkCpzBAGrWAXJFte_i5TmpzcsMJwEjGHpfzd6vAow,8964
|
|
473
473
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
474
474
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
|
|
@@ -492,7 +492,7 @@ datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-Lj
|
|
|
492
492
|
datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
|
|
493
493
|
datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
|
|
494
494
|
datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
|
|
495
|
-
datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256
|
|
495
|
+
datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=-KZjprFEO2tFtt2j236uRV1GVZEL5Q7Mt7TCZWfcxs8,14921
|
|
496
496
|
datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
|
|
497
497
|
datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
|
|
498
498
|
datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
|
|
@@ -502,7 +502,7 @@ datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py
|
|
|
502
502
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
503
503
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
504
504
|
datahub/ingestion/source/tableau/tableau.py,sha256=AtQYzVWBLwrEjwgfBlBIv7aJJwZVloQSgJqt9ML6IrU,154137
|
|
505
|
-
datahub/ingestion/source/tableau/tableau_common.py,sha256=
|
|
505
|
+
datahub/ingestion/source/tableau/tableau_common.py,sha256=OhBJDdX-cT93BIDVYPHAxNJYr0dvpT_udqY_GR9kk_E,26945
|
|
506
506
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
507
507
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
508
508
|
datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
|
|
@@ -569,15 +569,15 @@ datahub/integrations/assertion/snowflake/metric_sql_generator.py,sha256=7lCSZJ9P
|
|
|
569
569
|
datahub/integrations/great_expectations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
570
570
|
datahub/integrations/great_expectations/action.py,sha256=78ywIwsfmxXbQQ0emou15ziasdr852dDk9qqSolaHac,100
|
|
571
571
|
datahub/lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
572
|
-
datahub/lite/duckdb_lite.py,sha256=
|
|
572
|
+
datahub/lite/duckdb_lite.py,sha256=O_LSh7lm7xuqFCyhU0tkxHq8ndqORLz9aZ5T-1k18SQ,32518
|
|
573
573
|
datahub/lite/duckdb_lite_config.py,sha256=PGY5Hab_xbbqoA1hf7OKySBJ2JQJaLNKl-4CO39ad3g,157
|
|
574
574
|
datahub/lite/lite_local.py,sha256=jsAwvnMJz_aR_a7y1ju2_ER3J3PS8wtEhutA9GmSihA,2858
|
|
575
575
|
datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw,286
|
|
576
576
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
577
577
|
datahub/lite/lite_util.py,sha256=Cm6trMTeo0X1fv4nSsW9lC0jqce7Jt-05GhOtIGzsVc,4559
|
|
578
578
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
579
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
580
|
-
datahub/metadata/schema.avsc,sha256=
|
|
579
|
+
datahub/metadata/_schema_classes.py,sha256=WMINRH1eF7TmnGXSrUCVw5mxplZf5wXGy8QCAm4pxTk,994687
|
|
580
|
+
datahub/metadata/schema.avsc,sha256=88IHgp2lvp9_uY4XY4xH2LmgNReNfUOnbi6bofQzjs0,743287
|
|
581
581
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
582
582
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
583
583
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -723,9 +723,9 @@ datahub/metadata/schemas/DataPlatformInstanceKey.avsc,sha256=nHFRKlg98lfqYyHZEAX
|
|
|
723
723
|
datahub/metadata/schemas/DataPlatformInstanceProperties.avsc,sha256=4-UrBTtVAR0rKQ4OPt4MVZeFtolXzIajGtyh3KC8-MQ,1623
|
|
724
724
|
datahub/metadata/schemas/DataPlatformKey.avsc,sha256=5Z2adruXKzSucmgCba768UXdsGsYBH9t9DvFF9L9mxo,461
|
|
725
725
|
datahub/metadata/schemas/DataProcessInfo.avsc,sha256=n4Zuk4kpHrHI2BdINhG-OucdCefb2GEsDv5mXQtSWIw,1558
|
|
726
|
-
datahub/metadata/schemas/DataProcessInstanceInput.avsc,sha256=
|
|
726
|
+
datahub/metadata/schemas/DataProcessInstanceInput.avsc,sha256=qyo5BGB7s2HLcc9crHuIxB0yo5budfrF58zh3Uk6Yrw,6293
|
|
727
727
|
datahub/metadata/schemas/DataProcessInstanceKey.avsc,sha256=YSEVtSWql1IZ9AG37HmJZ4118pgi8kVCygI_GqFf3YA,945
|
|
728
|
-
datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=
|
|
728
|
+
datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=O33dSMbj_l8SmtCC-MRT1Edl3xIIl4yB1KdVxhJ6Yi0,6348
|
|
729
729
|
datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5IVetgD8mW2k--F6CwmYXM3KOE6edU8,3836
|
|
730
730
|
datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
|
|
731
731
|
datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
|
|
@@ -880,7 +880,7 @@ datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1
|
|
|
880
880
|
datahub/sdk/_shared.py,sha256=pHVKEJ50BoLw0fLLAm9zYsynNDN_bPI26qlj8nk2iyY,19582
|
|
881
881
|
datahub/sdk/_utils.py,sha256=aGE665Su8SGtj2CRDiTaXNYrJ8ADBsS0m4DmaXw79b8,1027
|
|
882
882
|
datahub/sdk/container.py,sha256=yw_vw9Jl1wOYNwMHxQHLz5ZvVQVDWWHi9CWBR3hOCd8,7547
|
|
883
|
-
datahub/sdk/dataset.py,sha256=
|
|
883
|
+
datahub/sdk/dataset.py,sha256=Izfkn6gIhTAF-A0r8nx8lseiLP-z9s0ljc-l7KD5IsM,25107
|
|
884
884
|
datahub/sdk/entity.py,sha256=uk0SDZ5kGNDpt1qUYXi8M036-0dgpm42RQgCUSppMwk,3952
|
|
885
885
|
datahub/sdk/entity_client.py,sha256=a9-n2IMxBfbY8t8qnYNkMj1VzvmvFwylLJ8bVyFX3iU,4276
|
|
886
886
|
datahub/sdk/main_client.py,sha256=wVCYTdl_ZhKAjacWs0NP4Lo8AEOqOozbkJPt7J6Ya_g,2769
|
|
@@ -917,7 +917,7 @@ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJE
|
|
|
917
917
|
datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
|
|
918
918
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
919
919
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
|
|
920
|
-
datahub/sql_parsing/sqlglot_utils.py,sha256=
|
|
920
|
+
datahub/sql_parsing/sqlglot_utils.py,sha256=HP6awSU4ijmwjmTvGA_d0X_RO9O3rbGdkbVAWEhAcck,14667
|
|
921
921
|
datahub/sql_parsing/tool_meta_extractor.py,sha256=qEPq8RFWyK0tmSPNlluvd5cxgwbd2v6m9ViSY4hm2QM,6822
|
|
922
922
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
923
923
|
datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
|
|
@@ -1022,9 +1022,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1022
1022
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1023
1023
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1024
1024
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1025
|
-
acryl_datahub-1.0.
|
|
1026
|
-
acryl_datahub-1.0.
|
|
1027
|
-
acryl_datahub-1.0.
|
|
1028
|
-
acryl_datahub-1.0.
|
|
1029
|
-
acryl_datahub-1.0.
|
|
1030
|
-
acryl_datahub-1.0.
|
|
1025
|
+
acryl_datahub-1.0.0rc14.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1026
|
+
acryl_datahub-1.0.0rc14.dist-info/METADATA,sha256=ZUnaweLZeU553HkZi4ROQ0W3zFWrFFtyLr8P-i0mftE,175337
|
|
1027
|
+
acryl_datahub-1.0.0rc14.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
|
1028
|
+
acryl_datahub-1.0.0rc14.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1029
|
+
acryl_datahub-1.0.0rc14.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1030
|
+
acryl_datahub-1.0.0rc14.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/configuration/common.py
CHANGED
|
@@ -130,7 +130,7 @@ class PermissiveConfigModel(ConfigModel):
|
|
|
130
130
|
# It is usually used for argument bags that are passed through to third-party libraries.
|
|
131
131
|
|
|
132
132
|
class Config:
|
|
133
|
-
if PYDANTIC_VERSION_2:
|
|
133
|
+
if PYDANTIC_VERSION_2: # noqa: SIM108
|
|
134
134
|
extra = "allow"
|
|
135
135
|
else:
|
|
136
136
|
extra = Extra.allow
|
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -4,6 +4,9 @@ import functools
|
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
|
+
from collections import defaultdict
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from enum import auto
|
|
7
10
|
from json.decoder import JSONDecodeError
|
|
8
11
|
from typing import (
|
|
9
12
|
TYPE_CHECKING,
|
|
@@ -17,6 +20,7 @@ from typing import (
|
|
|
17
20
|
Union,
|
|
18
21
|
)
|
|
19
22
|
|
|
23
|
+
import pydantic
|
|
20
24
|
import requests
|
|
21
25
|
from deprecated import deprecated
|
|
22
26
|
from requests.adapters import HTTPAdapter, Retry
|
|
@@ -27,10 +31,12 @@ from datahub.cli import config_utils
|
|
|
27
31
|
from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url, get_or_else
|
|
28
32
|
from datahub.cli.env_utils import get_boolean_env_variable
|
|
29
33
|
from datahub.configuration.common import (
|
|
34
|
+
ConfigEnum,
|
|
30
35
|
ConfigModel,
|
|
31
36
|
ConfigurationError,
|
|
32
37
|
OperationalError,
|
|
33
38
|
)
|
|
39
|
+
from datahub.emitter.aspect import JSON_CONTENT_TYPE
|
|
34
40
|
from datahub.emitter.generic_emitter import Emitter
|
|
35
41
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
36
42
|
from datahub.emitter.request_helper import make_curl_command
|
|
@@ -77,6 +83,17 @@ BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
|
|
|
77
83
|
)
|
|
78
84
|
|
|
79
85
|
|
|
86
|
+
class RestSinkEndpoint(ConfigEnum):
|
|
87
|
+
RESTLI = auto()
|
|
88
|
+
OPENAPI = auto()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
DEFAULT_REST_SINK_ENDPOINT = pydantic.parse_obj_as(
|
|
92
|
+
RestSinkEndpoint,
|
|
93
|
+
os.getenv("DATAHUB_REST_SINK_DEFAULT_ENDPOINT", RestSinkEndpoint.RESTLI),
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
80
97
|
class RequestsSessionConfig(ConfigModel):
|
|
81
98
|
timeout: Union[float, Tuple[float, float], None] = _DEFAULT_TIMEOUT_SEC
|
|
82
99
|
|
|
@@ -143,10 +160,31 @@ class RequestsSessionConfig(ConfigModel):
|
|
|
143
160
|
return session
|
|
144
161
|
|
|
145
162
|
|
|
163
|
+
@dataclass
|
|
164
|
+
class _Chunk:
|
|
165
|
+
items: List[str]
|
|
166
|
+
total_bytes: int = 0
|
|
167
|
+
|
|
168
|
+
def add_item(self, item: str) -> bool:
|
|
169
|
+
item_bytes = len(item.encode())
|
|
170
|
+
if not self.items: # Always add at least one item even if over byte limit
|
|
171
|
+
self.items.append(item)
|
|
172
|
+
self.total_bytes += item_bytes
|
|
173
|
+
return True
|
|
174
|
+
self.items.append(item)
|
|
175
|
+
self.total_bytes += item_bytes
|
|
176
|
+
return True
|
|
177
|
+
|
|
178
|
+
@staticmethod
|
|
179
|
+
def join(chunk: "_Chunk") -> str:
|
|
180
|
+
return "[" + ",".join(chunk.items) + "]"
|
|
181
|
+
|
|
182
|
+
|
|
146
183
|
class DataHubRestEmitter(Closeable, Emitter):
|
|
147
184
|
_gms_server: str
|
|
148
185
|
_token: Optional[str]
|
|
149
186
|
_session: requests.Session
|
|
187
|
+
_openapi_ingestion: bool
|
|
150
188
|
|
|
151
189
|
def __init__(
|
|
152
190
|
self,
|
|
@@ -162,6 +200,7 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
162
200
|
ca_certificate_path: Optional[str] = None,
|
|
163
201
|
client_certificate_path: Optional[str] = None,
|
|
164
202
|
disable_ssl_verification: bool = False,
|
|
203
|
+
openapi_ingestion: bool = False,
|
|
165
204
|
):
|
|
166
205
|
if not gms_server:
|
|
167
206
|
raise ConfigurationError("gms server is required")
|
|
@@ -174,9 +213,13 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
174
213
|
self._gms_server = fixup_gms_url(gms_server)
|
|
175
214
|
self._token = token
|
|
176
215
|
self.server_config: Dict[str, Any] = {}
|
|
177
|
-
|
|
216
|
+
self._openapi_ingestion = openapi_ingestion
|
|
178
217
|
self._session = requests.Session()
|
|
179
218
|
|
|
219
|
+
logger.debug(
|
|
220
|
+
f"Using {'OpenAPI' if self._openapi_ingestion else 'Restli'} for ingestion."
|
|
221
|
+
)
|
|
222
|
+
|
|
180
223
|
headers = {
|
|
181
224
|
"X-RestLi-Protocol-Version": "2.0.0",
|
|
182
225
|
"X-DataHub-Py-Cli-Version": nice_version_name(),
|
|
@@ -264,6 +307,43 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
264
307
|
|
|
265
308
|
return DataHubGraph.from_emitter(self)
|
|
266
309
|
|
|
310
|
+
def _to_openapi_request(
|
|
311
|
+
self,
|
|
312
|
+
mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
|
|
313
|
+
async_flag: Optional[bool] = None,
|
|
314
|
+
async_default: bool = False,
|
|
315
|
+
) -> Optional[Tuple[str, List[Dict[str, Any]]]]:
|
|
316
|
+
if mcp.aspect and mcp.aspectName:
|
|
317
|
+
resolved_async_flag = (
|
|
318
|
+
async_flag if async_flag is not None else async_default
|
|
319
|
+
)
|
|
320
|
+
url = f"{self._gms_server}/openapi/v3/entity/{mcp.entityType}?async={'true' if resolved_async_flag else 'false'}"
|
|
321
|
+
|
|
322
|
+
if isinstance(mcp, MetadataChangeProposalWrapper):
|
|
323
|
+
aspect_value = pre_json_transform(
|
|
324
|
+
mcp.to_obj(simplified_structure=True)
|
|
325
|
+
)["aspect"]["json"]
|
|
326
|
+
else:
|
|
327
|
+
obj = mcp.aspect.to_obj()
|
|
328
|
+
if obj.get("value") and obj.get("contentType") == JSON_CONTENT_TYPE:
|
|
329
|
+
obj = json.loads(obj["value"])
|
|
330
|
+
aspect_value = pre_json_transform(obj)
|
|
331
|
+
return (
|
|
332
|
+
url,
|
|
333
|
+
[
|
|
334
|
+
{
|
|
335
|
+
"urn": mcp.entityUrn,
|
|
336
|
+
mcp.aspectName: {
|
|
337
|
+
"value": aspect_value,
|
|
338
|
+
"systemMetadata": mcp.systemMetadata.to_obj()
|
|
339
|
+
if mcp.systemMetadata
|
|
340
|
+
else None,
|
|
341
|
+
},
|
|
342
|
+
}
|
|
343
|
+
],
|
|
344
|
+
)
|
|
345
|
+
return None
|
|
346
|
+
|
|
267
347
|
def emit(
|
|
268
348
|
self,
|
|
269
349
|
item: Union[
|
|
@@ -317,18 +397,24 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
317
397
|
mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
|
|
318
398
|
async_flag: Optional[bool] = None,
|
|
319
399
|
) -> None:
|
|
320
|
-
url = f"{self._gms_server}/aspects?action=ingestProposal"
|
|
321
400
|
ensure_has_system_metadata(mcp)
|
|
322
401
|
|
|
323
|
-
|
|
324
|
-
|
|
402
|
+
if self._openapi_ingestion:
|
|
403
|
+
request = self._to_openapi_request(mcp, async_flag, async_default=False)
|
|
404
|
+
if request:
|
|
405
|
+
self._emit_generic(request[0], payload=request[1])
|
|
406
|
+
else:
|
|
407
|
+
url = f"{self._gms_server}/aspects?action=ingestProposal"
|
|
325
408
|
|
|
326
|
-
|
|
327
|
-
payload_dict["async"] = "true" if async_flag else "false"
|
|
409
|
+
mcp_obj = pre_json_transform(mcp.to_obj())
|
|
410
|
+
payload_dict = {"proposal": mcp_obj}
|
|
328
411
|
|
|
329
|
-
|
|
412
|
+
if async_flag is not None:
|
|
413
|
+
payload_dict["async"] = "true" if async_flag else "false"
|
|
330
414
|
|
|
331
|
-
|
|
415
|
+
payload = json.dumps(payload_dict)
|
|
416
|
+
|
|
417
|
+
self._emit_generic(url, payload)
|
|
332
418
|
|
|
333
419
|
def emit_mcps(
|
|
334
420
|
self,
|
|
@@ -337,10 +423,75 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
337
423
|
) -> int:
|
|
338
424
|
if _DATAHUB_EMITTER_TRACE:
|
|
339
425
|
logger.debug(f"Attempting to emit MCP batch of size {len(mcps)}")
|
|
340
|
-
|
|
426
|
+
|
|
341
427
|
for mcp in mcps:
|
|
342
428
|
ensure_has_system_metadata(mcp)
|
|
343
429
|
|
|
430
|
+
if self._openapi_ingestion:
|
|
431
|
+
return self._emit_openapi_mcps(mcps, async_flag)
|
|
432
|
+
else:
|
|
433
|
+
return self._emit_restli_mcps(mcps, async_flag)
|
|
434
|
+
|
|
435
|
+
def _emit_openapi_mcps(
|
|
436
|
+
self,
|
|
437
|
+
mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
|
|
438
|
+
async_flag: Optional[bool] = None,
|
|
439
|
+
) -> int:
|
|
440
|
+
"""
|
|
441
|
+
1. Grouping MCPs by their entity URL
|
|
442
|
+
2. Breaking down large batches into smaller chunks based on both:
|
|
443
|
+
* Total byte size (INGEST_MAX_PAYLOAD_BYTES)
|
|
444
|
+
* Maximum number of items (BATCH_INGEST_MAX_PAYLOAD_LENGTH)
|
|
445
|
+
|
|
446
|
+
The Chunk class encapsulates both the items and their byte size tracking
|
|
447
|
+
Serializing the items only once with json.dumps(request[1]) and reusing that
|
|
448
|
+
The chunking logic handles edge cases (always accepting at least one item per chunk)
|
|
449
|
+
The joining logic is efficient with a simple string concatenation
|
|
450
|
+
|
|
451
|
+
:param mcps: metadata change proposals to transmit
|
|
452
|
+
:param async_flag: the mode
|
|
453
|
+
:return: number of requests
|
|
454
|
+
"""
|
|
455
|
+
# group by entity url
|
|
456
|
+
batches: Dict[str, List[_Chunk]] = defaultdict(
|
|
457
|
+
lambda: [_Chunk(items=[])]
|
|
458
|
+
) # Initialize with one empty Chunk
|
|
459
|
+
|
|
460
|
+
for mcp in mcps:
|
|
461
|
+
request = self._to_openapi_request(mcp, async_flag, async_default=True)
|
|
462
|
+
if request:
|
|
463
|
+
current_chunk = batches[request[0]][-1] # Get the last chunk
|
|
464
|
+
# Only serialize once
|
|
465
|
+
serialized_item = json.dumps(request[1][0])
|
|
466
|
+
item_bytes = len(serialized_item.encode())
|
|
467
|
+
|
|
468
|
+
# If adding this item would exceed max_bytes, create a new chunk
|
|
469
|
+
# Unless the chunk is empty (always add at least one item)
|
|
470
|
+
if current_chunk.items and (
|
|
471
|
+
current_chunk.total_bytes + item_bytes > INGEST_MAX_PAYLOAD_BYTES
|
|
472
|
+
or len(current_chunk.items) >= BATCH_INGEST_MAX_PAYLOAD_LENGTH
|
|
473
|
+
):
|
|
474
|
+
new_chunk = _Chunk(items=[])
|
|
475
|
+
batches[request[0]].append(new_chunk)
|
|
476
|
+
current_chunk = new_chunk
|
|
477
|
+
|
|
478
|
+
current_chunk.add_item(serialized_item)
|
|
479
|
+
|
|
480
|
+
responses = []
|
|
481
|
+
for url, chunks in batches.items():
|
|
482
|
+
for chunk in chunks:
|
|
483
|
+
response = self._emit_generic(url, payload=_Chunk.join(chunk))
|
|
484
|
+
responses.append(response)
|
|
485
|
+
|
|
486
|
+
return len(responses)
|
|
487
|
+
|
|
488
|
+
def _emit_restli_mcps(
|
|
489
|
+
self,
|
|
490
|
+
mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
|
|
491
|
+
async_flag: Optional[bool] = None,
|
|
492
|
+
) -> int:
|
|
493
|
+
url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
|
|
494
|
+
|
|
344
495
|
mcp_objs = [pre_json_transform(mcp.to_obj()) for mcp in mcps]
|
|
345
496
|
|
|
346
497
|
# As a safety mechanism, we need to make sure we don't exceed the max payload size for GMS.
|
|
@@ -392,7 +543,10 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
392
543
|
payload = json.dumps(snapshot)
|
|
393
544
|
self._emit_generic(url, payload)
|
|
394
545
|
|
|
395
|
-
def _emit_generic(self, url: str, payload: str) -> None:
|
|
546
|
+
def _emit_generic(self, url: str, payload: Union[str, Any]) -> requests.Response:
|
|
547
|
+
if not isinstance(payload, str):
|
|
548
|
+
payload = json.dumps(payload)
|
|
549
|
+
|
|
396
550
|
curl_command = make_curl_command(self._session, "POST", url, payload)
|
|
397
551
|
payload_size = len(payload)
|
|
398
552
|
if payload_size > INGEST_MAX_PAYLOAD_BYTES:
|
|
@@ -408,6 +562,7 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
408
562
|
try:
|
|
409
563
|
response = self._session.post(url, data=payload)
|
|
410
564
|
response.raise_for_status()
|
|
565
|
+
return response
|
|
411
566
|
except HTTPError as e:
|
|
412
567
|
try:
|
|
413
568
|
info: Dict = response.json()
|
|
datahub/ingestion/glossary/classification_mixin.py
CHANGED
|
@@ -279,11 +279,7 @@ class ClassificationHandler:
|
|
|
279
279
|
"Dataset_Name": dataset_name,
|
|
280
280
|
}
|
|
281
281
|
),
|
|
282
|
-
values=(
|
|
283
|
-
sample_data[schema_field.fieldPath]
|
|
284
|
-
if schema_field.fieldPath in sample_data
|
|
285
|
-
else []
|
|
286
|
-
),
|
|
282
|
+
values=sample_data.get(schema_field.fieldPath, []),
|
|
287
283
|
)
|
|
288
284
|
)
|
|
289
285
|
|
|
datahub/ingestion/graph/client.py
CHANGED
|
@@ -32,7 +32,11 @@ from datahub.configuration.common import ConfigModel, GraphError, OperationalErr
|
|
|
32
32
|
from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
|
|
33
33
|
from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
|
|
34
34
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
35
|
-
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
35
|
+
from datahub.emitter.rest_emitter import (
|
|
36
|
+
DEFAULT_REST_SINK_ENDPOINT,
|
|
37
|
+
DatahubRestEmitter,
|
|
38
|
+
RestSinkEndpoint,
|
|
39
|
+
)
|
|
36
40
|
from datahub.emitter.serialization_helper import post_json_transform
|
|
37
41
|
from datahub.ingestion.graph.config import (
|
|
38
42
|
DatahubClientConfig as DatahubClientConfig,
|
|
@@ -141,6 +145,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
141
145
|
ca_certificate_path=self.config.ca_certificate_path,
|
|
142
146
|
client_certificate_path=self.config.client_certificate_path,
|
|
143
147
|
disable_ssl_verification=self.config.disable_ssl_verification,
|
|
148
|
+
openapi_ingestion=DEFAULT_REST_SINK_ENDPOINT == RestSinkEndpoint.OPENAPI,
|
|
144
149
|
)
|
|
145
150
|
|
|
146
151
|
self.server_id = _MISSING_SERVER_ID
|
|
@@ -782,9 +787,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
782
787
|
results: Dict = self._post_generic(url, search_body)
|
|
783
788
|
num_entities = results["value"]["numEntities"]
|
|
784
789
|
logger.debug(f"Matched {num_entities} containers")
|
|
785
|
-
entities_yielded: int = 0
|
|
786
790
|
for x in results["value"]["entities"]:
|
|
787
|
-
entities_yielded += 1
|
|
788
791
|
logger.debug(f"yielding {x['entity']}")
|
|
789
792
|
yield x["entity"]
|
|
790
793
|
|
|
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
CHANGED
|
@@ -163,7 +163,7 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
|
|
|
163
163
|
key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
|
|
164
164
|
for key, value in obj.items()
|
|
165
165
|
}
|
|
166
|
-
elif isinstance(obj, list) or isinstance(obj, set):
|
|
166
|
+
elif isinstance(obj, (list, set)):
|
|
167
167
|
return [
|
|
168
168
|
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
169
169
|
for element in obj
|
|
datahub/ingestion/run/pipeline.py
CHANGED
|
@@ -526,10 +526,8 @@ class Pipeline:
|
|
|
526
526
|
Evaluates the commit_policy for each committable in the context and triggers the commit operation
|
|
527
527
|
on the committable if its required commit policies are satisfied.
|
|
528
528
|
"""
|
|
529
|
-
has_errors: bool = (
|
|
530
|
-
|
|
531
|
-
if self.source.get_report().failures or self.sink.get_report().failures
|
|
532
|
-
else False
|
|
529
|
+
has_errors: bool = bool(
|
|
530
|
+
self.source.get_report().failures or self.sink.get_report().failures
|
|
533
531
|
)
|
|
534
532
|
has_warnings: bool = bool(
|
|
535
533
|
self.source.get_report().warnings or self.sink.get_report().warnings
|