acryl-datahub 1.2.0.11rc4__py3-none-any.whl → 1.3.0rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0rc4.dist-info}/METADATA +2475 -2472
- {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0rc4.dist-info}/RECORD +42 -40
- datahub/_version.py +1 -1
- datahub/cli/docker_check.py +1 -1
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +296 -0
- datahub/ingestion/api/source.py +29 -5
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -2
- datahub/ingestion/source/cassandra/cassandra_profiling.py +2 -2
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/dremio/dremio_reporting.py +0 -2
- datahub/ingestion/source/dremio/dremio_source.py +2 -2
- datahub/ingestion/source/fivetran/config.py +32 -5
- datahub/ingestion/source/fivetran/fivetran.py +0 -1
- datahub/ingestion/source/fivetran/fivetran_log_api.py +13 -0
- datahub/ingestion/source/fivetran/fivetran_query.py +43 -28
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/grafana/models.py +9 -1
- datahub/ingestion/source/grafana/report.py +1 -2
- datahub/ingestion/source/hex/hex.py +0 -2
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/snowflake/snowflake_report.py +0 -2
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +2 -2
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +25 -17
- datahub/ingestion/source/sql/teradata.py +1 -2
- datahub/ingestion/source/sql_queries.py +1 -2
- datahub/ingestion/source/tableau/tableau.py +0 -2
- datahub/ingestion/source/unity/config.py +49 -29
- datahub/ingestion/source/unity/report.py +1 -2
- datahub/ingestion/source_report/ingestion_stage.py +54 -12
- datahub/metadata/_internal_schema_classes.py +169 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/schema.avsc +101 -0
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0rc4.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.
|
|
1
|
+
acryl_datahub-1.3.0rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=IE1A1EIFh0zNNTxCNZuHp84ueLDs5dQeRmsZJgB-YLw,321
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -72,7 +72,7 @@ datahub/cli/cli_utils.py,sha256=0jTTAKuDZ8GzZwGHYytcT_MPR3Rb2DAcbr9n1H2T2sE,1617
|
|
|
72
72
|
datahub/cli/config_utils.py,sha256=EeBGfhmf4AxYoTfnZ4GSiGIgpzJFkduNjN_FwmxZGhA,4889
|
|
73
73
|
datahub/cli/container_cli.py,sha256=D0zWP3_3aww8_RTkMugOoOlILz3dPJ0TE9asQDLCm6E,1697
|
|
74
74
|
datahub/cli/delete_cli.py,sha256=0YJeWuXPGY0kbSn1AXK1-8SfCGBxb78ZbO53RAgyjQg,26515
|
|
75
|
-
datahub/cli/docker_check.py,sha256=
|
|
75
|
+
datahub/cli/docker_check.py,sha256=Iy5zFww7ZX-kdpBjqxW1s827DGalka2LLlxT2f1KU6g,12978
|
|
76
76
|
datahub/cli/docker_cli.py,sha256=3pzoe_qbWLhG27-M2wBU5MLLJM0xPlmC-EyoueoQEL4,33091
|
|
77
77
|
datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
|
|
78
78
|
datahub/cli/exists_cli.py,sha256=1cUYNh3GqNgVHWTrfMRGJoo9tFZNXcLetMaDbLaig6o,1233
|
|
@@ -151,13 +151,14 @@ datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX
|
|
|
151
151
|
datahub/ingestion/api/report.py,sha256=1w63Y2yN49IaDLZaIvXEjRU3yVb_9t3wzymSI-fumZM,18959
|
|
152
152
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
153
153
|
datahub/ingestion/api/sink.py,sha256=bureB3_sFXNISCM4yZSqhxMHW-ctDkAQqA0lJgQhJQ4,6047
|
|
154
|
-
datahub/ingestion/api/source.py,sha256=
|
|
154
|
+
datahub/ingestion/api/source.py,sha256=SKQFnA2OTT4jcy59ae1KF_ZFa_nIyFzRohkZJKixjIk,22712
|
|
155
155
|
datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
|
|
156
|
+
datahub/ingestion/api/source_protocols.py,sha256=llWgfxDquowIovgWqfhdiS1dzUQ3Y_SmCaq501S-NLc,768
|
|
156
157
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
157
158
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
158
159
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
159
160
|
datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
|
|
160
|
-
datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256
|
|
161
|
+
datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=-667F-xWArmeVaW-3mGsoIMNKPrnLx6KM4OfzMGsMOo,18064
|
|
161
162
|
datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
163
|
datahub/ingestion/autogenerated/capability_summary.json,sha256=9Ns5gYfVq0LYogaYtb0ioDPfu8SVhftiq9R7l0irQwg,111506
|
|
163
164
|
datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
|
|
@@ -227,7 +228,7 @@ datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgM
|
|
|
227
228
|
datahub/ingestion/source/redash.py,sha256=C4cDikWymbL88fDqaIPX5WA3f2sIEtH7bmhJKkmXJsM,30652
|
|
228
229
|
datahub/ingestion/source/salesforce.py,sha256=dMQ2jMu9P8r0rmREQA6KuFgAbegJ7WnHpUmMaUfHPDI,40942
|
|
229
230
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
230
|
-
datahub/ingestion/source/sql_queries.py,sha256=
|
|
231
|
+
datahub/ingestion/source/sql_queries.py,sha256=RexNsG-COCiKNyL0mfoeNoMrkXxmB5UBOSJKJnHNHy0,14563
|
|
231
232
|
datahub/ingestion/source/superset.py,sha256=qZ1SMeejwiM_ZkPlERXi3OSmabWcrsJSxZe98Eo9tqA,57866
|
|
232
233
|
datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
233
234
|
datahub/ingestion/source/abs/config.py,sha256=WW9JWbzqAJDblAcJKtNeuBHqOeJsB57lW2PqSD65-BU,6729
|
|
@@ -266,9 +267,9 @@ datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8
|
|
|
266
267
|
datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
|
|
267
268
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
|
|
268
269
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=2syDMaRpYEbtGUVejVAK5d6g8HqM54ZyEM908uLJ55o,3393
|
|
269
|
-
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=
|
|
270
|
+
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=zlTkqOmt5zxnO40rVTYHF3fclj4OVlLtqUXwW5WIIcM,7855
|
|
270
271
|
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=zbYb1EYnCJxgvsU8oT_76l0q_BW1exVjMWM1GAgd1nc,32600
|
|
271
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
272
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=_NLFRRXsrxMZ8Vjg2jVL4Pg1_NGt9hzn9EWBooJZ8so,51566
|
|
272
273
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
273
274
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
|
|
274
275
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
|
|
@@ -280,8 +281,8 @@ datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
|
|
|
280
281
|
datahub/ingestion/source/cassandra/cassandra.py,sha256=pNy61Z4kTqL_wGcWIYee5fnZiuJDseDcRcQwsxeAssk,14487
|
|
281
282
|
datahub/ingestion/source/cassandra/cassandra_api.py,sha256=wCJx-1ZByGMgPkORBO420sGucKkxXXE4pOLWXxdpMIw,14222
|
|
282
283
|
datahub/ingestion/source/cassandra/cassandra_config.py,sha256=w9LBiT8XrGvXlrvpcAU_xm82GiE4nUfEg-VKIX6MRMY,4446
|
|
283
|
-
datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=
|
|
284
|
-
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256
|
|
284
|
+
datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=nNQwjParCnvhT9nF-uwGtKmAR0dBS9eqAxfknV1CKiA,11022
|
|
285
|
+
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=-BsrK1R5jCQs-kUJYVj1x5zm_rG4teCYwS_r-OT3mCE,5002
|
|
285
286
|
datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
286
287
|
datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
|
|
287
288
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
@@ -319,8 +320,8 @@ datahub/ingestion/source/dremio/dremio_config.py,sha256=6Re-CIkLxi90VfBCeUTZ4bBv
|
|
|
319
320
|
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=MQk8BAHLufN69CntFfOV8K59A_AvLC-vwMS33Jw8bBg,3069
|
|
320
321
|
datahub/ingestion/source/dremio/dremio_entities.py,sha256=1gZrNqTp3Pm6vqGDQaWt3HkxEuHKxpGYQ4geVoFvxWI,15147
|
|
321
322
|
datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
|
|
322
|
-
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=
|
|
323
|
-
datahub/ingestion/source/dremio/dremio_source.py,sha256=
|
|
323
|
+
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=UEj-6FMdIWsry5535_kM2hLze5aPRMatTwvI0Bd2BSo,2140
|
|
324
|
+
datahub/ingestion/source/dremio/dremio_source.py,sha256=JFgzQiYcIkKcyiTJstYdfNxIVCGAcbi198kPQ55bYso,25596
|
|
324
325
|
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=wA1hqKk9cKMJDyEdZRQcDDLZPGYwuNqrvleUHTkWgrQ,10508
|
|
325
326
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
326
327
|
datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
|
|
@@ -333,13 +334,13 @@ datahub/ingestion/source/excel/report.py,sha256=oEkeI8J6is7zB9iz4RqASu_-Q5xl36lA
|
|
|
333
334
|
datahub/ingestion/source/excel/source.py,sha256=w_vOz4UD7BcXBBDKoo81_6-QFeOPITuXqkfjIMHCQj4,23827
|
|
334
335
|
datahub/ingestion/source/excel/util.py,sha256=YYmadYuCiT-4_MfQM0YSE7wuDcE0k8o2KrlOKM9Z6eI,406
|
|
335
336
|
datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
|
-
datahub/ingestion/source/fivetran/config.py,sha256=
|
|
337
|
+
datahub/ingestion/source/fivetran/config.py,sha256=vNmnQM3oekr2dOLPria-wjCLmp27bcYypIfoA6xx5k8,10290
|
|
337
338
|
datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
|
|
338
|
-
datahub/ingestion/source/fivetran/fivetran.py,sha256=
|
|
339
|
-
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256
|
|
340
|
-
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=
|
|
339
|
+
datahub/ingestion/source/fivetran/fivetran.py,sha256=Up5wbLk7hBk9b0pqcHwW6b0H52UJj90cmLhn0QJeZ4g,14416
|
|
340
|
+
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=-ibtfgxFv08P5_X5PVqV4CocxAjRWmY858esQL5OaAQ,13697
|
|
341
|
+
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=VJTka6cdIzlqy0aWyviMO2uSHcL0ZQFTCefUnnjv_Bk,6578
|
|
341
342
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
342
|
-
datahub/ingestion/source/gc/datahub_gc.py,sha256=
|
|
343
|
+
datahub/ingestion/source/gc/datahub_gc.py,sha256=nJ6QbHnTPL0MamWxNTZA26FMZsnmvUdUwnQcDS81K9s,12723
|
|
343
344
|
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=mUWcMt-_FL1SYGIgI4lGZDZGXspUUTv__5GN1W2oJ3s,17118
|
|
344
345
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=y-9ZIs_DZPUzYH1CI6HmaAZg3olNNA7MjT8HrCqAI0k,11159
|
|
345
346
|
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=4-qQR_2HGIYU8kC2hRIsJyKKMb9lKq4B6paJm_abUk4,12628
|
|
@@ -355,13 +356,13 @@ datahub/ingestion/source/grafana/grafana_api.py,sha256=pSbaHGPQ4HajCyVnvfEjl9zr2
|
|
|
355
356
|
datahub/ingestion/source/grafana/grafana_config.py,sha256=Xyt4jY9vKJuu99PX6cpOZ3WANTn00aUTYvItIPVC6XQ,3683
|
|
356
357
|
datahub/ingestion/source/grafana/grafana_source.py,sha256=tUYW0bXvnwhfBpSRQh4KobhOThXhezdH4hy2IPBkgag,21727
|
|
357
358
|
datahub/ingestion/source/grafana/lineage.py,sha256=qDWCiceOotVApOpcGhRK9OTqyRJIPqXcJi6CKnfK8z0,7178
|
|
358
|
-
datahub/ingestion/source/grafana/models.py,sha256=
|
|
359
|
-
datahub/ingestion/source/grafana/report.py,sha256=
|
|
359
|
+
datahub/ingestion/source/grafana/models.py,sha256=cOEYJ5DpmGkQKxqBq3iQE6D7aFLRGjGY6cbSP8gkFss,4676
|
|
360
|
+
datahub/ingestion/source/grafana/report.py,sha256=gNXKwGYCO6PLiqiM1K_Hv11vJuzxqcxiPNWdCY6dKNQ,2860
|
|
360
361
|
datahub/ingestion/source/grafana/types.py,sha256=Bz0-FIPBXHaBjfFHYGJhE20c2vYZwAsXr70MVGjSu6s,443
|
|
361
362
|
datahub/ingestion/source/hex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
362
363
|
datahub/ingestion/source/hex/api.py,sha256=rKr6GmhIs98_SQZYG4egZlS049WH4ZxW2-7Ueq8692Q,12940
|
|
363
364
|
datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJX1atiiDZyKtg,271
|
|
364
|
-
datahub/ingestion/source/hex/hex.py,sha256=
|
|
365
|
+
datahub/ingestion/source/hex/hex.py,sha256=NaMibmvFXoCD84G0-asEDILbiluZyulbZZg23moP0hI,13328
|
|
365
366
|
datahub/ingestion/source/hex/mapper.py,sha256=IyDAE-TzZUji3ICI_9gkYC3dQN3gl6kERRWNVRk80fQ,13905
|
|
366
367
|
datahub/ingestion/source/hex/model.py,sha256=eri4aRo1eXcE2SWjzCnPFMhzPTiJ8w8zC4GN7Lgpr74,1864
|
|
367
368
|
datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
|
|
@@ -451,10 +452,10 @@ datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX
|
|
|
451
452
|
datahub/ingestion/source/redshift/lineage.py,sha256=nqrvWJqaI493i1hIZ_7patrdOb16sZrgSSGapdMcEiU,31710
|
|
452
453
|
datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwqIql_rpWvlumY_EM,4309
|
|
453
454
|
datahub/ingestion/source/redshift/query.py,sha256=HKobQ-0crARgT8Mkfe-WBqVR9ZadYCZ9DGaUoEHHHww,48234
|
|
454
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
455
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=RN8rao3j7nocnnD6oPcEju09-8mOZTE4vFkgy_13Az8,41293
|
|
455
456
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
456
457
|
datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
|
|
457
|
-
datahub/ingestion/source/redshift/report.py,sha256=
|
|
458
|
+
datahub/ingestion/source/redshift/report.py,sha256=aCFDFUbz5xde8b_eRIHSBiELoo9LZFtDpp2lSadiPHU,2937
|
|
458
459
|
datahub/ingestion/source/redshift/usage.py,sha256=Q7R-caJovLXv33uZepMGX5Cvm4DqQSLZdiL_s-p06wU,17473
|
|
459
460
|
datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
|
|
460
461
|
datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
|
|
@@ -500,9 +501,9 @@ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=nam-bYV6wL9LfR
|
|
|
500
501
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=PmQi-qDlRhdJ-PsJ7x-EScIiswWRAxDDOKHydvN3mTY,7404
|
|
501
502
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=lAMA--X3nbWFdNs1DTHNm7crctB3RilX_pB-zy47piI,45528
|
|
502
503
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=wLDaYZrWJ0794KKn69rB_QF0_8Bzu5l_7L6mD77KVc4,40469
|
|
503
|
-
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=
|
|
504
|
+
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=fA6C-p9wM-jyTsXE_suTbCtrE_lle-5LI52S7wFYf00,6701
|
|
504
505
|
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=Dh_p0WpSaWOFMCsBgd3FpkQ_65k3QPq2VUnapp2VKuY,41431
|
|
505
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256
|
|
506
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=-JBfYgAXKMbVYu3f2viQoOQ0O2wv6GWfw1giOiETbpc,59091
|
|
506
507
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
507
508
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=5Li4H8KuS4qBKR98L2P-JZI79UXsOjcAFxZZyio9NU0,5787
|
|
508
509
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
|
|
@@ -521,10 +522,10 @@ datahub/ingestion/source/sql/hive.py,sha256=SPmAWlk63V-s-loBTU2hXsQA7xA4sa0iPK6p
|
|
|
521
522
|
datahub/ingestion/source/sql/hive_metastore.py,sha256=UBB7mV2eKuCxv3voi0F3tqF2MyRObSYxArAxETZfO4E,35997
|
|
522
523
|
datahub/ingestion/source/sql/mariadb.py,sha256=om6QoG5UtDldt1N6AfIWp3T-HXNaaqFmpz2i0JAemfM,654
|
|
523
524
|
datahub/ingestion/source/sql/mysql.py,sha256=_KhTODU7mqAoJOlrvRdPa7ihQkYLkgrZwaseQbasotM,5358
|
|
524
|
-
datahub/ingestion/source/sql/oracle.py,sha256=
|
|
525
|
+
datahub/ingestion/source/sql/oracle.py,sha256=nKMM1O67SkxCgT781eENl5xXpIR8_p5joTSdAYzQwHY,29988
|
|
525
526
|
datahub/ingestion/source/sql/postgres.py,sha256=blkO6bI0eDKFK8UNwUYcYtm_ObrQuWVSy5GyfdhL5dg,14274
|
|
526
527
|
datahub/ingestion/source/sql/presto.py,sha256=58py4M3UYxkGpbBFA1o96H154eUhD2dBm1hpxxYlYYM,4256
|
|
527
|
-
datahub/ingestion/source/sql/sql_common.py,sha256=
|
|
528
|
+
datahub/ingestion/source/sql/sql_common.py,sha256=EZGoeGlOYZoOrXOiKDI-S1mw-sPVV33PZQ_mPJlEvRc,57759
|
|
528
529
|
datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fxk9xei_CUIAXB0,8222
|
|
529
530
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
530
531
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=Zr39j4SI1fPTx1JdopVJyBslFnyp3lZCeb1th9eEB5c,11723
|
|
@@ -534,7 +535,7 @@ datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F
|
|
|
534
535
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
535
536
|
datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=u0ZvgdJjXZdo_vl7YIQfYuuWbGwpnH6OSozI2e8ZV4I,858
|
|
536
537
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
537
|
-
datahub/ingestion/source/sql/teradata.py,sha256=
|
|
538
|
+
datahub/ingestion/source/sql/teradata.py,sha256=Cij6ZKkkXoJaaSOdQQzBI0QtCC7lGRgUuT9TdC5_YTM,65507
|
|
538
539
|
datahub/ingestion/source/sql/trino.py,sha256=o5hm84iwRHO59TD2LaEqYgF2LYIcSUIKmlgu1VudGBY,19254
|
|
539
540
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=xlpQ9thbEn6uta6eVMzsHzmXFGg30VWInYwg0URgTK4,5784
|
|
540
541
|
datahub/ingestion/source/sql/vertica.py,sha256=LUpIzDpzFRtMcXCSG2EC5CBZbveBOvnB27P_pDrXC9o,33501
|
|
@@ -560,14 +561,14 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
560
561
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
|
|
561
562
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
562
563
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
563
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
564
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=OpAiVincp7x4XNHHB8HJWTG6By8cDQOgqa1vhg-78w4,158341
|
|
564
565
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=2vE7DIigPvMNcTCWSou0tliaVy9MgFR1qwqnE4pilw8,27086
|
|
565
566
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=2WPAHN-GAR83_c3eTTNd8cy0-zC8GIXeUdSxX_mNdas,2608
|
|
566
567
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=wsVD0SkGUwb-H9_g0aDclKwYkcoxugaWyAcyAMgBCAU,1136
|
|
567
568
|
datahub/ingestion/source/tableau/tableau_validation.py,sha256=Hjbfc1AMIkGgzo5ffWXtNRjrxSxzHvw7-dYZDt4d3WE,1819
|
|
568
569
|
datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
569
570
|
datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
|
|
570
|
-
datahub/ingestion/source/unity/config.py,sha256=
|
|
571
|
+
datahub/ingestion/source/unity/config.py,sha256=A5lkm-koBDOnBSSCTzOvYlsSIxT-xbK3NcJMS6xJMaQ,20914
|
|
571
572
|
datahub/ingestion/source/unity/connection_test.py,sha256=Dwpz4AIc6ZDwq6pWmRCSCuDUgNjPP_bVAVJumgAAS4w,2661
|
|
572
573
|
datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
|
|
573
574
|
datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
|
|
@@ -576,7 +577,7 @@ datahub/ingestion/source/unity/proxy.py,sha256=7TG1B9vdVdM3mmVkHDaLv2AXFfMkx1o8g
|
|
|
576
577
|
datahub/ingestion/source/unity/proxy_patch.py,sha256=gVYl5Fm_ase0iwBf3yDg7PE3bbTOl92RR-JgofHNkus,12374
|
|
577
578
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
578
579
|
datahub/ingestion/source/unity/proxy_types.py,sha256=dp7fRqIjaFCn6ivbgXOGHcw9bQQhZg6u-fdTK053oFM,10163
|
|
579
|
-
datahub/ingestion/source/unity/report.py,sha256=
|
|
580
|
+
datahub/ingestion/source/unity/report.py,sha256=wa5ER1hLc-362iWS9MNwDJf_adfAb3tfge_O3wSAaTc,3291
|
|
580
581
|
datahub/ingestion/source/unity/source.py,sha256=fC8pgacaXr9AQlwmz9FynJMJgNQ9MsDFbxUGxQRVtWo,57897
|
|
581
582
|
datahub/ingestion/source/unity/tag_entities.py,sha256=-Z-XYc1XhquE-Eoksn9v0o11ZjV9CWz8n6zeXLbzluQ,7275
|
|
582
583
|
datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
|
|
@@ -593,7 +594,7 @@ datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1p
|
|
|
593
594
|
datahub/ingestion/source_config/operation_config.py,sha256=hxF2RM0jk0HUPXYiliMniXBC-wz-ZPcs90ZGLfHT8rE,3924
|
|
594
595
|
datahub/ingestion/source_config/pulsar.py,sha256=zi3QTAw8CzzuwXgU-GUCuLyneT5pxHsLqZFyd15ECYs,5604
|
|
595
596
|
datahub/ingestion/source_report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
596
|
-
datahub/ingestion/source_report/ingestion_stage.py,sha256=
|
|
597
|
+
datahub/ingestion/source_report/ingestion_stage.py,sha256=0MY39QetRovYd1iBNSy0OW11YyaOsPaqhQi-1svmAcY,3106
|
|
597
598
|
datahub/ingestion/source_report/pulsar.py,sha256=f6CMNw8TyPp3tuSGsLLPEhSvoQLXwxtaaM6GmNvsANU,1119
|
|
598
599
|
datahub/ingestion/source_report/time_window.py,sha256=9yI5l2S1DcF7ClvUHLeN8m62I5vlhV9k-aQqSZh2l7w,229
|
|
599
600
|
datahub/ingestion/transformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -642,8 +643,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
642
643
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
643
644
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
644
645
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
645
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
646
|
-
datahub/metadata/schema.avsc,sha256=
|
|
646
|
+
datahub/metadata/_internal_schema_classes.py,sha256=1UZsNj9XmThYFXbG39BVKlaTTFywzayhVVon6svD3kM,1076970
|
|
647
|
+
datahub/metadata/schema.avsc,sha256=P6j7fiukfv03ZW8gis3m3mVKGlSV2JhgMcmrtf5sU7Q,775491
|
|
647
648
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
648
649
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
649
650
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -704,7 +705,7 @@ datahub/metadata/com/linkedin/pegasus2avro/ownership/__init__.py,sha256=r813MW_b
|
|
|
704
705
|
datahub/metadata/com/linkedin/pegasus2avro/persona/__init__.py,sha256=Y0iT9AeLsLAVzbcXk1hlqqUSPzSjNwn6DqvKxtsLq6A,305
|
|
705
706
|
datahub/metadata/com/linkedin/pegasus2avro/platform/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
706
707
|
datahub/metadata/com/linkedin/pegasus2avro/platform/event/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
707
|
-
datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py,sha256=
|
|
708
|
+
datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py,sha256=r4N_gf1o1AUux8hE1e_y6Aq27DaVOMF5U8GuATGlCuY,631
|
|
708
709
|
datahub/metadata/com/linkedin/pegasus2avro/platformresource/__init__.py,sha256=O8Zf6PhoTHFyf6hzAyqYASRyfFfdZiuvpLx7Ygw73WE,415
|
|
709
710
|
datahub/metadata/com/linkedin/pegasus2avro/policy/__init__.py,sha256=NOszM_xottEFQwmlDYkdci1oRddIadcNKd9w5EvuikU,917
|
|
710
711
|
datahub/metadata/com/linkedin/pegasus2avro/post/__init__.py,sha256=FFiWH2BSo057ZwsnFKjpCoAMo9szvC7Vn_RzGTSLnY0,518
|
|
@@ -926,6 +927,7 @@ datahub/metadata/schemas/QueryKey.avsc,sha256=VI4oIHvAO7f0lN_7V3QVuBfHcPz31c57Xt
|
|
|
926
927
|
datahub/metadata/schemas/QueryProperties.avsc,sha256=26Q3zzuzJbUCUG7IJ3q_OEdcNbyzloZzDJWPbv3GQAk,5589
|
|
927
928
|
datahub/metadata/schemas/QuerySubjects.avsc,sha256=PDM6Ek1AkjwLGDk17Rjg7KVtE2tbgFpc1XTGkhoG4MA,1437
|
|
928
929
|
datahub/metadata/schemas/QueryUsageStatistics.avsc,sha256=z1gfAnXdBoPEeERi5RESjrdBuS6AcIdqdN5JqWOSuNo,6192
|
|
930
|
+
datahub/metadata/schemas/RelationshipChangeEvent.avsc,sha256=il7yiTXf9nkZvSyqH8GUu0j2jws_n-QlABXThLJ871c,8288
|
|
929
931
|
datahub/metadata/schemas/RoleKey.avsc,sha256=Uas5jFViSHXhFqq8D4P6-UXqywOth3coztjQ5wA7wL0,449
|
|
930
932
|
datahub/metadata/schemas/RoleMembership.avsc,sha256=Al3LXKRowCiHhgTfwr3a-piID3Ld5kN-6-e9edTZ0uU,570
|
|
931
933
|
datahub/metadata/schemas/RoleProperties.avsc,sha256=tDw-WF1uBGIcrk38nOnXs3FCF_YjBhScarJbreQvwjE,3037
|
|
@@ -940,7 +942,7 @@ datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr
|
|
|
940
942
|
datahub/metadata/schemas/StructuredProperties.avsc,sha256=qe45sKZ9XrLcf15Gt03Ttzt2J_kJYHvN-DAOSErSYuY,7028
|
|
941
943
|
datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=a-6TaOQ4A7LDFLshmaFRBcXjz11p4vM0Q3X35GN4Zo0,11737
|
|
942
944
|
datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
|
|
943
|
-
datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=
|
|
945
|
+
datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=oHPbDuV_Wr_zR9vmTwZZKRzeaBx5nyqN1zn0cfIRhuQ,4044
|
|
944
946
|
datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
|
|
945
947
|
datahub/metadata/schemas/SystemMetadata.avsc,sha256=XEU32-oZsyVwMii-DlQSVDaUTfKQ9n7K0ChMJ07KHvQ,4457
|
|
946
948
|
datahub/metadata/schemas/TagKey.avsc,sha256=BfckMlx-wg_LV1_PFVgItfNBPtCQ8_erGeQM4LzOXmY,640
|
|
@@ -1123,8 +1125,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1123
1125
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1124
1126
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1125
1127
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1126
|
-
acryl_datahub-1.
|
|
1127
|
-
acryl_datahub-1.
|
|
1128
|
-
acryl_datahub-1.
|
|
1129
|
-
acryl_datahub-1.
|
|
1130
|
-
acryl_datahub-1.
|
|
1128
|
+
acryl_datahub-1.3.0rc4.dist-info/METADATA,sha256=nsoIeC_TnJAbmM1wNUIO_dpEiFFZR2xr1AiwJzuJnqk,184332
|
|
1129
|
+
acryl_datahub-1.3.0rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1130
|
+
acryl_datahub-1.3.0rc4.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
|
|
1131
|
+
acryl_datahub-1.3.0rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1132
|
+
acryl_datahub-1.3.0rc4.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/cli/docker_check.py
CHANGED
|
@@ -13,7 +13,7 @@ import yaml
|
|
|
13
13
|
from datahub.configuration.common import ExceptionWithProps
|
|
14
14
|
|
|
15
15
|
# Docker seems to under-report memory allocated, so we also need a bit of buffer to account for it.
|
|
16
|
-
MIN_MEMORY_NEEDED = 4 # GB
|
|
16
|
+
MIN_MEMORY_NEEDED = 4.3 # GB
|
|
17
17
|
MIN_DISK_SPACE_NEEDED = 13 # GB
|
|
18
18
|
|
|
19
19
|
DOCKER_COMPOSE_PROJECT_NAME = os.getenv("DATAHUB_COMPOSE_PROJECT_NAME", "datahub")
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import os
|
|
3
4
|
from typing import TYPE_CHECKING, Iterable, List
|
|
4
5
|
|
|
5
6
|
from datahub.emitter.rest_emitter import INGEST_MAX_PAYLOAD_BYTES
|
|
@@ -7,15 +8,36 @@ from datahub.emitter.serialization_helper import pre_json_transform
|
|
|
7
8
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
8
9
|
from datahub.metadata.schema_classes import (
|
|
9
10
|
DatasetProfileClass,
|
|
11
|
+
QueryPropertiesClass,
|
|
12
|
+
QuerySubjectsClass,
|
|
10
13
|
SchemaFieldClass,
|
|
11
14
|
SchemaMetadataClass,
|
|
15
|
+
UpstreamLineageClass,
|
|
12
16
|
)
|
|
13
17
|
|
|
14
18
|
if TYPE_CHECKING:
|
|
15
19
|
from datahub.ingestion.api.source import SourceReport
|
|
16
20
|
|
|
21
|
+
|
|
22
|
+
# TODO: ordering
|
|
23
|
+
# In the cases where we trim collections of data (e.g. fields in schema, upstream lineage, query subjects), given
|
|
24
|
+
# those collections are typically unordered, we should consider sorting them by some criteria (e.g. size, alphabetically)
|
|
25
|
+
# so that the trimming is deterministic and predictable and more importantly consistent across executions.
|
|
26
|
+
# In the case of schemaMetadata, that's more relevant as currently we may be trimming fields while adding nested ones,
|
|
27
|
+
# which may lead to poorly schema rendering in the UI.
|
|
28
|
+
|
|
17
29
|
logger = logging.getLogger(__name__)
|
|
18
30
|
|
|
31
|
+
DEFAULT_QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES = 5 * 1024 * 1024 # 5MB
|
|
32
|
+
QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES = int(
|
|
33
|
+
os.environ.get(
|
|
34
|
+
"QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES",
|
|
35
|
+
DEFAULT_QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES,
|
|
36
|
+
)
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
QUERY_STATEMENT_TRUNCATION_BUFFER = 100
|
|
40
|
+
|
|
19
41
|
|
|
20
42
|
class EnsureAspectSizeProcessor:
|
|
21
43
|
def __init__(
|
|
@@ -81,6 +103,274 @@ class EnsureAspectSizeProcessor:
|
|
|
81
103
|
|
|
82
104
|
schema.fields = accepted_fields
|
|
83
105
|
|
|
106
|
+
def ensure_query_subjects_size(
    self, entity_urn: str, query_subjects: QuerySubjectsClass
) -> None:
    """
    Trim the query subjects aspect in place so the summed size of its subjects
    stays below ``self.payload_constraint``.

    Subjects whose entity URN starts with ``urn:li:schemaField:`` are treated as
    column-level; everything else is table-level. Table-level subjects are
    admitted first, column-level ones afterwards. As soon as one subject does
    not fit, that subject and everything after it (including every later tier)
    is dropped, and one aggregate warning per affected tier is reported.

    Each subject's size is the length of its JSON serialization. The resulting
    list always holds table-level subjects first, then column-level subjects.
    """
    if not query_subjects.subjects:
        return

    # Bucket every subject together with its serialized size.
    sized_table_subjects = []
    sized_column_subjects = []
    for entry in query_subjects.subjects:
        entry_size = len(json.dumps(pre_json_transform(entry.to_obj())))
        if entry.entity.startswith("urn:li:schemaField:"):
            sized_column_subjects.append((entry, entry_size))
        else:
            sized_table_subjects.append((entry, entry_size))

    used_size = 0
    overflowed = False
    kept_table_subjects = []
    kept_column_subjects = []

    # Tiers in priority order; once one item overflows, nothing else is admitted.
    for sized_tier, kept_tier in (
        (sized_table_subjects, kept_table_subjects),
        (sized_column_subjects, kept_column_subjects),
    ):
        if overflowed:
            break
        for entry, entry_size in sized_tier:
            if used_size + entry_size >= self.payload_constraint:
                overflowed = True
                break
            kept_tier.append(entry)
            used_size += entry_size

    if overflowed:
        # One aggregate warning per tier; the helper ignores zero counts.
        self._maybe_warn_query_subjects(
            entity_urn,
            len(sized_table_subjects) - len(kept_table_subjects),
            "table-level lineage subjects",
        )
        self._maybe_warn_query_subjects(
            entity_urn,
            len(sized_column_subjects) - len(kept_column_subjects),
            "column-level lineage subjects",
        )

    query_subjects.subjects = kept_table_subjects + kept_column_subjects
|
|
173
|
+
|
|
174
|
+
def _maybe_warn_query_subjects(
    self, entity_urn: str, skipped_count: int, item_type: str
) -> None:
    """Report a query-subjects truncation warning when at least one item was skipped.

    Does nothing when ``skipped_count`` is zero or negative. Otherwise forwards
    a title, message and context (naming the count, the item type and the
    entity URN) to ``self.report.warning``.
    """
    if skipped_count <= 0:
        return

    warning_fields = {
        "title": "Query subjects truncated due to size constraint",
        "message": "Query subjects contained too much data and would have caused ingestion to fail",
        "context": f"Skipped {skipped_count} {item_type} for {entity_urn} due to aspect size constraints",
    }
    self.report.warning(**warning_fields)
|
|
184
|
+
|
|
185
|
+
def _maybe_warn_upstream_lineage(
    self, entity_urn: str, skipped_count: int, item_type: str
) -> None:
    """Report an upstream-lineage truncation warning when at least one item was skipped.

    Does nothing when ``skipped_count`` is zero or negative. Otherwise forwards
    a title, message and context (naming the count, the item type and the
    entity URN) to ``self.report.warning``.
    """
    if skipped_count <= 0:
        return

    warning_fields = {
        "title": "Upstream lineage truncated due to size constraint",
        "message": "Upstream lineage contained too much data and would have caused ingestion to fail",
        "context": f"Skipped {skipped_count} {item_type} for {entity_urn} due to aspect size constraints",
    }
    self.report.warning(**warning_fields)
|
|
195
|
+
|
|
196
|
+
def ensure_upstream_lineage_size(
    self, entity_urn: str, upstream_lineage: UpstreamLineageClass
) -> None:
    """
    Trim the upstream lineage aspect in place so the summed size of its items
    stays below ``self.payload_constraint``.

    Items are admitted in priority order:

    1. upstreams (highest priority),
    2. fine-grained lineages with upstreamType DATASET,
    3. fine-grained lineages with upstreamType FIELD_SET,
    4. fine-grained lineages with upstreamType NONE,
    5. fine-grained lineages with any other upstreamType (lowest priority).

    As soon as one item does not fit, that item and everything after it
    (including every lower-priority tier) is dropped, and one aggregate warning
    per affected tier is reported. Each item's size is the length of its JSON
    serialization.

    Fine-grained lineages with an unrecognized upstreamType are kept in the
    lowest-priority tier rather than being discarded without any warning.

    After the call, ``fineGrainedLineages`` is ordered by tier, or ``None`` if
    no fine-grained lineage was kept.
    """
    if not upstream_lineage.fineGrainedLineages and not upstream_lineage.upstreams:
        return

    def with_size(item):
        # Pair an item with the length of the JSON text it serializes to.
        return item, len(json.dumps(pre_json_transform(item.to_obj())))

    upstream_items_with_sizes = [
        with_size(upstream) for upstream in upstream_lineage.upstreams or []
    ]

    # Separate fine-grained lineage items by upstreamType.
    dataset_fg_items_with_sizes = []
    field_set_fg_items_with_sizes = []
    none_fg_items_with_sizes = []
    other_fg_items_with_sizes = []
    for fg_lineage in upstream_lineage.fineGrainedLineages or []:
        upstream_type_str = str(fg_lineage.upstreamType)
        if upstream_type_str == "DATASET":
            bucket = dataset_fg_items_with_sizes
        elif upstream_type_str == "FIELD_SET":
            bucket = field_set_fg_items_with_sizes
        elif upstream_type_str == "NONE":
            bucket = none_fg_items_with_sizes
        else:
            # Unknown type: keep it at the lowest priority instead of losing it.
            bucket = other_fg_items_with_sizes
        bucket.append(with_size(fg_lineage))

    # (warning label, candidates) in admission order.
    tiers = [
        ("upstream datasets", upstream_items_with_sizes),
        ("dataset-level fine-grained lineages", dataset_fg_items_with_sizes),
        ("field-set-level fine-grained lineages", field_set_fg_items_with_sizes),
        ("none-level fine-grained lineages", none_fg_items_with_sizes),
        (
            "fine-grained lineages with unrecognized upstream type",
            other_fg_items_with_sizes,
        ),
    ]

    total_lineage_size = 0
    # Once we find one that doesn't fit, stop everything else to prevent inconsistencies
    first_skip_done = False
    accepted_per_tier = []
    for _label, items_with_sizes in tiers:
        accepted = []
        accepted_per_tier.append(accepted)
        if first_skip_done:
            continue
        for item, item_size in items_with_sizes:
            if total_lineage_size + item_size < self.payload_constraint:
                accepted.append(item)
                total_lineage_size += item_size
            else:
                first_skip_done = True
                break

    # Log aggregate warnings instead of per-item warnings
    if first_skip_done:
        for (label, items_with_sizes), accepted in zip(tiers, accepted_per_tier):
            self._maybe_warn_upstream_lineage(
                entity_urn, len(items_with_sizes) - len(accepted), label
            )

    # Combine all accepted fine-grained lineages, highest priority first.
    accepted_fine_grained_lineages = [
        fg_lineage for accepted in accepted_per_tier[1:] for fg_lineage in accepted
    ]

    upstream_lineage.upstreams = accepted_per_tier[0]
    upstream_lineage.fineGrainedLineages = (
        accepted_fine_grained_lineages if accepted_fine_grained_lineages else None
    )
|
|
323
|
+
|
|
324
|
+
def ensure_query_properties_size(
    self, entity_urn: str, query_properties: QueryPropertiesClass
) -> None:
    """
    Shorten the query statement in place when the serialized query properties
    aspect is not below the allowed size.

    The allowed size is the smaller of QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES
    and ``self.payload_constraint``. When the aspect is too large, the statement
    is cut by the excess plus QUERY_STATEMENT_TRUNCATION_BUFFER, a note recording
    the original and truncated lengths is appended, and a warning is reported.

    If the statement is not longer than the required reduction, nothing is
    changed and a warning is logged instead, since other fields would need
    trimming.

    This guards against very large statements (e.g. 20MB+ INSERT INTO ... VALUES
    with a huge list of values) that caused ingestion to fail.
    """
    statement = query_properties.statement
    if not statement or not statement.value:
        return

    max_payload_size = min(
        QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES, self.payload_constraint
    )
    current_size = len(json.dumps(pre_json_transform(query_properties.to_obj())))
    if current_size < max_payload_size:
        return

    reduction_needed = (
        current_size - max_payload_size + QUERY_STATEMENT_TRUNCATION_BUFFER
    )
    original_statement_size = len(statement.value)

    # Truncation only works when the statement is longer than what must be removed.
    can_truncate = original_statement_size > reduction_needed > 0
    if not can_truncate:
        logger.warning(
            f"Cannot truncate query statement for {entity_urn} as it is smaller than or equal to the required reduction size {reduction_needed}. That means that 'ensure_query_properties_size' must be extended to trim other fields different than statement."
        )
        return

    new_statement_length = original_statement_size - reduction_needed
    truncation_message = f"... [original value was {original_statement_size} bytes and truncated to {new_statement_length} bytes]"
    statement.value = statement.value[:new_statement_length] + truncation_message

    self.report.warning(
        title="Query properties truncated due to size constraint",
        message="Query properties contained too much data and would have caused ingestion to fail",
        context=f"Query statement was truncated from {original_statement_size} to {new_statement_length} characters for {entity_urn} due to aspect size constraints",
    )
|
|
373
|
+
|
|
84
374
|
def ensure_aspect_size(
|
|
85
375
|
self,
|
|
86
376
|
stream: Iterable[MetadataWorkUnit],
|
|
@@ -96,4 +386,10 @@ class EnsureAspectSizeProcessor:
|
|
|
96
386
|
self.ensure_schema_metadata_size(wu.get_urn(), schema)
|
|
97
387
|
elif profile := wu.get_aspect_of_type(DatasetProfileClass):
|
|
98
388
|
self.ensure_dataset_profile_size(wu.get_urn(), profile)
|
|
389
|
+
elif query_subjects := wu.get_aspect_of_type(QuerySubjectsClass):
|
|
390
|
+
self.ensure_query_subjects_size(wu.get_urn(), query_subjects)
|
|
391
|
+
elif upstream_lineage := wu.get_aspect_of_type(UpstreamLineageClass):
|
|
392
|
+
self.ensure_upstream_lineage_size(wu.get_urn(), upstream_lineage)
|
|
393
|
+
elif query_properties := wu.get_aspect_of_type(QueryPropertiesClass):
|
|
394
|
+
self.ensure_query_properties_size(wu.get_urn(), query_properties)
|
|
99
395
|
yield wu
|