acryl-datahub 1.2.0.11rc4__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (43)
  1. {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0.dist-info}/METADATA +2582 -2577
  2. {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0.dist-info}/RECORD +43 -40
  3. datahub/_version.py +1 -1
  4. datahub/cli/docker_check.py +1 -1
  5. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +296 -0
  6. datahub/ingestion/api/source.py +29 -5
  7. datahub/ingestion/api/source_protocols.py +23 -0
  8. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  9. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -2
  10. datahub/ingestion/source/cassandra/cassandra_profiling.py +2 -2
  11. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  12. datahub/ingestion/source/dremio/dremio_reporting.py +0 -2
  13. datahub/ingestion/source/dremio/dremio_source.py +2 -2
  14. datahub/ingestion/source/fivetran/config.py +30 -5
  15. datahub/ingestion/source/fivetran/fivetran.py +0 -1
  16. datahub/ingestion/source/fivetran/fivetran_log_api.py +13 -0
  17. datahub/ingestion/source/fivetran/fivetran_query.py +43 -28
  18. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  19. datahub/ingestion/source/grafana/models.py +9 -1
  20. datahub/ingestion/source/grafana/report.py +1 -2
  21. datahub/ingestion/source/hex/hex.py +0 -2
  22. datahub/ingestion/source/redshift/redshift.py +2 -2
  23. datahub/ingestion/source/redshift/report.py +0 -2
  24. datahub/ingestion/source/snowflake/snowflake_report.py +0 -2
  25. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +2 -2
  26. datahub/ingestion/source/sql/oracle.py +1 -1
  27. datahub/ingestion/source/sql/sql_common.py +25 -17
  28. datahub/ingestion/source/sql/teradata.py +1 -2
  29. datahub/ingestion/source/sql_queries.py +1 -2
  30. datahub/ingestion/source/tableau/tableau.py +0 -2
  31. datahub/ingestion/source/unity/config.py +11 -42
  32. datahub/ingestion/source/unity/connection.py +61 -0
  33. datahub/ingestion/source/unity/report.py +1 -2
  34. datahub/ingestion/source_report/ingestion_stage.py +54 -12
  35. datahub/metadata/_internal_schema_classes.py +169 -0
  36. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  37. datahub/metadata/schema.avsc +101 -0
  38. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  39. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  40. {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0.dist-info}/WHEEL +0 -0
  41. {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0.dist-info}/entry_points.txt +0 -0
  42. {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0.dist-info}/licenses/LICENSE +0 -0
  43. {acryl_datahub-1.2.0.11rc4.dist-info → acryl_datahub-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.11rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.3.0.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=_VILFvBE67asqk9L6Hqvhg65t1Q8zAbMrsKHYLWBj4I,324
4
+ datahub/_version.py,sha256=9bBnOhXKK0Kz9kBFa9_js_7Pj_9YvV8did9BXYK34H8,318
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -72,7 +72,7 @@ datahub/cli/cli_utils.py,sha256=0jTTAKuDZ8GzZwGHYytcT_MPR3Rb2DAcbr9n1H2T2sE,1617
72
72
  datahub/cli/config_utils.py,sha256=EeBGfhmf4AxYoTfnZ4GSiGIgpzJFkduNjN_FwmxZGhA,4889
73
73
  datahub/cli/container_cli.py,sha256=D0zWP3_3aww8_RTkMugOoOlILz3dPJ0TE9asQDLCm6E,1697
74
74
  datahub/cli/delete_cli.py,sha256=0YJeWuXPGY0kbSn1AXK1-8SfCGBxb78ZbO53RAgyjQg,26515
75
- datahub/cli/docker_check.py,sha256=CE6YNdX4XsXT8GYiQOLbOLi2x_-kK2aQqmTl0ZP0Uu4,12976
75
+ datahub/cli/docker_check.py,sha256=Iy5zFww7ZX-kdpBjqxW1s827DGalka2LLlxT2f1KU6g,12978
76
76
  datahub/cli/docker_cli.py,sha256=3pzoe_qbWLhG27-M2wBU5MLLJM0xPlmC-EyoueoQEL4,33091
77
77
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
78
78
  datahub/cli/exists_cli.py,sha256=1cUYNh3GqNgVHWTrfMRGJoo9tFZNXcLetMaDbLaig6o,1233
@@ -151,13 +151,14 @@ datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX
151
151
  datahub/ingestion/api/report.py,sha256=1w63Y2yN49IaDLZaIvXEjRU3yVb_9t3wzymSI-fumZM,18959
152
152
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
153
153
  datahub/ingestion/api/sink.py,sha256=bureB3_sFXNISCM4yZSqhxMHW-ctDkAQqA0lJgQhJQ4,6047
154
- datahub/ingestion/api/source.py,sha256=JASs7WygVB6g-tcwtchaftzv3lNtlVM31lEa242pn44,21853
154
+ datahub/ingestion/api/source.py,sha256=SKQFnA2OTT4jcy59ae1KF_ZFa_nIyFzRohkZJKixjIk,22712
155
155
  datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
156
+ datahub/ingestion/api/source_protocols.py,sha256=llWgfxDquowIovgWqfhdiS1dzUQ3Y_SmCaq501S-NLc,768
156
157
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
157
158
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
158
159
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
159
160
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
160
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=0BwkpLhORbsiTHq0g_N_1cVVoZYdLR3qz02mNmsV9-M,4444
161
+ datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=-667F-xWArmeVaW-3mGsoIMNKPrnLx6KM4OfzMGsMOo,18064
161
162
  datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
163
  datahub/ingestion/autogenerated/capability_summary.json,sha256=9Ns5gYfVq0LYogaYtb0ioDPfu8SVhftiq9R7l0irQwg,111506
163
164
  datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
@@ -227,7 +228,7 @@ datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgM
227
228
  datahub/ingestion/source/redash.py,sha256=C4cDikWymbL88fDqaIPX5WA3f2sIEtH7bmhJKkmXJsM,30652
228
229
  datahub/ingestion/source/salesforce.py,sha256=dMQ2jMu9P8r0rmREQA6KuFgAbegJ7WnHpUmMaUfHPDI,40942
229
230
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
230
- datahub/ingestion/source/sql_queries.py,sha256=mhEIbNywHpZKcX22ENdJolD3z5x4TNR5gXAEmX7ejfQ,14666
231
+ datahub/ingestion/source/sql_queries.py,sha256=RexNsG-COCiKNyL0mfoeNoMrkXxmB5UBOSJKJnHNHy0,14563
231
232
  datahub/ingestion/source/superset.py,sha256=qZ1SMeejwiM_ZkPlERXi3OSmabWcrsJSxZe98Eo9tqA,57866
232
233
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
233
234
  datahub/ingestion/source/abs/config.py,sha256=WW9JWbzqAJDblAcJKtNeuBHqOeJsB57lW2PqSD65-BU,6729
@@ -266,9 +267,9 @@ datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8
266
267
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
267
268
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
268
269
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=2syDMaRpYEbtGUVejVAK5d6g8HqM54ZyEM908uLJ55o,3393
269
- datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
270
+ datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=zlTkqOmt5zxnO40rVTYHF3fclj4OVlLtqUXwW5WIIcM,7855
270
271
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=zbYb1EYnCJxgvsU8oT_76l0q_BW1exVjMWM1GAgd1nc,32600
271
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c9a-SlZDOYNiS__vC5ezVVNM0UHasXWxWNRZkkP_aOo,51552
272
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=_NLFRRXsrxMZ8Vjg2jVL4Pg1_NGt9hzn9EWBooJZ8so,51566
272
273
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
273
274
  datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
274
275
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
@@ -280,8 +281,8 @@ datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
280
281
  datahub/ingestion/source/cassandra/cassandra.py,sha256=pNy61Z4kTqL_wGcWIYee5fnZiuJDseDcRcQwsxeAssk,14487
281
282
  datahub/ingestion/source/cassandra/cassandra_api.py,sha256=wCJx-1ZByGMgPkORBO420sGucKkxXXE4pOLWXxdpMIw,14222
282
283
  datahub/ingestion/source/cassandra/cassandra_config.py,sha256=w9LBiT8XrGvXlrvpcAU_xm82GiE4nUfEg-VKIX6MRMY,4446
283
- datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=FdgPK_8s8otTOJDqNM4rpF6Mn4lFWbnjTaKEChzn2iE,11011
284
- datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
284
+ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=nNQwjParCnvhT9nF-uwGtKmAR0dBS9eqAxfknV1CKiA,11022
285
+ datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=-BsrK1R5jCQs-kUJYVj1x5zm_rG4teCYwS_r-OT3mCE,5002
285
286
  datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
286
287
  datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
287
288
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
@@ -319,8 +320,8 @@ datahub/ingestion/source/dremio/dremio_config.py,sha256=6Re-CIkLxi90VfBCeUTZ4bBv
319
320
  datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=MQk8BAHLufN69CntFfOV8K59A_AvLC-vwMS33Jw8bBg,3069
320
321
  datahub/ingestion/source/dremio/dremio_entities.py,sha256=1gZrNqTp3Pm6vqGDQaWt3HkxEuHKxpGYQ4geVoFvxWI,15147
321
322
  datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
322
- datahub/ingestion/source/dremio/dremio_reporting.py,sha256=YRKM6PvoJYHLBXmOGwkgou_8x8_oA2xaqTWWoVuwFMY,2247
323
- datahub/ingestion/source/dremio/dremio_source.py,sha256=GZnpJhuqxCzDz4tTklFt2dSfF-L6rMhlvzGuvvibD7k,25563
323
+ datahub/ingestion/source/dremio/dremio_reporting.py,sha256=UEj-6FMdIWsry5535_kM2hLze5aPRMatTwvI0Bd2BSo,2140
324
+ datahub/ingestion/source/dremio/dremio_source.py,sha256=JFgzQiYcIkKcyiTJstYdfNxIVCGAcbi198kPQ55bYso,25596
324
325
  datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=wA1hqKk9cKMJDyEdZRQcDDLZPGYwuNqrvleUHTkWgrQ,10508
325
326
  datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
326
327
  datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
@@ -333,13 +334,13 @@ datahub/ingestion/source/excel/report.py,sha256=oEkeI8J6is7zB9iz4RqASu_-Q5xl36lA
333
334
  datahub/ingestion/source/excel/source.py,sha256=w_vOz4UD7BcXBBDKoo81_6-QFeOPITuXqkfjIMHCQj4,23827
334
335
  datahub/ingestion/source/excel/util.py,sha256=YYmadYuCiT-4_MfQM0YSE7wuDcE0k8o2KrlOKM9Z6eI,406
335
336
  datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
- datahub/ingestion/source/fivetran/config.py,sha256=vjN90fT98paJEZyJgw7UCapVfujiDH9_iPX8jUbOEWE,9087
337
+ datahub/ingestion/source/fivetran/config.py,sha256=6yriUMtTPMZUHqbZ9gzyFduPVt6CxzirdYSg4k-ziYI,10285
337
338
  datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
338
- datahub/ingestion/source/fivetran/fivetran.py,sha256=s8wcECtmuugUoZ0Zdthq0SIPpTLvziZXuhhUX9bJ5N4,14492
339
- datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=PNzuykiiFTU8FhBIfUbW6udURZpz_35aq7rfffbpIfA,13010
340
- datahub/ingestion/source/fivetran/fivetran_query.py,sha256=WE7kZ54zR1RKg_GNXHoKKEBiyw7PhNp-Ei8Y4OMXXGc,5608
339
+ datahub/ingestion/source/fivetran/fivetran.py,sha256=Up5wbLk7hBk9b0pqcHwW6b0H52UJj90cmLhn0QJeZ4g,14416
340
+ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=-ibtfgxFv08P5_X5PVqV4CocxAjRWmY858esQL5OaAQ,13697
341
+ datahub/ingestion/source/fivetran/fivetran_query.py,sha256=VJTka6cdIzlqy0aWyviMO2uSHcL0ZQFTCefUnnjv_Bk,6578
341
342
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
342
- datahub/ingestion/source/gc/datahub_gc.py,sha256=EXO-Stj6gGMLTSTbSBC-C3_zpjpQtFN9pAMWR95ma0I,12830
343
+ datahub/ingestion/source/gc/datahub_gc.py,sha256=nJ6QbHnTPL0MamWxNTZA26FMZsnmvUdUwnQcDS81K9s,12723
343
344
  datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=mUWcMt-_FL1SYGIgI4lGZDZGXspUUTv__5GN1W2oJ3s,17118
344
345
  datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=y-9ZIs_DZPUzYH1CI6HmaAZg3olNNA7MjT8HrCqAI0k,11159
345
346
  datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=4-qQR_2HGIYU8kC2hRIsJyKKMb9lKq4B6paJm_abUk4,12628
@@ -355,13 +356,13 @@ datahub/ingestion/source/grafana/grafana_api.py,sha256=pSbaHGPQ4HajCyVnvfEjl9zr2
355
356
  datahub/ingestion/source/grafana/grafana_config.py,sha256=Xyt4jY9vKJuu99PX6cpOZ3WANTn00aUTYvItIPVC6XQ,3683
356
357
  datahub/ingestion/source/grafana/grafana_source.py,sha256=tUYW0bXvnwhfBpSRQh4KobhOThXhezdH4hy2IPBkgag,21727
357
358
  datahub/ingestion/source/grafana/lineage.py,sha256=qDWCiceOotVApOpcGhRK9OTqyRJIPqXcJi6CKnfK8z0,7178
358
- datahub/ingestion/source/grafana/models.py,sha256=MqMGdQKjvn0cdvryEyX54w8zGZwMiBdUAvhFM1TNv1I,4401
359
- datahub/ingestion/source/grafana/report.py,sha256=LTRxjXYCYHwOaj1zrenCDAvFBE5U5tAv6Wh5LlfsgfE,2963
359
+ datahub/ingestion/source/grafana/models.py,sha256=cOEYJ5DpmGkQKxqBq3iQE6D7aFLRGjGY6cbSP8gkFss,4676
360
+ datahub/ingestion/source/grafana/report.py,sha256=gNXKwGYCO6PLiqiM1K_Hv11vJuzxqcxiPNWdCY6dKNQ,2860
360
361
  datahub/ingestion/source/grafana/types.py,sha256=Bz0-FIPBXHaBjfFHYGJhE20c2vYZwAsXr70MVGjSu6s,443
361
362
  datahub/ingestion/source/hex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
362
363
  datahub/ingestion/source/hex/api.py,sha256=rKr6GmhIs98_SQZYG4egZlS049WH4ZxW2-7Ueq8692Q,12940
363
364
  datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJX1atiiDZyKtg,271
364
- datahub/ingestion/source/hex/hex.py,sha256=LI6P5YHXl8eoveWUbjwIDf0pU2bvrdQXE0Q_tKSCw90,13435
365
+ datahub/ingestion/source/hex/hex.py,sha256=NaMibmvFXoCD84G0-asEDILbiluZyulbZZg23moP0hI,13328
365
366
  datahub/ingestion/source/hex/mapper.py,sha256=IyDAE-TzZUji3ICI_9gkYC3dQN3gl6kERRWNVRk80fQ,13905
366
367
  datahub/ingestion/source/hex/model.py,sha256=eri4aRo1eXcE2SWjzCnPFMhzPTiJ8w8zC4GN7Lgpr74,1864
367
368
  datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
@@ -451,10 +452,10 @@ datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX
451
452
  datahub/ingestion/source/redshift/lineage.py,sha256=nqrvWJqaI493i1hIZ_7patrdOb16sZrgSSGapdMcEiU,31710
452
453
  datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwqIql_rpWvlumY_EM,4309
453
454
  datahub/ingestion/source/redshift/query.py,sha256=HKobQ-0crARgT8Mkfe-WBqVR9ZadYCZ9DGaUoEHHHww,48234
454
- datahub/ingestion/source/redshift/redshift.py,sha256=zalndYg_LK5aJ8cX_ZuXLcTYajtlavmV-dmQIsjGxjg,41260
455
+ datahub/ingestion/source/redshift/redshift.py,sha256=RN8rao3j7nocnnD6oPcEju09-8mOZTE4vFkgy_13Az8,41293
455
456
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
456
457
  datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
457
- datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
458
+ datahub/ingestion/source/redshift/report.py,sha256=aCFDFUbz5xde8b_eRIHSBiELoo9LZFtDpp2lSadiPHU,2937
458
459
  datahub/ingestion/source/redshift/usage.py,sha256=Q7R-caJovLXv33uZepMGX5Cvm4DqQSLZdiL_s-p06wU,17473
459
460
  datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
460
461
  datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
@@ -500,9 +501,9 @@ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=nam-bYV6wL9LfR
500
501
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=PmQi-qDlRhdJ-PsJ7x-EScIiswWRAxDDOKHydvN3mTY,7404
501
502
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=lAMA--X3nbWFdNs1DTHNm7crctB3RilX_pB-zy47piI,45528
502
503
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=wLDaYZrWJ0794KKn69rB_QF0_8Bzu5l_7L6mD77KVc4,40469
503
- datahub/ingestion/source/snowflake/snowflake_report.py,sha256=GPuQTOaR8SCqMBme6Q55vX4zzT4m3ELmp83rp_grQA0,6808
504
+ datahub/ingestion/source/snowflake/snowflake_report.py,sha256=fA6C-p9wM-jyTsXE_suTbCtrE_lle-5LI52S7wFYf00,6701
504
505
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=Dh_p0WpSaWOFMCsBgd3FpkQ_65k3QPq2VUnapp2VKuY,41431
505
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=tojh0BQibKfoSqszXiyoQRoLy3g0g4tntyRpZmsjOYk,59084
506
+ datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=-JBfYgAXKMbVYu3f2viQoOQ0O2wv6GWfw1giOiETbpc,59091
506
507
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
507
508
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=5Li4H8KuS4qBKR98L2P-JZI79UXsOjcAFxZZyio9NU0,5787
508
509
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
@@ -521,10 +522,10 @@ datahub/ingestion/source/sql/hive.py,sha256=SPmAWlk63V-s-loBTU2hXsQA7xA4sa0iPK6p
521
522
  datahub/ingestion/source/sql/hive_metastore.py,sha256=UBB7mV2eKuCxv3voi0F3tqF2MyRObSYxArAxETZfO4E,35997
522
523
  datahub/ingestion/source/sql/mariadb.py,sha256=om6QoG5UtDldt1N6AfIWp3T-HXNaaqFmpz2i0JAemfM,654
523
524
  datahub/ingestion/source/sql/mysql.py,sha256=_KhTODU7mqAoJOlrvRdPa7ihQkYLkgrZwaseQbasotM,5358
524
- datahub/ingestion/source/sql/oracle.py,sha256=oCQEJ6zG9RnQLYPZrJ4Xuhz5pJlaB9-PcaeudebTsfk,29960
525
+ datahub/ingestion/source/sql/oracle.py,sha256=nKMM1O67SkxCgT781eENl5xXpIR8_p5joTSdAYzQwHY,29988
525
526
  datahub/ingestion/source/sql/postgres.py,sha256=blkO6bI0eDKFK8UNwUYcYtm_ObrQuWVSy5GyfdhL5dg,14274
526
527
  datahub/ingestion/source/sql/presto.py,sha256=58py4M3UYxkGpbBFA1o96H154eUhD2dBm1hpxxYlYYM,4256
527
- datahub/ingestion/source/sql/sql_common.py,sha256=2m3eq_Yn1KPi8x_E0J3vpiuND7BOpnA0zAid4jXaMJw,57383
528
+ datahub/ingestion/source/sql/sql_common.py,sha256=EZGoeGlOYZoOrXOiKDI-S1mw-sPVV33PZQ_mPJlEvRc,57759
528
529
  datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fxk9xei_CUIAXB0,8222
529
530
  datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
530
531
  datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=Zr39j4SI1fPTx1JdopVJyBslFnyp3lZCeb1th9eEB5c,11723
@@ -534,7 +535,7 @@ datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F
534
535
  datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
535
536
  datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=u0ZvgdJjXZdo_vl7YIQfYuuWbGwpnH6OSozI2e8ZV4I,858
536
537
  datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
537
- datahub/ingestion/source/sql/teradata.py,sha256=zIPYQQQ89cSZCy6XYHn9VcfTImRQw6AWY62aAfm49sE,65610
538
+ datahub/ingestion/source/sql/teradata.py,sha256=Cij6ZKkkXoJaaSOdQQzBI0QtCC7lGRgUuT9TdC5_YTM,65507
538
539
  datahub/ingestion/source/sql/trino.py,sha256=o5hm84iwRHO59TD2LaEqYgF2LYIcSUIKmlgu1VudGBY,19254
539
540
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=xlpQ9thbEn6uta6eVMzsHzmXFGg30VWInYwg0URgTK4,5784
540
541
  datahub/ingestion/source/sql/vertica.py,sha256=LUpIzDpzFRtMcXCSG2EC5CBZbveBOvnB27P_pDrXC9o,33501
@@ -560,14 +561,15 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
560
561
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
561
562
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
562
563
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
563
- datahub/ingestion/source/tableau/tableau.py,sha256=BgR_-IuzUYuaSV3EOvMcefwBZ_h0dYAvNEn5Dv6meA4,158448
564
+ datahub/ingestion/source/tableau/tableau.py,sha256=OpAiVincp7x4XNHHB8HJWTG6By8cDQOgqa1vhg-78w4,158341
564
565
  datahub/ingestion/source/tableau/tableau_common.py,sha256=2vE7DIigPvMNcTCWSou0tliaVy9MgFR1qwqnE4pilw8,27086
565
566
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=2WPAHN-GAR83_c3eTTNd8cy0-zC8GIXeUdSxX_mNdas,2608
566
567
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=wsVD0SkGUwb-H9_g0aDclKwYkcoxugaWyAcyAMgBCAU,1136
567
568
  datahub/ingestion/source/tableau/tableau_validation.py,sha256=Hjbfc1AMIkGgzo5ffWXtNRjrxSxzHvw7-dYZDt4d3WE,1819
568
569
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
569
570
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
570
- datahub/ingestion/source/unity/config.py,sha256=LX379ZW7A3Xhf3PmJoe5af6lFSuRHnJAzPzmnU7HWq8,20353
571
+ datahub/ingestion/source/unity/config.py,sha256=lHvr-PGVcZ0P_2e0RuwmfSRlQRJ81astx4hQZkNrX_k,18713
572
+ datahub/ingestion/source/unity/connection.py,sha256=iCsQhZ1vxzv1qQKTl_sFUZdmBLLIrNdu2X2V8hT7IGI,2441
571
573
  datahub/ingestion/source/unity/connection_test.py,sha256=Dwpz4AIc6ZDwq6pWmRCSCuDUgNjPP_bVAVJumgAAS4w,2661
572
574
  datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
573
575
  datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
@@ -576,7 +578,7 @@ datahub/ingestion/source/unity/proxy.py,sha256=7TG1B9vdVdM3mmVkHDaLv2AXFfMkx1o8g
576
578
  datahub/ingestion/source/unity/proxy_patch.py,sha256=gVYl5Fm_ase0iwBf3yDg7PE3bbTOl92RR-JgofHNkus,12374
577
579
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
578
580
  datahub/ingestion/source/unity/proxy_types.py,sha256=dp7fRqIjaFCn6ivbgXOGHcw9bQQhZg6u-fdTK053oFM,10163
579
- datahub/ingestion/source/unity/report.py,sha256=_2frKPv_2RRFcCfqlKOks3YR5lrUMIa3zdFJtNO-m6E,3394
581
+ datahub/ingestion/source/unity/report.py,sha256=wa5ER1hLc-362iWS9MNwDJf_adfAb3tfge_O3wSAaTc,3291
580
582
  datahub/ingestion/source/unity/source.py,sha256=fC8pgacaXr9AQlwmz9FynJMJgNQ9MsDFbxUGxQRVtWo,57897
581
583
  datahub/ingestion/source/unity/tag_entities.py,sha256=-Z-XYc1XhquE-Eoksn9v0o11ZjV9CWz8n6zeXLbzluQ,7275
582
584
  datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
@@ -593,7 +595,7 @@ datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1p
593
595
  datahub/ingestion/source_config/operation_config.py,sha256=hxF2RM0jk0HUPXYiliMniXBC-wz-ZPcs90ZGLfHT8rE,3924
594
596
  datahub/ingestion/source_config/pulsar.py,sha256=zi3QTAw8CzzuwXgU-GUCuLyneT5pxHsLqZFyd15ECYs,5604
595
597
  datahub/ingestion/source_report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
596
- datahub/ingestion/source_report/ingestion_stage.py,sha256=SU_FKFZhShZATLcFr735i_hWpdqNGdAWoZxh22p3P1k,1787
598
+ datahub/ingestion/source_report/ingestion_stage.py,sha256=0MY39QetRovYd1iBNSy0OW11YyaOsPaqhQi-1svmAcY,3106
597
599
  datahub/ingestion/source_report/pulsar.py,sha256=f6CMNw8TyPp3tuSGsLLPEhSvoQLXwxtaaM6GmNvsANU,1119
598
600
  datahub/ingestion/source_report/time_window.py,sha256=9yI5l2S1DcF7ClvUHLeN8m62I5vlhV9k-aQqSZh2l7w,229
599
601
  datahub/ingestion/transformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -642,8 +644,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
642
644
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
643
645
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
644
646
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
645
- datahub/metadata/_internal_schema_classes.py,sha256=FYFjU83woitdWMNTk-fZhtCfuUVLv05-syASUGMVwGM,1069868
646
- datahub/metadata/schema.avsc,sha256=AS5vIh4JGmKVeY_b0fNjUihKBmACH0voLdyxuDqqAXo,771809
647
+ datahub/metadata/_internal_schema_classes.py,sha256=1UZsNj9XmThYFXbG39BVKlaTTFywzayhVVon6svD3kM,1076970
648
+ datahub/metadata/schema.avsc,sha256=P6j7fiukfv03ZW8gis3m3mVKGlSV2JhgMcmrtf5sU7Q,775491
647
649
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
648
650
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
649
651
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -704,7 +706,7 @@ datahub/metadata/com/linkedin/pegasus2avro/ownership/__init__.py,sha256=r813MW_b
704
706
  datahub/metadata/com/linkedin/pegasus2avro/persona/__init__.py,sha256=Y0iT9AeLsLAVzbcXk1hlqqUSPzSjNwn6DqvKxtsLq6A,305
705
707
  datahub/metadata/com/linkedin/pegasus2avro/platform/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
706
708
  datahub/metadata/com/linkedin/pegasus2avro/platform/event/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
707
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py,sha256=O8ngP2_Di9VUQELe4wr3CAL1UfceN0K0F_rgTBEfUvo,383
709
+ datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py,sha256=r4N_gf1o1AUux8hE1e_y6Aq27DaVOMF5U8GuATGlCuY,631
708
710
  datahub/metadata/com/linkedin/pegasus2avro/platformresource/__init__.py,sha256=O8Zf6PhoTHFyf6hzAyqYASRyfFfdZiuvpLx7Ygw73WE,415
709
711
  datahub/metadata/com/linkedin/pegasus2avro/policy/__init__.py,sha256=NOszM_xottEFQwmlDYkdci1oRddIadcNKd9w5EvuikU,917
710
712
  datahub/metadata/com/linkedin/pegasus2avro/post/__init__.py,sha256=FFiWH2BSo057ZwsnFKjpCoAMo9szvC7Vn_RzGTSLnY0,518
@@ -926,6 +928,7 @@ datahub/metadata/schemas/QueryKey.avsc,sha256=VI4oIHvAO7f0lN_7V3QVuBfHcPz31c57Xt
926
928
  datahub/metadata/schemas/QueryProperties.avsc,sha256=26Q3zzuzJbUCUG7IJ3q_OEdcNbyzloZzDJWPbv3GQAk,5589
927
929
  datahub/metadata/schemas/QuerySubjects.avsc,sha256=PDM6Ek1AkjwLGDk17Rjg7KVtE2tbgFpc1XTGkhoG4MA,1437
928
930
  datahub/metadata/schemas/QueryUsageStatistics.avsc,sha256=z1gfAnXdBoPEeERi5RESjrdBuS6AcIdqdN5JqWOSuNo,6192
931
+ datahub/metadata/schemas/RelationshipChangeEvent.avsc,sha256=il7yiTXf9nkZvSyqH8GUu0j2jws_n-QlABXThLJ871c,8288
929
932
  datahub/metadata/schemas/RoleKey.avsc,sha256=Uas5jFViSHXhFqq8D4P6-UXqywOth3coztjQ5wA7wL0,449
930
933
  datahub/metadata/schemas/RoleMembership.avsc,sha256=Al3LXKRowCiHhgTfwr3a-piID3Ld5kN-6-e9edTZ0uU,570
931
934
  datahub/metadata/schemas/RoleProperties.avsc,sha256=tDw-WF1uBGIcrk38nOnXs3FCF_YjBhScarJbreQvwjE,3037
@@ -940,7 +943,7 @@ datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr
940
943
  datahub/metadata/schemas/StructuredProperties.avsc,sha256=qe45sKZ9XrLcf15Gt03Ttzt2J_kJYHvN-DAOSErSYuY,7028
941
944
  datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=a-6TaOQ4A7LDFLshmaFRBcXjz11p4vM0Q3X35GN4Zo0,11737
942
945
  datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
943
- datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
946
+ datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=oHPbDuV_Wr_zR9vmTwZZKRzeaBx5nyqN1zn0cfIRhuQ,4044
944
947
  datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
945
948
  datahub/metadata/schemas/SystemMetadata.avsc,sha256=XEU32-oZsyVwMii-DlQSVDaUTfKQ9n7K0ChMJ07KHvQ,4457
946
949
  datahub/metadata/schemas/TagKey.avsc,sha256=BfckMlx-wg_LV1_PFVgItfNBPtCQ8_erGeQM4LzOXmY,640
@@ -1123,8 +1126,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1123
1126
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1124
1127
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1125
1128
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1126
- acryl_datahub-1.2.0.11rc4.dist-info/METADATA,sha256=SxpOrYaLX-58eO685hRF4fMeo3zkiDS0yFIVNELypnI,184162
1127
- acryl_datahub-1.2.0.11rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1128
- acryl_datahub-1.2.0.11rc4.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1129
- acryl_datahub-1.2.0.11rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1130
- acryl_datahub-1.2.0.11rc4.dist-info/RECORD,,
1129
+ acryl_datahub-1.3.0.dist-info/METADATA,sha256=4oWnajuLBUpYiIu7a_J7r6ddL3EWumCUI_yiesFjvgk,184417
1130
+ acryl_datahub-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1131
+ acryl_datahub-1.3.0.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1132
+ acryl_datahub-1.3.0.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1133
+ acryl_datahub-1.3.0.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.11rc4"
3
+ __version__ = "1.3.0"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -13,7 +13,7 @@ import yaml
13
13
  from datahub.configuration.common import ExceptionWithProps
14
14
 
15
15
  # Docker seems to under-report memory allocated, so we also need a bit of buffer to account for it.
16
- MIN_MEMORY_NEEDED = 4 # GB
16
+ MIN_MEMORY_NEEDED = 4.3 # GB
17
17
  MIN_DISK_SPACE_NEEDED = 13 # GB
18
18
 
19
19
  DOCKER_COMPOSE_PROJECT_NAME = os.getenv("DATAHUB_COMPOSE_PROJECT_NAME", "datahub")
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ import os
3
4
  from typing import TYPE_CHECKING, Iterable, List
4
5
 
5
6
  from datahub.emitter.rest_emitter import INGEST_MAX_PAYLOAD_BYTES
@@ -7,15 +8,36 @@ from datahub.emitter.serialization_helper import pre_json_transform
7
8
  from datahub.ingestion.api.workunit import MetadataWorkUnit
8
9
  from datahub.metadata.schema_classes import (
9
10
  DatasetProfileClass,
11
+ QueryPropertiesClass,
12
+ QuerySubjectsClass,
10
13
  SchemaFieldClass,
11
14
  SchemaMetadataClass,
15
+ UpstreamLineageClass,
12
16
  )
13
17
 
14
18
  if TYPE_CHECKING:
15
19
  from datahub.ingestion.api.source import SourceReport
16
20
 
21
+
22
+ # TODO: ordering
23
+ # In the cases where we trim collections of data (e.g. fields in schema, upstream lineage, query subjects), given
24
+ # those collections are typically unordered, we should consider sorting them by some criteria (e.g. size, alphabetically)
25
+ # so that the trimming is deterministic and predictable and more importantly consistent across executions.
26
+ # In the case of schemaMetadata, that's more relevant as currently we may be trimming fields while adding nested ones,
27
+ # which may lead to poor schema rendering in the UI.
28
+
17
29
  logger = logging.getLogger(__name__)
18
30
 
31
+ DEFAULT_QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES = 5 * 1024 * 1024 # 5MB
32
+ QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES = int(
33
+ os.environ.get(
34
+ "QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES",
35
+ DEFAULT_QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES,
36
+ )
37
+ )
38
+
39
+ QUERY_STATEMENT_TRUNCATION_BUFFER = 100
40
+
19
41
 
20
42
  class EnsureAspectSizeProcessor:
21
43
  def __init__(
@@ -81,6 +103,274 @@ class EnsureAspectSizeProcessor:
81
103
 
82
104
  schema.fields = accepted_fields
83
105
 
106
def ensure_query_subjects_size(
    self, entity_urn: str, query_subjects: QuerySubjectsClass
) -> None:
    """
    Trim a query subjects aspect so that the summed serialized size of the kept subjects
    stays below ``self.payload_constraint``.

    Table-level subjects are kept first; column-level subjects (entity URNs starting with
    ``urn:li:schemaField:``) only receive whatever space is left. As soon as one subject
    does not fit, nothing further is accepted. ``query_subjects.subjects`` is replaced in
    place with the kept table-level subjects followed by the kept column-level subjects.
    """
    if not query_subjects.subjects:
        return

    # Pair every subject with the length of its JSON rendering, bucketed by level.
    sized_table_subjects = []
    sized_column_subjects = []
    for candidate in query_subjects.subjects:
        encoded_length = len(json.dumps(pre_json_transform(candidate.to_obj())))
        if candidate.entity.startswith("urn:li:schemaField:"):
            sized_column_subjects.append((candidate, encoded_length))
        else:
            sized_table_subjects.append((candidate, encoded_length))

    kept_table_subjects = []
    kept_column_subjects = []
    running_size = 0
    # Set on the first subject that does not fit; everything after it is dropped so the
    # result is a strict priority-ordered prefix.
    hit_limit = False

    # Buckets are visited in priority order: table-level first, then column-level.
    for sized_bucket, kept_bucket in (
        (sized_table_subjects, kept_table_subjects),
        (sized_column_subjects, kept_column_subjects),
    ):
        for candidate, encoded_length in sized_bucket:
            if running_size + encoded_length >= self.payload_constraint:
                hit_limit = True
                break
            kept_bucket.append(candidate)
            running_size += encoded_length
        if hit_limit:
            break

    if hit_limit:
        # One aggregate warning per level rather than one per dropped subject.
        self._maybe_warn_query_subjects(
            entity_urn,
            len(sized_table_subjects) - len(kept_table_subjects),
            "table-level lineage subjects",
        )
        self._maybe_warn_query_subjects(
            entity_urn,
            len(sized_column_subjects) - len(kept_column_subjects),
            "column-level lineage subjects",
        )

    query_subjects.subjects = kept_table_subjects + kept_column_subjects
173
+
174
def _maybe_warn_query_subjects(
    self, entity_urn: str, skipped_count: int, item_type: str
) -> None:
    """Report a query subjects truncation warning; a no-op unless skipped_count is positive."""
    if skipped_count <= 0:
        return

    skip_details = f"Skipped {skipped_count} {item_type} for {entity_urn} due to aspect size constraints"
    self.report.warning(
        title="Query subjects truncated due to size constraint",
        message="Query subjects contained too much data and would have caused ingestion to fail",
        context=skip_details,
    )
184
+
185
def _maybe_warn_upstream_lineage(
    self, entity_urn: str, skipped_count: int, item_type: str
) -> None:
    """Report an upstream lineage truncation warning; a no-op unless skipped_count is positive."""
    if skipped_count <= 0:
        return

    skip_details = f"Skipped {skipped_count} {item_type} for {entity_urn} due to aspect size constraints"
    self.report.warning(
        title="Upstream lineage truncated due to size constraint",
        message="Upstream lineage contained too much data and would have caused ingestion to fail",
        context=skip_details,
    )
195
+
196
def ensure_upstream_lineage_size(  # noqa: C901
    self, entity_urn: str, upstream_lineage: UpstreamLineageClass
) -> None:
    """
    Ensure upstream lineage aspect does not exceed allowed size by removing lineage in priority order.

    Items are accepted from highest to lowest priority: upstreams, then DATASET, FIELD_SET and
    NONE fine-grained lineages, and finally fine-grained lineages whose upstreamType is none of
    those three. The running total is the sum of each accepted item's serialized JSON length and
    must stay below ``self.payload_constraint``. Once one item does not fit, nothing further is
    accepted, so the result is a strict priority-ordered prefix.

    The aspect is updated in place: ``upstreams`` becomes the accepted upstreams and
    ``fineGrainedLineages`` the accepted fine-grained lineages grouped by tier (``None`` when
    none were accepted). One aggregate warning per tier is reported when anything was dropped.
    """
    if not upstream_lineage.fineGrainedLineages and not upstream_lineage.upstreams:
        return

    upstream_candidates = []
    dataset_fg_candidates = []
    field_set_fg_candidates = []
    none_fg_candidates = []
    other_fg_candidates = []

    for upstream in upstream_lineage.upstreams or []:
        upstream_size = len(json.dumps(pre_json_transform(upstream.to_obj())))
        upstream_candidates.append((upstream, upstream_size))

    for fg_lineage in upstream_lineage.fineGrainedLineages or []:
        fg_lineage_size = len(json.dumps(pre_json_transform(fg_lineage.to_obj())))
        upstream_type_str = str(fg_lineage.upstreamType)
        if upstream_type_str == "DATASET":
            dataset_fg_candidates.append((fg_lineage, fg_lineage_size))
        elif upstream_type_str == "FIELD_SET":
            field_set_fg_candidates.append((fg_lineage, fg_lineage_size))
        elif upstream_type_str == "NONE":
            none_fg_candidates.append((fg_lineage, fg_lineage_size))
        else:
            # Unrecognized upstream types used to fall through every branch and were
            # silently lost even when the aspect fit; keep them at the lowest priority.
            other_fg_candidates.append((fg_lineage, fg_lineage_size))

    # (warning label, sized candidates, accepted items), highest priority first.
    tiers = [
        ("upstream datasets", upstream_candidates, []),
        ("dataset-level fine-grained lineages", dataset_fg_candidates, []),
        ("field-set-level fine-grained lineages", field_set_fg_candidates, []),
        ("none-level fine-grained lineages", none_fg_candidates, []),
        (
            "fine-grained lineages with unrecognized upstream type",
            other_fg_candidates,
            [],
        ),
    ]

    total_lineage_size = 0
    # Once we find one that doesn't fit, stop everything else to prevent inconsistencies
    first_skip_done = False
    for _label, candidates, accepted in tiers:
        for item, item_size in candidates:
            if total_lineage_size + item_size >= self.payload_constraint:
                first_skip_done = True
                break
            accepted.append(item)
            total_lineage_size += item_size
        if first_skip_done:
            break

    # Log aggregate warnings instead of per-item warnings
    if first_skip_done:
        for label, candidates, accepted in tiers:
            self._maybe_warn_upstream_lineage(
                entity_urn, len(candidates) - len(accepted), label
            )

    accepted_fine_grained_lineages = [
        item for _label, _candidates, accepted in tiers[1:] for item in accepted
    ]

    upstream_lineage.upstreams = tiers[0][2]
    upstream_lineage.fineGrainedLineages = accepted_fine_grained_lineages or None
323
+
324
def ensure_query_properties_size(
    self, entity_urn: str, query_properties: QueryPropertiesClass
) -> None:
    """
    Ensure query properties aspect does not exceed allowed size by truncating the query statement value.
    Uses a configurable max payload size that is the minimum between QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES
    and ``self.payload_constraint``.

    The statement is budgeted in serialized characters, not raw characters: ``json.dumps``
    escapes quotes, backslashes, control characters and (by default) non-ASCII characters, so
    one source character may occupy several characters in the payload. The longest statement
    prefix whose serialized form fits the budget is kept and a truncation marker is appended.
    For statements without escaped characters the kept length equals
    ``len(statement) - (current_size - max_payload_size + QUERY_STATEMENT_TRUNCATION_BUFFER)``.

    We have found surprisingly large query statements (e.g. 20MB+) that caused ingestion to fail;
    that was INSERT INTO VALUES with huge list of values.
    """
    if not query_properties.statement or not query_properties.statement.value:
        return

    max_payload_size = min(
        QUERY_PROPERTIES_STATEMENT_MAX_PAYLOAD_BYTES, self.payload_constraint
    )

    current_size = len(json.dumps(pre_json_transform(query_properties.to_obj())))

    if current_size < max_payload_size:
        return

    reduction_needed = (
        current_size - max_payload_size + QUERY_STATEMENT_TRUNCATION_BUFFER
    )

    statement_value = query_properties.statement.value
    original_statement_size = len(statement_value)

    def serialized_length(text: str) -> int:
        # Length of the text inside a JSON string literal, excluding the two quotes.
        return len(json.dumps(text)) - 2

    # NOTE(review): assumes the statement value is emitted as a plain JSON string inside
    # the serialized aspect, so its contribution equals serialized_length(value) - confirm.
    statement_budget = serialized_length(statement_value) - reduction_needed

    if statement_budget <= 0:
        # Even an empty statement would not bring the aspect under the limit.
        logger.warning(
            f"Cannot truncate query statement for {entity_urn} as it is smaller than or equal to the required reduction size {reduction_needed}. That means that 'ensure_query_properties_size' must be extended to trim other fields different than statement."
        )
        return

    # Every character serializes to at least one character, so the kept prefix can never
    # be longer than the budget; this is already the final answer when nothing is escaped.
    new_statement_length = min(original_statement_size, statement_budget)
    if serialized_length(statement_value[:new_statement_length]) > statement_budget:
        # Escaped characters present: binary-search the longest prefix that still fits.
        # Serialized length grows monotonically with prefix length, and the empty prefix
        # always fits because the budget is positive.
        low, high = 0, new_statement_length
        while low < high:
            middle = (low + high + 1) // 2
            if serialized_length(statement_value[:middle]) <= statement_budget:
                low = middle
            else:
                high = middle - 1
        new_statement_length = low

    truncated_statement = statement_value[:new_statement_length]

    # The marker stays well under QUERY_STATEMENT_TRUNCATION_BUFFER characters, which is
    # why the buffer is part of the reduction above.
    truncation_message = f"... [original value was {original_statement_size} bytes and truncated to {new_statement_length} bytes]"
    query_properties.statement.value = truncated_statement + truncation_message

    self.report.warning(
        title="Query properties truncated due to size constraint",
        message="Query properties contained too much data and would have caused ingestion to fail",
        context=f"Query statement was truncated from {original_statement_size} to {new_statement_length} characters for {entity_urn} due to aspect size constraints",
    )
373
+
84
374
  def ensure_aspect_size(
85
375
  self,
86
376
  stream: Iterable[MetadataWorkUnit],
@@ -96,4 +386,10 @@ class EnsureAspectSizeProcessor:
96
386
  self.ensure_schema_metadata_size(wu.get_urn(), schema)
97
387
  elif profile := wu.get_aspect_of_type(DatasetProfileClass):
98
388
  self.ensure_dataset_profile_size(wu.get_urn(), profile)
389
+ elif query_subjects := wu.get_aspect_of_type(QuerySubjectsClass):
390
+ self.ensure_query_subjects_size(wu.get_urn(), query_subjects)
391
+ elif upstream_lineage := wu.get_aspect_of_type(UpstreamLineageClass):
392
+ self.ensure_upstream_lineage_size(wu.get_urn(), upstream_lineage)
393
+ elif query_properties := wu.get_aspect_of_type(QueryPropertiesClass):
394
+ self.ensure_query_properties_size(wu.get_urn(), query_properties)
99
395
  yield wu