acryl-datahub 1.2.0.7rc4__py3-none-any.whl → 1.2.0.8rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (35)
  1. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc2.dist-info}/METADATA +2612 -2612
  2. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc2.dist-info}/RECORD +35 -33
  3. datahub/_version.py +1 -1
  4. datahub/cli/delete_cli.py +1 -0
  5. datahub/ingestion/api/report.py +4 -0
  6. datahub/ingestion/autogenerated/capability_summary.json +1 -1
  7. datahub/ingestion/graph/client.py +8 -1
  8. datahub/ingestion/source/datahub/config.py +4 -0
  9. datahub/ingestion/source/datahub/datahub_database_reader.py +6 -1
  10. datahub/ingestion/source/iceberg/iceberg.py +74 -32
  11. datahub/ingestion/source/metadata/lineage.py +8 -8
  12. datahub/ingestion/source/redshift/redshift.py +1 -1
  13. datahub/ingestion/source/sql/athena.py +95 -18
  14. datahub/ingestion/source/sql/athena_properties_extractor.py +43 -25
  15. datahub/ingestion/source/superset.py +3 -2
  16. datahub/ingestion/source/tableau/tableau.py +8 -5
  17. datahub/metadata/_internal_schema_classes.py +207 -12
  18. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  19. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +6 -0
  20. datahub/metadata/schema.avsc +160 -12
  21. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  22. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +9 -1
  23. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +77 -1
  24. datahub/metadata/schemas/DataProductKey.avsc +2 -1
  25. datahub/metadata/schemas/DomainKey.avsc +2 -1
  26. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  27. datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
  28. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  29. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +0 -3
  30. datahub/sql_parsing/sqlglot_lineage.py +121 -28
  31. datahub/sql_parsing/sqlglot_utils.py +12 -1
  32. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc2.dist-info}/WHEEL +0 -0
  33. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc2.dist-info}/entry_points.txt +0 -0
  34. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc2.dist-info}/licenses/LICENSE +0 -0
  35. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.7rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.8rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=82EBfeYSMr3rKnGGc8fqqoIajsOjaTI4AIrgFadY4GE,323
4
+ datahub/_version.py,sha256=61ZxWUlQVKM0CF2BBOi-9OpFZENqh_B4oxFCZYQSJBc,323
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -71,7 +71,7 @@ datahub/cli/check_cli.py,sha256=PVd3DT9TOK7Ejd0ODKnbRrBnPxsZLfUWgubFRUKwKAA,1697
71
71
  datahub/cli/cli_utils.py,sha256=0jTTAKuDZ8GzZwGHYytcT_MPR3Rb2DAcbr9n1H2T2sE,16170
72
72
  datahub/cli/config_utils.py,sha256=EeBGfhmf4AxYoTfnZ4GSiGIgpzJFkduNjN_FwmxZGhA,4889
73
73
  datahub/cli/container_cli.py,sha256=D0zWP3_3aww8_RTkMugOoOlILz3dPJ0TE9asQDLCm6E,1697
74
- datahub/cli/delete_cli.py,sha256=K6DwCAV9tjIirF4Im3Pu4yRRiBiMaEw4jc4LBCqkpJQ,26482
74
+ datahub/cli/delete_cli.py,sha256=0YJeWuXPGY0kbSn1AXK1-8SfCGBxb78ZbO53RAgyjQg,26515
75
75
  datahub/cli/docker_check.py,sha256=NdMmIG8H15QbJqzuHchrs99Mva7kVE-t7FTd6t3qT_c,12978
76
76
  datahub/cli/docker_cli.py,sha256=aAZIF3oblTSXzQVSanbphd3dCvdypHCyuMt1u3_QZ9M,33067
77
77
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
@@ -148,7 +148,7 @@ datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPs
148
148
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
149
149
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
150
150
  datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
151
- datahub/ingestion/api/report.py,sha256=-xduHhIRUgf5G51mUb3uTi6GBxVli6ZK25AS5ikXuII,18312
151
+ datahub/ingestion/api/report.py,sha256=OuVZAgNkzSGkKhpOhpqebd9_bEsBCTeoWR1VcanPeD4,18509
152
152
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
153
153
  datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
154
154
  datahub/ingestion/api/source.py,sha256=uf0fNbiOy0bS_aKFOcNv6NvuZe0LSDIDdNza9hraP7s,21857
@@ -159,7 +159,7 @@ datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
159
159
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
160
160
  datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=0BwkpLhORbsiTHq0g_N_1cVVoZYdLR3qz02mNmsV9-M,4444
161
161
  datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- datahub/ingestion/autogenerated/capability_summary.json,sha256=4dBPgl4z4kIDPhk3L8GsAzSzJWGpsRlDt0wuPdVey5s,110230
162
+ datahub/ingestion/autogenerated/capability_summary.json,sha256=78zS8mOo54IkG9l5KJ_YBXkZmERSSo3pDIrefQx-PWA,110199
163
163
  datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
164
164
  datahub/ingestion/autogenerated/lineage_helper.py,sha256=I_k1pZSCCCjDbUVifPTfy6fkmV8jqdVhbirE8EkpmxI,4748
165
165
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,7 +181,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
181
181
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
182
182
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
183
183
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
- datahub/ingestion/graph/client.py,sha256=xUURT6KxwOhwuAbUznxrOzmGuXxHI-3MmDgJQHFpaGk,74671
184
+ datahub/ingestion/graph/client.py,sha256=Orf9BHyTeHufB_LRIC-LyJj1kii73tyWTN_I1HJ6-_k,74955
185
185
  datahub/ingestion/graph/config.py,sha256=rmkcqAL8fJoY9QyAeS0Xm8HvwHzV3pCjY-Om-50JJTI,1015
186
186
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
187
187
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -228,7 +228,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
228
228
  datahub/ingestion/source/salesforce.py,sha256=UttN3y4Ylbx_yCFCr-33wUEZFR48nTiYeUfOjGIFj2E,40872
229
229
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
230
230
  datahub/ingestion/source/sql_queries.py,sha256=Zf6Y84WFCD-j0v4HdtcXshAPrtGq5InncXCjnLaAuS4,14657
231
- datahub/ingestion/source/superset.py,sha256=T3-DRuaSqIAQ9gfUenWzBgHcVnejv3yMHeyOK6BEGPo,53671
231
+ datahub/ingestion/source/superset.py,sha256=SLv6DqglPlFJwEskLOqEr-rfgCAb_2AXqWYupk8FqH4,53756
232
232
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
233
233
  datahub/ingestion/source/abs/config.py,sha256=WW9JWbzqAJDblAcJKtNeuBHqOeJsB57lW2PqSD65-BU,6729
234
234
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
@@ -293,9 +293,9 @@ datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=IYr5y8vy_6Ct
293
293
  datahub/ingestion/source/data_lake_common/object_store.py,sha256=i9Hgb8Ww23QD_jEjzj_2qxA8Nr56krnZfo1qyOWmH9M,23608
294
294
  datahub/ingestion/source/data_lake_common/path_spec.py,sha256=idEhxKXPCbkp7NzLmaTcS2ebAUwGZIBvn8zAUHcD1QM,23801
295
295
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
296
- datahub/ingestion/source/datahub/config.py,sha256=bjR1U3F85FbtgqmLDW-f_4dQvuw5AsJQxdQlOUeHDUk,5126
296
+ datahub/ingestion/source/datahub/config.py,sha256=ElMoJDuaByhR7ZgXmGTwHJDZDzfGydMkHlSLDnq2Ws0,5257
297
297
  datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
298
- datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=gq0_o2Im83AvTDkA9eSrq6dDJbPysysxQnfNpO7a7gM,15297
298
+ datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=KjVa7aoKALbqXvTo1iJpUfyJdW0_sxRe_VFh-9-FgrI,15599
299
299
  datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=gnxhhlK-jrfnHqD_4eVmfcdtBNW6pi1N_qkDZ7uSb3o,4187
300
300
  datahub/ingestion/source/datahub/datahub_source.py,sha256=mpBrHxf3sitdPZni5B5atlEheQs8cMuN1i2q72eDelQ,8993
301
301
  datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vBCU0XxGcZR6Xxs,940
@@ -366,7 +366,7 @@ datahub/ingestion/source/hex/mapper.py,sha256=IyDAE-TzZUji3ICI_9gkYC3dQN3gl6kERR
366
366
  datahub/ingestion/source/hex/model.py,sha256=eri4aRo1eXcE2SWjzCnPFMhzPTiJ8w8zC4GN7Lgpr74,1864
367
367
  datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
368
368
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
369
- datahub/ingestion/source/iceberg/iceberg.py,sha256=UWfI4sN5uO6f9KzxjY939a_BIkAnPf0ELCmFvf9KuYg,35427
369
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=2E3mhvsIDSHDUd1Prb0nlZnGIsQLIuwNeFRxJPYyS-0,37042
370
370
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
371
371
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
372
372
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -405,7 +405,7 @@ datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPu
405
405
  datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
406
406
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
407
407
  datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
408
- datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
408
+ datahub/ingestion/source/metadata/lineage.py,sha256=YgerAUptUVMwrg-s_H8CwTKbbI91Ta74JE-rK3oGUSE,9611
409
409
  datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
410
410
  datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=3i3SdBp267cZRszhmD_JWJLTGIot2FI8REFpjJQ4jD8,19822
411
411
  datahub/ingestion/source/mock_data/datahub_mock_data_report.py,sha256=sV_H7JgcuVbrpIBqtGse_BBigMdqP32ZXuanpeXmwVI,331
@@ -451,7 +451,7 @@ datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX
451
451
  datahub/ingestion/source/redshift/lineage.py,sha256=nqrvWJqaI493i1hIZ_7patrdOb16sZrgSSGapdMcEiU,31710
452
452
  datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwqIql_rpWvlumY_EM,4309
453
453
  datahub/ingestion/source/redshift/query.py,sha256=vVIuNUaU4a7AfMFJZlgLuqi0cGVl0gVz8xZUSnPhWvs,47845
454
- datahub/ingestion/source/redshift/redshift.py,sha256=3cWlMKd_coGYhpP9tImkca0zbjhC5Gf8YIwgsWVVjao,41291
454
+ datahub/ingestion/source/redshift/redshift.py,sha256=zalndYg_LK5aJ8cX_ZuXLcTYajtlavmV-dmQIsjGxjg,41260
455
455
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
456
456
  datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
457
457
  datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
@@ -505,8 +505,8 @@ datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=dmgpwApayUIevyn6l55
505
505
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=c6wg_s97Hrckqi0BgAbmnnRQRDDda1-BHFLlnRx0xuw,35753
506
506
  datahub/ingestion/source/snowflake/stored_proc_lineage.py,sha256=rOb78iHiWiK8v8WdVs1xDwVut4Y0OHmszej6IopQfCo,5341
507
507
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
508
- datahub/ingestion/source/sql/athena.py,sha256=TPKwL9oRiZlVnqIsOSBWUEwyvoW-1ssXvY4PfjxOR6g,28175
509
- datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=OS2E2HD7xTn0MBy__pIvjKXMfGp02Zf93hQRAPMXE_Y,28533
508
+ datahub/ingestion/source/sql/athena.py,sha256=S5R-3HL7nN2FcryEUlwi2fFWzWQb55iycYFSlTRLcoc,31460
509
+ datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=mKu0ZGyt8qvpWoP6CUf0vLUAz5k7GO2keof5KJd4Wak,29469
510
510
  datahub/ingestion/source/sql/clickhouse.py,sha256=zd5qE6XPw0AXtY_71-n0yz4ua69xP3oxMuIoabAuT3Q,25987
511
511
  datahub/ingestion/source/sql/cockroachdb.py,sha256=WoOKCq7YjsuzSPm1SmKIYZ9CrvlSF8zWmP1fNHn4G3Q,1360
512
512
  datahub/ingestion/source/sql/druid.py,sha256=_tzgTa5jhPUXk6WCmS7p10feCwJm6yUFcOgMZA-OcE8,2922
@@ -554,7 +554,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
554
554
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
555
555
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
556
556
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
557
- datahub/ingestion/source/tableau/tableau.py,sha256=aadKU4hFzsD2zhaFysTReUyg6FOAGE_HtZjuoLsIy28,155885
557
+ datahub/ingestion/source/tableau/tableau.py,sha256=OCdEHTYhJllnvQKalaEdiRATt04syAs2-O1oBYhCu74,156162
558
558
  datahub/ingestion/source/tableau/tableau_common.py,sha256=4cUm3E8wLfjLSjcAXXWDWVUXAEho_hbsQa2BzAF-vtM,27012
559
559
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=4ngrOwqxf4cgbLR3i0OKI4pUxmHMABKyywfhXQ0GazA,2592
560
560
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
@@ -635,8 +635,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
635
635
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
636
636
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
637
637
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
638
- datahub/metadata/_internal_schema_classes.py,sha256=dA_UHbIGJZTuffGPnvUQ9rjdWHKubr89z4INW_K7RTw,1053890
639
- datahub/metadata/schema.avsc,sha256=o9lFI4loFPIpA8EZZa321MvVVvmzUZ_za_6BFF4en9M,698792
638
+ datahub/metadata/_internal_schema_classes.py,sha256=AExBM83VHb4vsnAWF2nCR2x6hGg8NIsGXtAIAmYMs04,1061410
639
+ datahub/metadata/schema.avsc,sha256=HluHCVmYg7RpOaw9xUMigEJBxlHF5WLdNcqVBKPeoOU,704514
640
640
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
641
641
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
642
642
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -708,12 +708,13 @@ datahub/metadata/com/linkedin/pegasus2avro/schema/__init__.py,sha256=7JpzLs6S_Ey
708
708
  datahub/metadata/com/linkedin/pegasus2avro/schemafield/__init__.py,sha256=HTWeznycKnHBfPEGcCHXPEz83Iq9ypjNaoSfeQeDU9g,397
709
709
  datahub/metadata/com/linkedin/pegasus2avro/secret/__init__.py,sha256=qk61EqqVZF6k1Ct6t4Uo-pLb0WtM1EwJKn1XjVy9LHE,305
710
710
  datahub/metadata/com/linkedin/pegasus2avro/settings/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
711
+ datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py,sha256=ASD1mi7q19HVN9QKzbu2T3GxzdDQ_LqZNTehV3SI12c,531
711
712
  datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py,sha256=hKpTDXj2YEBja4hk2fZx3G9yzJ7eCFxcGCbbN0NydTk,933
712
713
  datahub/metadata/com/linkedin/pegasus2avro/step/__init__.py,sha256=HLNNbqBlyhcg09eXWx_AMD_JoOtBPYEi2kv12PE0R9E,329
713
714
  datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py,sha256=Cry61gPw6m5MQuJpPxADRm3jhI0XVqzznyD3fVKMkvc,1013
714
715
  datahub/metadata/com/linkedin/pegasus2avro/tag/__init__.py,sha256=Odb4mzloKJIlpoFHODEIxt_OIgFNrZExcyQtvXxjOFQ,290
715
716
  datahub/metadata/com/linkedin/pegasus2avro/telemetry/__init__.py,sha256=N4CJwzAqTrRoCQ2Aoa_e8cUZI_fzn9Zdo2okvO-_nWE,302
716
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py,sha256=CK8dZFt2A11dG9QnjxDrr1QbhP0MS6c4mMXHK688Azc,924
717
+ datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py,sha256=-TWG4__SDxZfD9X6VDalU1ErZRCtyRPwoI8eN4wSta4,1254
717
718
  datahub/metadata/com/linkedin/pegasus2avro/test/__init__.py,sha256=Z4DlDtf-NELFpx44Pk4RL1JlGuxtgEAMa6Sko8QBsGw,711
718
719
  datahub/metadata/com/linkedin/pegasus2avro/timeseries/__init__.py,sha256=6Pbit2drar8n99RFNQiXfYj7PhIzrO1SIpsGELZR4oA,637
719
720
  datahub/metadata/com/linkedin/pegasus2avro/upgrade/__init__.py,sha256=o3U2TuzRSU1uPL-4AOMCPDqEwngqRb6g4-CBFY7eSvQ,525
@@ -729,6 +730,7 @@ datahub/metadata/schemas/AssertionActions.avsc,sha256=zrvXzX2Nv_dmK6I3ZXCVWQ1bVs
729
730
  datahub/metadata/schemas/AssertionInfo.avsc,sha256=djiUVdw0pGd_Ex7uJspasTx2xwwCnd9cpItd76VJqYw,125296
730
731
  datahub/metadata/schemas/AssertionKey.avsc,sha256=EjNaTyzGxtWzTsZd87P2ZSaGX5dn8Y7HGBdqvlQVrFI,638
731
732
  datahub/metadata/schemas/AssertionRunEvent.avsc,sha256=FUyV73bUliBC-a_XFUlfgh75o99-lu1fl36b2q8Pqx4,12886
733
+ datahub/metadata/schemas/AssetSettings.avsc,sha256=LBmSybFUBkQPVmIEF1GwQl0ePmTrXlm0kHbKRIm--lo,2004
732
734
  datahub/metadata/schemas/BrowsePaths.avsc,sha256=NR_4dKuJMk1X2RB4DLkHVSqyMQc4PvT7eR0n6lM5aOM,654
733
735
  datahub/metadata/schemas/BrowsePathsV2.avsc,sha256=rpcXUONOmxCzTzUJZ4UxR5rk3AotiaHkCfqM5uJELjY,1876
734
736
  datahub/metadata/schemas/BusinessAttributeInfo.avsc,sha256=5HpEN9ZP6qdkcIib01F9n54dHoNDrGrml_9o_ZO5JTc,22062
@@ -769,9 +771,9 @@ datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjO
769
771
  datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
770
772
  datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
771
773
  datahub/metadata/schemas/DataHubPageModuleKey.avsc,sha256=NyFN8cVO6s6rtgoLGJJGfcPfpGr5PfmZlIhM6ajldfQ,460
772
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=2HK5h0bKefR1GNLYLyMrTTDwRPvyDciaLgiJHNDkinc,9878
774
+ datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=53Fj4ztBJqo9QMWuza2Kdtfpr2nTOTW0XuuXW77ugB8,10347
773
775
  datahub/metadata/schemas/DataHubPageTemplateKey.avsc,sha256=0sVqwL97Rp8YHPytp2RqUP5hIW048hmT2hPNP5k6arc,472
774
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=0ndN64UNAADL6G_GVjJLHbe_dBnWhVRjtI3MilOlHQc,5651
776
+ datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=FyNcZIniQy9m6yN9DT4XsPkDrxUsU7tRTqmfdGoEtMU,8565
775
777
  datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
776
778
  datahub/metadata/schemas/DataHubPersonaKey.avsc,sha256=ddj-DhXa0_YMdLaGkKLLSklfIeDRvSwPXu8o__YEXUE,448
777
779
  datahub/metadata/schemas/DataHubPolicyInfo.avsc,sha256=yBQe7pAuTMg9aovhugF4EkCRSHO_AN2TP_NM-0-Jg3A,10037
@@ -805,7 +807,7 @@ datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5
805
807
  datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
806
808
  datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
807
809
  datahub/metadata/schemas/DataProcessKey.avsc,sha256=ZZE2HN4mwZtm_TJNcdohFS97WXytFq9HAs_-shor6sY,2518
808
- datahub/metadata/schemas/DataProductKey.avsc,sha256=SyjmL2ieea1P6uipXst37mD5NdGPTqDvJAL3CVo91wk,661
810
+ datahub/metadata/schemas/DataProductKey.avsc,sha256=ECDUbxMwvmgI3tTRbO7UXncbNJUrqLkHTSDoMtBpYYY,684
809
811
  datahub/metadata/schemas/DataProductProperties.avsc,sha256=Lc3duV7YMJLvo_RwckLbW4bbmPrhSS1D-bxVVboNX2c,6930
810
812
  datahub/metadata/schemas/DataTransformLogic.avsc,sha256=nHTH6UzJ2Zz88N2aWa96hawLUR20HP7eSynfPtI1kzg,2111
811
813
  datahub/metadata/schemas/DataTypeInfo.avsc,sha256=MCjzal71P8uIXZg161LrU8rZTJocZeizK-YxYA0Det0,704
@@ -821,7 +823,7 @@ datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=jgF1u31kP1XAnnV2B0X8
821
823
  datahub/metadata/schemas/Deprecation.avsc,sha256=p8SBIuKP3XVGeaBI7rROpLNACuoX8eMLRlZz8lGOYV8,1354
822
824
  datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
823
825
  datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
824
- datahub/metadata/schemas/DomainKey.avsc,sha256=TYCcJRWqwbxbQuR5E68pvdeAmfVdYsJuMNhTxVphbqg,676
826
+ datahub/metadata/schemas/DomainKey.avsc,sha256=hDlX4jJTeAXW_VpKEhF8w5_tMZi7JRaVGqbvJL7GojE,699
825
827
  datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
826
828
  datahub/metadata/schemas/Domains.avsc,sha256=5mRQcba6Zmp6Y1srbxhOjETutg0I_ZG4ikuS2r9fkR0,804
827
829
  datahub/metadata/schemas/DynamicFormAssignment.avsc,sha256=SXRL5D6kIYWdGl3zLQYxPnkQX71JXQOKrjQNavFqVp0,7339
@@ -859,15 +861,15 @@ datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=HKZisxW2HNm6kwJUBaAVol7s
859
861
  datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
860
862
  datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
861
863
  datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
862
- datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=hT8ny4TL1WvgFvnaVBjuw6AWDiPDjpkh20f83ZT-UZ8,664
864
+ datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=KjdsiVm93SWXQdxllwE0GQei-NmvSatzrl9sQIA6TAE,687
863
865
  datahub/metadata/schemas/GlossaryRelatedTerms.avsc,sha256=ZTP0mrFD4y-C6JekRy8IVuHvICUkJib-ZAYD93Gv1tA,2763
864
866
  datahub/metadata/schemas/GlossaryTermInfo.avsc,sha256=j4s9NCyMOIF03HfaXoQEIkiMTRaCy_-euhenptfu7IA,2935
865
- datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=00paBmYoFIlIUebS0X6BL9y5xqnedV3AjOiU0uOp1gI,858
867
+ datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=_5Nf0kljjFVz0wNsE-BiIZvn6yL1iymaBQIptRKVwoU,881
866
868
  datahub/metadata/schemas/GlossaryTerms.avsc,sha256=ogOFO6Hr2Xb7s1JHqxsCPY8r_qY_9kwu69k5-E3j2BM,7123
867
869
  datahub/metadata/schemas/GroupMembership.avsc,sha256=wT3Hbpv2Z7V4X_-rIoed0cukAOMyYEL93udK8mMCjn0,557
868
870
  datahub/metadata/schemas/IcebergCatalogInfo.avsc,sha256=X9Ejqzn1DyxNIth7vDhtPjGG4xMPQMhl7f-S7fBFxek,691
869
871
  datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=0m7cQm8cCnBWNI5jGGgr5ZdOg66RQGWSf3gf8ay53So,2705
870
- datahub/metadata/schemas/IncidentInfo.avsc,sha256=L8xldmWyOW4Ml2Fm9XTRL13lP1CAEP0kgXsd_jLZaEU,12425
872
+ datahub/metadata/schemas/IncidentInfo.avsc,sha256=bxw_OwxhTarm1TxY4R-Q12RFQBIjeoUGgvgoeRiNT04,12428
871
873
  datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
872
874
  datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
873
875
  datahub/metadata/schemas/IncidentsSummary.avsc,sha256=NTYp-6Oe92ALApbM3759TJ5pLXRArsSriIPq-f7w9vI,4514
@@ -929,7 +931,7 @@ datahub/metadata/schemas/SlackUserInfo.avsc,sha256=IY7InWaiDzJa3hJ9J4W3Eg8EUKuh0
929
931
  datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
930
932
  datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
931
933
  datahub/metadata/schemas/StructuredProperties.avsc,sha256=qe45sKZ9XrLcf15Gt03Ttzt2J_kJYHvN-DAOSErSYuY,7028
932
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=OIRGpyLUYuBmISPr4WR85Dz6RlqC0dwgP3vgKItcx1U,11795
934
+ datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=a-6TaOQ4A7LDFLshmaFRBcXjz11p4vM0Q3X35GN4Zo0,11737
933
935
  datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
934
936
  datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
935
937
  datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
@@ -1004,8 +1006,8 @@ datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41
1004
1006
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=kxxSVe3YNoz_T2OG6-F30ZuXNSXuBZ-E54RqObo6qTI,72323
1005
1007
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
1006
1008
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
1007
- datahub/sql_parsing/sqlglot_lineage.py,sha256=oG7Zx2aOpm1tBQQowPgSufGlMpm5DaMGKTwk7gIkhX0,61450
1008
- datahub/sql_parsing/sqlglot_utils.py,sha256=TI11oBu1wrGeUuUGBg7hGTr6lTvztahdqiqXNJYRfbQ,14823
1009
+ datahub/sql_parsing/sqlglot_lineage.py,sha256=05ryqMkFK5_KDvr-ytXb1FyiC-SzaFB9_USnbL36LEg,66316
1010
+ datahub/sql_parsing/sqlglot_utils.py,sha256=zH8V9tAcSVO7Y8I3sIKPhs0D_9HzdNBlranBDmk1NB4,15454
1009
1011
  datahub/sql_parsing/tool_meta_extractor.py,sha256=5JsLPcKjuXSrPGxNIhRvX72dFPmlV33-hyvhJwlWxCY,7543
1010
1012
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1011
1013
  datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
@@ -1112,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1112
1114
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1113
1115
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1114
1116
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1115
- acryl_datahub-1.2.0.7rc4.dist-info/METADATA,sha256=E15QJWL7lHS1mLe36RJZOMqHRbzN-EvYISjREnc2LZk,186633
1116
- acryl_datahub-1.2.0.7rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1117
- acryl_datahub-1.2.0.7rc4.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1118
- acryl_datahub-1.2.0.7rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1119
- acryl_datahub-1.2.0.7rc4.dist-info/RECORD,,
1117
+ acryl_datahub-1.2.0.8rc2.dist-info/METADATA,sha256=Q8mmqp92zb_C5PbYaI7zQiAwkw9QrX0FUiCAGxtbzzg,186651
1118
+ acryl_datahub-1.2.0.8rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
+ acryl_datahub-1.2.0.8rc2.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
+ acryl_datahub-1.2.0.8rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
+ acryl_datahub-1.2.0.8rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.7rc4"
3
+ __version__ = "1.2.0.8rc2"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/cli/delete_cli.py CHANGED
@@ -469,6 +469,7 @@ def by_filter(
469
469
  query=query,
470
470
  status=soft_delete_filter,
471
471
  batch_size=batch_size,
472
+ skip_cache=True,
472
473
  )
473
474
  )
474
475
  if len(urns) == 0:
datahub/ingestion/api/report.py CHANGED
@@ -204,6 +204,7 @@ class ExamplesReport(Report, Closeable):
204
204
  samples: Dict[str, Dict[str, List[str]]] = field(
205
205
  default_factory=lambda: defaultdict(lambda: defaultdict(list))
206
206
  )
207
+ compute_stats_time_seconds: float = 0.0
207
208
  _file_based_dict: Optional[FileBackedDict[SourceReportSubtypes]] = None
208
209
 
209
210
  # We are adding this to make querying easier for fine-grained lineage
@@ -405,6 +406,7 @@ class ExamplesReport(Report, Closeable):
405
406
  self._update_file_based_dict(urn, entityType, aspectName, mcp)
406
407
 
407
408
  def compute_stats(self) -> None:
409
+ start_time = datetime.now()
408
410
  if self._file_based_dict is None:
409
411
  return
410
412
 
@@ -466,6 +468,8 @@ class ExamplesReport(Report, Closeable):
466
468
  list(self._lineage_aspects_seen), "lineage"
467
469
  )
468
470
  self._collect_samples_with_all_conditions("all_3")
471
+ end_time = datetime.now()
472
+ self.compute_stats_time_seconds += (end_time - start_time).total_seconds()
469
473
 
470
474
 
471
475
  class EntityFilterReport(ReportAttribute):
datahub/ingestion/autogenerated/capability_summary.json CHANGED
@@ -2678,7 +2678,7 @@
2678
2678
  },
2679
2679
  {
2680
2680
  "capability": "USAGE_STATS",
2681
- "description": "Enabled by default, can be disabled via configuration `include_usage_statistics`",
2681
+ "description": "Optionally enabled via `include_usage_statistics`",
2682
2682
  "subtype_modifier": null,
2683
2683
  "supported": true
2684
2684
  },
datahub/ingestion/graph/client.py CHANGED
@@ -971,7 +971,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
971
971
  $orFilters: [AndFilterInput!],
972
972
  $batchSize: Int!,
973
973
  $scrollId: String,
974
- $skipCache: Boolean!) {
974
+ $skipCache: Boolean!,
975
+ $includeSoftDeleted: Boolean) {
975
976
 
976
977
  scrollAcrossEntities(input: {
977
978
  query: $query,
@@ -983,6 +984,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
983
984
  skipHighlighting: true
984
985
  skipAggregates: true
985
986
  skipCache: $skipCache
987
+ includeSoftDeleted: $includeSoftDeleted
986
988
  }
987
989
  }) {
988
990
  nextScrollId
@@ -1002,6 +1004,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1002
1004
  "orFilters": orFilters,
1003
1005
  "batchSize": batch_size,
1004
1006
  "skipCache": skip_cache,
1007
+ "includeSoftDeleted": (
1008
+ None
1009
+ if status is None
1010
+ else status != RemovedStatusFilter.NOT_SOFT_DELETED
1011
+ ),
1005
1012
  }
1006
1013
 
1007
1014
  for entity in self._scroll_across_entities(graphql_query, variables):
datahub/ingestion/source/datahub/config.py CHANGED
@@ -129,6 +129,10 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
129
129
  description="Timeout for each query in seconds. ",
130
130
  )
131
131
 
132
+ preserve_system_metadata: bool = Field(
133
+ default=True, description="Copy system metadata from the source system"
134
+ )
135
+
132
136
  @root_validator(skip_on_failure=True)
133
137
  def check_ingesting_data(cls, values):
134
138
  if (
datahub/ingestion/source/datahub/datahub_database_reader.py CHANGED
@@ -380,7 +380,12 @@ class DataHubDatabaseReader:
380
380
  json_metadata = post_json_transform(
381
381
  json.loads(row["systemmetadata"] or "{}")
382
382
  )
383
- system_metadata = SystemMetadataClass.from_obj(json_metadata)
383
+ system_metadata = None
384
+ if self.config.preserve_system_metadata:
385
+ system_metadata = SystemMetadataClass.from_obj(json_metadata)
386
+ if system_metadata.properties:
387
+ is_no_op = system_metadata.properties.pop("isNoOp", None)
388
+ logger.debug(f"Removed potential value for is_no_op={is_no_op}")
384
389
  return MetadataChangeProposalWrapper(
385
390
  entityUrn=row["urn"],
386
391
  aspect=ASPECT_MAP[row["aspect"]].from_obj(json_aspect),
@@ -12,7 +12,7 @@ from pyiceberg.exceptions import (
12
12
  NoSuchNamespaceError,
13
13
  NoSuchPropertyException,
14
14
  NoSuchTableError,
15
- ServerError,
15
+ RESTError,
16
16
  )
17
17
  from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
18
18
  from pyiceberg.table import Table
@@ -154,6 +154,10 @@ class IcebergSource(StatefulIngestionSourceBase):
154
154
  self.report: IcebergSourceReport = IcebergSourceReport()
155
155
  self.config: IcebergSourceConfig = config
156
156
  self.ctx: PipelineContext = ctx
157
+ self.stamping_processor = AutoSystemMetadata(
158
+ self.ctx
159
+ ) # single instance used only when processing namespaces
160
+ self.namespaces: List[Tuple[Identifier, str]] = []
157
161
 
158
162
  @classmethod
159
163
  def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
@@ -246,6 +250,13 @@ class IcebergSource(StatefulIngestionSourceBase):
246
250
  context=str(namespace),
247
251
  exc=e,
248
252
  )
253
+ except RESTError as e:
254
+ self.report.warning(
255
+ title="Iceberg REST Server Error",
256
+ message="Iceberg REST Server returned error status when trying to list tables for a namespace, skipping it.",
257
+ context=str(namespace),
258
+ exc=e,
259
+ )
249
260
  except Exception as e:
250
261
  self.report.report_failure(
251
262
  title="Error when processing a namespace",
@@ -322,10 +333,10 @@ class IcebergSource(StatefulIngestionSourceBase):
322
333
  context=dataset_name,
323
334
  exc=e,
324
335
  )
325
- except ServerError as e:
336
+ except RESTError as e:
326
337
  self.report.warning(
327
338
  title="Iceberg REST Server Error",
328
- message="Iceberg returned 500 HTTP status when trying to process a table, skipping it.",
339
+ message="Iceberg REST Server returned error status when trying to process a table, skipping it.",
329
340
  context=dataset_name,
330
341
  exc=e,
331
342
  )
@@ -365,7 +376,7 @@ class IcebergSource(StatefulIngestionSourceBase):
365
376
  )
366
377
 
367
378
  try:
368
- catalog = self.config.get_catalog()
379
+ self.catalog = self.config.get_catalog()
369
380
  except Exception as e:
370
381
  self.report.report_failure(
371
382
  title="Failed to initialize catalog object",
@@ -375,33 +386,7 @@ class IcebergSource(StatefulIngestionSourceBase):
375
386
  return
376
387
 
377
388
  try:
378
- stamping_processor = AutoSystemMetadata(self.ctx)
379
- namespace_ids = self._get_namespaces(catalog)
380
- namespaces: List[Tuple[Identifier, str]] = []
381
- for namespace in namespace_ids:
382
- namespace_repr = ".".join(namespace)
383
- LOGGER.debug(f"Processing namespace {namespace_repr}")
384
- namespace_urn = make_container_urn(
385
- NamespaceKey(
386
- namespace=namespace_repr,
387
- platform=self.platform,
388
- instance=self.config.platform_instance,
389
- env=self.config.env,
390
- )
391
- )
392
- namespace_properties: Properties = catalog.load_namespace_properties(
393
- namespace
394
- )
395
- namespaces.append((namespace, namespace_urn))
396
- for aspect in self._create_iceberg_namespace_aspects(
397
- namespace, namespace_properties
398
- ):
399
- yield stamping_processor.stamp_wu(
400
- MetadataChangeProposalWrapper(
401
- entityUrn=namespace_urn, aspect=aspect
402
- ).as_workunit()
403
- )
404
- LOGGER.debug("Namespaces ingestion completed")
389
+ yield from self._process_namespaces()
405
390
  except Exception as e:
406
391
  self.report.report_failure(
407
392
  title="Failed to list namespaces",
@@ -415,13 +400,70 @@ class IcebergSource(StatefulIngestionSourceBase):
415
400
  args_list=[
416
401
  (dataset_path, namespace_urn)
417
402
  for dataset_path, namespace_urn in self._get_datasets(
418
- catalog, namespaces
403
+ self.catalog, self.namespaces
419
404
  )
420
405
  ],
421
406
  max_workers=self.config.processing_threads,
422
407
  ):
423
408
  yield wu
424
409
 
410
+ def _try_processing_namespace(
411
+ self, namespace: Identifier
412
+ ) -> Iterable[MetadataWorkUnit]:
413
+ namespace_repr = ".".join(namespace)
414
+ try:
415
+ LOGGER.debug(f"Processing namespace {namespace_repr}")
416
+ namespace_urn = make_container_urn(
417
+ NamespaceKey(
418
+ namespace=namespace_repr,
419
+ platform=self.platform,
420
+ instance=self.config.platform_instance,
421
+ env=self.config.env,
422
+ )
423
+ )
424
+
425
+ namespace_properties: Properties = self.catalog.load_namespace_properties(
426
+ namespace
427
+ )
428
+ for aspect in self._create_iceberg_namespace_aspects(
429
+ namespace, namespace_properties
430
+ ):
431
+ yield self.stamping_processor.stamp_wu(
432
+ MetadataChangeProposalWrapper(
433
+ entityUrn=namespace_urn, aspect=aspect
434
+ ).as_workunit()
435
+ )
436
+ self.namespaces.append((namespace, namespace_urn))
437
+ except NoSuchNamespaceError as e:
438
+ self.report.report_warning(
439
+ title="Failed to retrieve namespace properties",
440
+ message="Couldn't find the namespace, was it deleted during the ingestion?",
441
+ context=namespace_repr,
442
+ exc=e,
443
+ )
444
+ return
445
+ except RESTError as e:
446
+ self.report.warning(
447
+ title="Iceberg REST Server Error",
448
+ message="Iceberg REST Server returned error status when trying to retrieve namespace properties, skipping it.",
449
+ context=str(namespace),
450
+ exc=e,
451
+ )
452
+ except Exception as e:
453
+ self.report.report_failure(
454
+ title="Failed to process namespace",
455
+ message="Unhandled exception happened during processing of the namespace",
456
+ context=namespace_repr,
457
+ exc=e,
458
+ )
459
+
460
+ def _process_namespaces(self) -> Iterable[MetadataWorkUnit]:
461
+ namespace_ids = self._get_namespaces(self.catalog)
462
+ for namespace in namespace_ids:
463
+ yield from self._try_processing_namespace(namespace)
464
+
465
+ LOGGER.debug("Namespaces ingestion completed")
466
+
425
467
  def _create_iceberg_table_aspects(
426
468
  self, dataset_name: str, table: Table, namespace_urn: str
427
469
  ) -> Iterable[_Aspect]:
@@ -37,9 +37,9 @@ from datahub.ingestion.api.source_helpers import (
37
37
  from datahub.ingestion.api.workunit import MetadataWorkUnit
38
38
  from datahub.ingestion.graph.client import get_default_graph
39
39
  from datahub.ingestion.graph.config import ClientMode
40
- from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
41
- FineGrainedLineageDownstreamType,
42
- FineGrainedLineageUpstreamType,
40
+ from datahub.metadata.schema_classes import (
41
+ FineGrainedLineageDownstreamTypeClass,
42
+ FineGrainedLineageUpstreamTypeClass,
43
43
  )
44
44
 
45
45
  logger = logging.getLogger(__name__)
@@ -80,9 +80,9 @@ class FineGrainedLineageConfig(ConfigModel):
80
80
  @validator("upstreamType")
81
81
  def upstream_type_must_be_supported(cls, v: str) -> str:
82
82
  allowed_types = [
83
- FineGrainedLineageUpstreamType.FIELD_SET,
84
- FineGrainedLineageUpstreamType.DATASET,
85
- FineGrainedLineageUpstreamType.NONE,
83
+ FineGrainedLineageUpstreamTypeClass.FIELD_SET,
84
+ FineGrainedLineageUpstreamTypeClass.DATASET,
85
+ FineGrainedLineageUpstreamTypeClass.NONE,
86
86
  ]
87
87
  if v not in allowed_types:
88
88
  raise ValueError(
@@ -93,8 +93,8 @@ class FineGrainedLineageConfig(ConfigModel):
93
93
  @validator("downstreamType")
94
94
  def downstream_type_must_be_supported(cls, v: str) -> str:
95
95
  allowed_types = [
96
- FineGrainedLineageDownstreamType.FIELD_SET,
97
- FineGrainedLineageDownstreamType.FIELD,
96
+ FineGrainedLineageDownstreamTypeClass.FIELD_SET,
97
+ FineGrainedLineageDownstreamTypeClass.FIELD,
98
98
  ]
99
99
  if v not in allowed_types:
100
100
  raise ValueError(
@@ -143,7 +143,7 @@ logger: logging.Logger = logging.getLogger(__name__)
143
143
  @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
144
144
  @capability(
145
145
  SourceCapability.USAGE_STATS,
146
- "Enabled by default, can be disabled via configuration `include_usage_statistics`",
146
+ "Optionally enabled via `include_usage_statistics`",
147
147
  )
148
148
  @capability(
149
149
  SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"