acryl-datahub 1.2.0.7rc4__py3-none-any.whl → 1.2.0.8rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic.

Files changed (34)
  1. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc1.dist-info}/METADATA +2674 -2674
  2. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc1.dist-info}/RECORD +34 -32
  3. datahub/_version.py +1 -1
  4. datahub/cli/delete_cli.py +1 -0
  5. datahub/ingestion/api/report.py +4 -0
  6. datahub/ingestion/autogenerated/capability_summary.json +1 -1
  7. datahub/ingestion/graph/client.py +8 -1
  8. datahub/ingestion/source/datahub/config.py +4 -0
  9. datahub/ingestion/source/datahub/datahub_database_reader.py +6 -1
  10. datahub/ingestion/source/metadata/lineage.py +8 -8
  11. datahub/ingestion/source/redshift/redshift.py +1 -1
  12. datahub/ingestion/source/sql/athena.py +95 -18
  13. datahub/ingestion/source/sql/athena_properties_extractor.py +43 -25
  14. datahub/ingestion/source/superset.py +3 -2
  15. datahub/ingestion/source/tableau/tableau.py +8 -5
  16. datahub/metadata/_internal_schema_classes.py +207 -12
  17. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  18. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +6 -0
  19. datahub/metadata/schema.avsc +160 -12
  20. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  21. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +9 -1
  22. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +77 -1
  23. datahub/metadata/schemas/DataProductKey.avsc +2 -1
  24. datahub/metadata/schemas/DomainKey.avsc +2 -1
  25. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  26. datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
  27. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  28. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +0 -3
  29. datahub/sql_parsing/sqlglot_lineage.py +121 -28
  30. datahub/sql_parsing/sqlglot_utils.py +12 -1
  31. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc1.dist-info}/WHEEL +0 -0
  32. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc1.dist-info}/entry_points.txt +0 -0
  33. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc1.dist-info}/licenses/LICENSE +0 -0
  34. {acryl_datahub-1.2.0.7rc4.dist-info → acryl_datahub-1.2.0.8rc1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.7rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.8rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=82EBfeYSMr3rKnGGc8fqqoIajsOjaTI4AIrgFadY4GE,323
4
+ datahub/_version.py,sha256=Rdij3ffZjrkKXarGFXcv2MZfNld3LEFCYwjv7W_kgqg,323
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -71,7 +71,7 @@ datahub/cli/check_cli.py,sha256=PVd3DT9TOK7Ejd0ODKnbRrBnPxsZLfUWgubFRUKwKAA,1697
71
71
  datahub/cli/cli_utils.py,sha256=0jTTAKuDZ8GzZwGHYytcT_MPR3Rb2DAcbr9n1H2T2sE,16170
72
72
  datahub/cli/config_utils.py,sha256=EeBGfhmf4AxYoTfnZ4GSiGIgpzJFkduNjN_FwmxZGhA,4889
73
73
  datahub/cli/container_cli.py,sha256=D0zWP3_3aww8_RTkMugOoOlILz3dPJ0TE9asQDLCm6E,1697
74
- datahub/cli/delete_cli.py,sha256=K6DwCAV9tjIirF4Im3Pu4yRRiBiMaEw4jc4LBCqkpJQ,26482
74
+ datahub/cli/delete_cli.py,sha256=0YJeWuXPGY0kbSn1AXK1-8SfCGBxb78ZbO53RAgyjQg,26515
75
75
  datahub/cli/docker_check.py,sha256=NdMmIG8H15QbJqzuHchrs99Mva7kVE-t7FTd6t3qT_c,12978
76
76
  datahub/cli/docker_cli.py,sha256=aAZIF3oblTSXzQVSanbphd3dCvdypHCyuMt1u3_QZ9M,33067
77
77
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
@@ -148,7 +148,7 @@ datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPs
148
148
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
149
149
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
150
150
  datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
151
- datahub/ingestion/api/report.py,sha256=-xduHhIRUgf5G51mUb3uTi6GBxVli6ZK25AS5ikXuII,18312
151
+ datahub/ingestion/api/report.py,sha256=OuVZAgNkzSGkKhpOhpqebd9_bEsBCTeoWR1VcanPeD4,18509
152
152
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
153
153
  datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
154
154
  datahub/ingestion/api/source.py,sha256=uf0fNbiOy0bS_aKFOcNv6NvuZe0LSDIDdNza9hraP7s,21857
@@ -159,7 +159,7 @@ datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
159
159
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
160
160
  datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=0BwkpLhORbsiTHq0g_N_1cVVoZYdLR3qz02mNmsV9-M,4444
161
161
  datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- datahub/ingestion/autogenerated/capability_summary.json,sha256=4dBPgl4z4kIDPhk3L8GsAzSzJWGpsRlDt0wuPdVey5s,110230
162
+ datahub/ingestion/autogenerated/capability_summary.json,sha256=78zS8mOo54IkG9l5KJ_YBXkZmERSSo3pDIrefQx-PWA,110199
163
163
  datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
164
164
  datahub/ingestion/autogenerated/lineage_helper.py,sha256=I_k1pZSCCCjDbUVifPTfy6fkmV8jqdVhbirE8EkpmxI,4748
165
165
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,7 +181,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
181
181
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
182
182
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
183
183
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
- datahub/ingestion/graph/client.py,sha256=xUURT6KxwOhwuAbUznxrOzmGuXxHI-3MmDgJQHFpaGk,74671
184
+ datahub/ingestion/graph/client.py,sha256=Orf9BHyTeHufB_LRIC-LyJj1kii73tyWTN_I1HJ6-_k,74955
185
185
  datahub/ingestion/graph/config.py,sha256=rmkcqAL8fJoY9QyAeS0Xm8HvwHzV3pCjY-Om-50JJTI,1015
186
186
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
187
187
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -228,7 +228,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
228
228
  datahub/ingestion/source/salesforce.py,sha256=UttN3y4Ylbx_yCFCr-33wUEZFR48nTiYeUfOjGIFj2E,40872
229
229
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
230
230
  datahub/ingestion/source/sql_queries.py,sha256=Zf6Y84WFCD-j0v4HdtcXshAPrtGq5InncXCjnLaAuS4,14657
231
- datahub/ingestion/source/superset.py,sha256=T3-DRuaSqIAQ9gfUenWzBgHcVnejv3yMHeyOK6BEGPo,53671
231
+ datahub/ingestion/source/superset.py,sha256=SLv6DqglPlFJwEskLOqEr-rfgCAb_2AXqWYupk8FqH4,53756
232
232
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
233
233
  datahub/ingestion/source/abs/config.py,sha256=WW9JWbzqAJDblAcJKtNeuBHqOeJsB57lW2PqSD65-BU,6729
234
234
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
@@ -293,9 +293,9 @@ datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=IYr5y8vy_6Ct
293
293
  datahub/ingestion/source/data_lake_common/object_store.py,sha256=i9Hgb8Ww23QD_jEjzj_2qxA8Nr56krnZfo1qyOWmH9M,23608
294
294
  datahub/ingestion/source/data_lake_common/path_spec.py,sha256=idEhxKXPCbkp7NzLmaTcS2ebAUwGZIBvn8zAUHcD1QM,23801
295
295
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
296
- datahub/ingestion/source/datahub/config.py,sha256=bjR1U3F85FbtgqmLDW-f_4dQvuw5AsJQxdQlOUeHDUk,5126
296
+ datahub/ingestion/source/datahub/config.py,sha256=ElMoJDuaByhR7ZgXmGTwHJDZDzfGydMkHlSLDnq2Ws0,5257
297
297
  datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
298
- datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=gq0_o2Im83AvTDkA9eSrq6dDJbPysysxQnfNpO7a7gM,15297
298
+ datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=KjVa7aoKALbqXvTo1iJpUfyJdW0_sxRe_VFh-9-FgrI,15599
299
299
  datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=gnxhhlK-jrfnHqD_4eVmfcdtBNW6pi1N_qkDZ7uSb3o,4187
300
300
  datahub/ingestion/source/datahub/datahub_source.py,sha256=mpBrHxf3sitdPZni5B5atlEheQs8cMuN1i2q72eDelQ,8993
301
301
  datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vBCU0XxGcZR6Xxs,940
@@ -405,7 +405,7 @@ datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPu
405
405
  datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
406
406
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
407
407
  datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
408
- datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
408
+ datahub/ingestion/source/metadata/lineage.py,sha256=YgerAUptUVMwrg-s_H8CwTKbbI91Ta74JE-rK3oGUSE,9611
409
409
  datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
410
410
  datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=3i3SdBp267cZRszhmD_JWJLTGIot2FI8REFpjJQ4jD8,19822
411
411
  datahub/ingestion/source/mock_data/datahub_mock_data_report.py,sha256=sV_H7JgcuVbrpIBqtGse_BBigMdqP32ZXuanpeXmwVI,331
@@ -451,7 +451,7 @@ datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX
451
451
  datahub/ingestion/source/redshift/lineage.py,sha256=nqrvWJqaI493i1hIZ_7patrdOb16sZrgSSGapdMcEiU,31710
452
452
  datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwqIql_rpWvlumY_EM,4309
453
453
  datahub/ingestion/source/redshift/query.py,sha256=vVIuNUaU4a7AfMFJZlgLuqi0cGVl0gVz8xZUSnPhWvs,47845
454
- datahub/ingestion/source/redshift/redshift.py,sha256=3cWlMKd_coGYhpP9tImkca0zbjhC5Gf8YIwgsWVVjao,41291
454
+ datahub/ingestion/source/redshift/redshift.py,sha256=zalndYg_LK5aJ8cX_ZuXLcTYajtlavmV-dmQIsjGxjg,41260
455
455
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
456
456
  datahub/ingestion/source/redshift/redshift_schema.py,sha256=7F-l_omOuKMuGE_rBWXVPG_GWXFKnCMzC4frNxZB9cs,24800
457
457
  datahub/ingestion/source/redshift/report.py,sha256=O3QFozHlmMbH9b7KxbqhgTgr_0tCryj6FIzMiN6kRxw,3044
@@ -505,8 +505,8 @@ datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=dmgpwApayUIevyn6l55
505
505
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=c6wg_s97Hrckqi0BgAbmnnRQRDDda1-BHFLlnRx0xuw,35753
506
506
  datahub/ingestion/source/snowflake/stored_proc_lineage.py,sha256=rOb78iHiWiK8v8WdVs1xDwVut4Y0OHmszej6IopQfCo,5341
507
507
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
508
- datahub/ingestion/source/sql/athena.py,sha256=TPKwL9oRiZlVnqIsOSBWUEwyvoW-1ssXvY4PfjxOR6g,28175
509
- datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=OS2E2HD7xTn0MBy__pIvjKXMfGp02Zf93hQRAPMXE_Y,28533
508
+ datahub/ingestion/source/sql/athena.py,sha256=S5R-3HL7nN2FcryEUlwi2fFWzWQb55iycYFSlTRLcoc,31460
509
+ datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=mKu0ZGyt8qvpWoP6CUf0vLUAz5k7GO2keof5KJd4Wak,29469
510
510
  datahub/ingestion/source/sql/clickhouse.py,sha256=zd5qE6XPw0AXtY_71-n0yz4ua69xP3oxMuIoabAuT3Q,25987
511
511
  datahub/ingestion/source/sql/cockroachdb.py,sha256=WoOKCq7YjsuzSPm1SmKIYZ9CrvlSF8zWmP1fNHn4G3Q,1360
512
512
  datahub/ingestion/source/sql/druid.py,sha256=_tzgTa5jhPUXk6WCmS7p10feCwJm6yUFcOgMZA-OcE8,2922
@@ -554,7 +554,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
554
554
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
555
555
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
556
556
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
557
- datahub/ingestion/source/tableau/tableau.py,sha256=aadKU4hFzsD2zhaFysTReUyg6FOAGE_HtZjuoLsIy28,155885
557
+ datahub/ingestion/source/tableau/tableau.py,sha256=OCdEHTYhJllnvQKalaEdiRATt04syAs2-O1oBYhCu74,156162
558
558
  datahub/ingestion/source/tableau/tableau_common.py,sha256=4cUm3E8wLfjLSjcAXXWDWVUXAEho_hbsQa2BzAF-vtM,27012
559
559
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=4ngrOwqxf4cgbLR3i0OKI4pUxmHMABKyywfhXQ0GazA,2592
560
560
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
@@ -635,8 +635,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
635
635
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
636
636
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
637
637
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
638
- datahub/metadata/_internal_schema_classes.py,sha256=dA_UHbIGJZTuffGPnvUQ9rjdWHKubr89z4INW_K7RTw,1053890
639
- datahub/metadata/schema.avsc,sha256=o9lFI4loFPIpA8EZZa321MvVVvmzUZ_za_6BFF4en9M,698792
638
+ datahub/metadata/_internal_schema_classes.py,sha256=AExBM83VHb4vsnAWF2nCR2x6hGg8NIsGXtAIAmYMs04,1061410
639
+ datahub/metadata/schema.avsc,sha256=HluHCVmYg7RpOaw9xUMigEJBxlHF5WLdNcqVBKPeoOU,704514
640
640
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
641
641
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
642
642
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -708,12 +708,13 @@ datahub/metadata/com/linkedin/pegasus2avro/schema/__init__.py,sha256=7JpzLs6S_Ey
708
708
  datahub/metadata/com/linkedin/pegasus2avro/schemafield/__init__.py,sha256=HTWeznycKnHBfPEGcCHXPEz83Iq9ypjNaoSfeQeDU9g,397
709
709
  datahub/metadata/com/linkedin/pegasus2avro/secret/__init__.py,sha256=qk61EqqVZF6k1Ct6t4Uo-pLb0WtM1EwJKn1XjVy9LHE,305
710
710
  datahub/metadata/com/linkedin/pegasus2avro/settings/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
711
+ datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py,sha256=ASD1mi7q19HVN9QKzbu2T3GxzdDQ_LqZNTehV3SI12c,531
711
712
  datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py,sha256=hKpTDXj2YEBja4hk2fZx3G9yzJ7eCFxcGCbbN0NydTk,933
712
713
  datahub/metadata/com/linkedin/pegasus2avro/step/__init__.py,sha256=HLNNbqBlyhcg09eXWx_AMD_JoOtBPYEi2kv12PE0R9E,329
713
714
  datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py,sha256=Cry61gPw6m5MQuJpPxADRm3jhI0XVqzznyD3fVKMkvc,1013
714
715
  datahub/metadata/com/linkedin/pegasus2avro/tag/__init__.py,sha256=Odb4mzloKJIlpoFHODEIxt_OIgFNrZExcyQtvXxjOFQ,290
715
716
  datahub/metadata/com/linkedin/pegasus2avro/telemetry/__init__.py,sha256=N4CJwzAqTrRoCQ2Aoa_e8cUZI_fzn9Zdo2okvO-_nWE,302
716
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py,sha256=CK8dZFt2A11dG9QnjxDrr1QbhP0MS6c4mMXHK688Azc,924
717
+ datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py,sha256=-TWG4__SDxZfD9X6VDalU1ErZRCtyRPwoI8eN4wSta4,1254
717
718
  datahub/metadata/com/linkedin/pegasus2avro/test/__init__.py,sha256=Z4DlDtf-NELFpx44Pk4RL1JlGuxtgEAMa6Sko8QBsGw,711
718
719
  datahub/metadata/com/linkedin/pegasus2avro/timeseries/__init__.py,sha256=6Pbit2drar8n99RFNQiXfYj7PhIzrO1SIpsGELZR4oA,637
719
720
  datahub/metadata/com/linkedin/pegasus2avro/upgrade/__init__.py,sha256=o3U2TuzRSU1uPL-4AOMCPDqEwngqRb6g4-CBFY7eSvQ,525
@@ -729,6 +730,7 @@ datahub/metadata/schemas/AssertionActions.avsc,sha256=zrvXzX2Nv_dmK6I3ZXCVWQ1bVs
729
730
  datahub/metadata/schemas/AssertionInfo.avsc,sha256=djiUVdw0pGd_Ex7uJspasTx2xwwCnd9cpItd76VJqYw,125296
730
731
  datahub/metadata/schemas/AssertionKey.avsc,sha256=EjNaTyzGxtWzTsZd87P2ZSaGX5dn8Y7HGBdqvlQVrFI,638
731
732
  datahub/metadata/schemas/AssertionRunEvent.avsc,sha256=FUyV73bUliBC-a_XFUlfgh75o99-lu1fl36b2q8Pqx4,12886
733
+ datahub/metadata/schemas/AssetSettings.avsc,sha256=LBmSybFUBkQPVmIEF1GwQl0ePmTrXlm0kHbKRIm--lo,2004
732
734
  datahub/metadata/schemas/BrowsePaths.avsc,sha256=NR_4dKuJMk1X2RB4DLkHVSqyMQc4PvT7eR0n6lM5aOM,654
733
735
  datahub/metadata/schemas/BrowsePathsV2.avsc,sha256=rpcXUONOmxCzTzUJZ4UxR5rk3AotiaHkCfqM5uJELjY,1876
734
736
  datahub/metadata/schemas/BusinessAttributeInfo.avsc,sha256=5HpEN9ZP6qdkcIib01F9n54dHoNDrGrml_9o_ZO5JTc,22062
@@ -769,9 +771,9 @@ datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjO
769
771
  datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
770
772
  datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
771
773
  datahub/metadata/schemas/DataHubPageModuleKey.avsc,sha256=NyFN8cVO6s6rtgoLGJJGfcPfpGr5PfmZlIhM6ajldfQ,460
772
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=2HK5h0bKefR1GNLYLyMrTTDwRPvyDciaLgiJHNDkinc,9878
774
+ datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=53Fj4ztBJqo9QMWuza2Kdtfpr2nTOTW0XuuXW77ugB8,10347
773
775
  datahub/metadata/schemas/DataHubPageTemplateKey.avsc,sha256=0sVqwL97Rp8YHPytp2RqUP5hIW048hmT2hPNP5k6arc,472
774
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=0ndN64UNAADL6G_GVjJLHbe_dBnWhVRjtI3MilOlHQc,5651
776
+ datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=FyNcZIniQy9m6yN9DT4XsPkDrxUsU7tRTqmfdGoEtMU,8565
775
777
  datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
776
778
  datahub/metadata/schemas/DataHubPersonaKey.avsc,sha256=ddj-DhXa0_YMdLaGkKLLSklfIeDRvSwPXu8o__YEXUE,448
777
779
  datahub/metadata/schemas/DataHubPolicyInfo.avsc,sha256=yBQe7pAuTMg9aovhugF4EkCRSHO_AN2TP_NM-0-Jg3A,10037
@@ -805,7 +807,7 @@ datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5
805
807
  datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
806
808
  datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
807
809
  datahub/metadata/schemas/DataProcessKey.avsc,sha256=ZZE2HN4mwZtm_TJNcdohFS97WXytFq9HAs_-shor6sY,2518
808
- datahub/metadata/schemas/DataProductKey.avsc,sha256=SyjmL2ieea1P6uipXst37mD5NdGPTqDvJAL3CVo91wk,661
810
+ datahub/metadata/schemas/DataProductKey.avsc,sha256=ECDUbxMwvmgI3tTRbO7UXncbNJUrqLkHTSDoMtBpYYY,684
809
811
  datahub/metadata/schemas/DataProductProperties.avsc,sha256=Lc3duV7YMJLvo_RwckLbW4bbmPrhSS1D-bxVVboNX2c,6930
810
812
  datahub/metadata/schemas/DataTransformLogic.avsc,sha256=nHTH6UzJ2Zz88N2aWa96hawLUR20HP7eSynfPtI1kzg,2111
811
813
  datahub/metadata/schemas/DataTypeInfo.avsc,sha256=MCjzal71P8uIXZg161LrU8rZTJocZeizK-YxYA0Det0,704
@@ -821,7 +823,7 @@ datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=jgF1u31kP1XAnnV2B0X8
821
823
  datahub/metadata/schemas/Deprecation.avsc,sha256=p8SBIuKP3XVGeaBI7rROpLNACuoX8eMLRlZz8lGOYV8,1354
822
824
  datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
823
825
  datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
824
- datahub/metadata/schemas/DomainKey.avsc,sha256=TYCcJRWqwbxbQuR5E68pvdeAmfVdYsJuMNhTxVphbqg,676
826
+ datahub/metadata/schemas/DomainKey.avsc,sha256=hDlX4jJTeAXW_VpKEhF8w5_tMZi7JRaVGqbvJL7GojE,699
825
827
  datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
826
828
  datahub/metadata/schemas/Domains.avsc,sha256=5mRQcba6Zmp6Y1srbxhOjETutg0I_ZG4ikuS2r9fkR0,804
827
829
  datahub/metadata/schemas/DynamicFormAssignment.avsc,sha256=SXRL5D6kIYWdGl3zLQYxPnkQX71JXQOKrjQNavFqVp0,7339
@@ -859,15 +861,15 @@ datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=HKZisxW2HNm6kwJUBaAVol7s
859
861
  datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
860
862
  datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
861
863
  datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
862
- datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=hT8ny4TL1WvgFvnaVBjuw6AWDiPDjpkh20f83ZT-UZ8,664
864
+ datahub/metadata/schemas/GlossaryNodeKey.avsc,sha256=KjdsiVm93SWXQdxllwE0GQei-NmvSatzrl9sQIA6TAE,687
863
865
  datahub/metadata/schemas/GlossaryRelatedTerms.avsc,sha256=ZTP0mrFD4y-C6JekRy8IVuHvICUkJib-ZAYD93Gv1tA,2763
864
866
  datahub/metadata/schemas/GlossaryTermInfo.avsc,sha256=j4s9NCyMOIF03HfaXoQEIkiMTRaCy_-euhenptfu7IA,2935
865
- datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=00paBmYoFIlIUebS0X6BL9y5xqnedV3AjOiU0uOp1gI,858
867
+ datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=_5Nf0kljjFVz0wNsE-BiIZvn6yL1iymaBQIptRKVwoU,881
866
868
  datahub/metadata/schemas/GlossaryTerms.avsc,sha256=ogOFO6Hr2Xb7s1JHqxsCPY8r_qY_9kwu69k5-E3j2BM,7123
867
869
  datahub/metadata/schemas/GroupMembership.avsc,sha256=wT3Hbpv2Z7V4X_-rIoed0cukAOMyYEL93udK8mMCjn0,557
868
870
  datahub/metadata/schemas/IcebergCatalogInfo.avsc,sha256=X9Ejqzn1DyxNIth7vDhtPjGG4xMPQMhl7f-S7fBFxek,691
869
871
  datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=0m7cQm8cCnBWNI5jGGgr5ZdOg66RQGWSf3gf8ay53So,2705
870
- datahub/metadata/schemas/IncidentInfo.avsc,sha256=L8xldmWyOW4Ml2Fm9XTRL13lP1CAEP0kgXsd_jLZaEU,12425
872
+ datahub/metadata/schemas/IncidentInfo.avsc,sha256=bxw_OwxhTarm1TxY4R-Q12RFQBIjeoUGgvgoeRiNT04,12428
871
873
  datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
872
874
  datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
873
875
  datahub/metadata/schemas/IncidentsSummary.avsc,sha256=NTYp-6Oe92ALApbM3759TJ5pLXRArsSriIPq-f7w9vI,4514
@@ -929,7 +931,7 @@ datahub/metadata/schemas/SlackUserInfo.avsc,sha256=IY7InWaiDzJa3hJ9J4W3Eg8EUKuh0
929
931
  datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
930
932
  datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
931
933
  datahub/metadata/schemas/StructuredProperties.avsc,sha256=qe45sKZ9XrLcf15Gt03Ttzt2J_kJYHvN-DAOSErSYuY,7028
932
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=OIRGpyLUYuBmISPr4WR85Dz6RlqC0dwgP3vgKItcx1U,11795
934
+ datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=a-6TaOQ4A7LDFLshmaFRBcXjz11p4vM0Q3X35GN4Zo0,11737
933
935
  datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
934
936
  datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
935
937
  datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
@@ -1004,8 +1006,8 @@ datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41
1004
1006
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=kxxSVe3YNoz_T2OG6-F30ZuXNSXuBZ-E54RqObo6qTI,72323
1005
1007
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
1006
1008
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
1007
- datahub/sql_parsing/sqlglot_lineage.py,sha256=oG7Zx2aOpm1tBQQowPgSufGlMpm5DaMGKTwk7gIkhX0,61450
1008
- datahub/sql_parsing/sqlglot_utils.py,sha256=TI11oBu1wrGeUuUGBg7hGTr6lTvztahdqiqXNJYRfbQ,14823
1009
+ datahub/sql_parsing/sqlglot_lineage.py,sha256=05ryqMkFK5_KDvr-ytXb1FyiC-SzaFB9_USnbL36LEg,66316
1010
+ datahub/sql_parsing/sqlglot_utils.py,sha256=zH8V9tAcSVO7Y8I3sIKPhs0D_9HzdNBlranBDmk1NB4,15454
1009
1011
  datahub/sql_parsing/tool_meta_extractor.py,sha256=5JsLPcKjuXSrPGxNIhRvX72dFPmlV33-hyvhJwlWxCY,7543
1010
1012
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1011
1013
  datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
@@ -1112,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1112
1114
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1113
1115
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1114
1116
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1115
- acryl_datahub-1.2.0.7rc4.dist-info/METADATA,sha256=E15QJWL7lHS1mLe36RJZOMqHRbzN-EvYISjREnc2LZk,186633
1116
- acryl_datahub-1.2.0.7rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1117
- acryl_datahub-1.2.0.7rc4.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1118
- acryl_datahub-1.2.0.7rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1119
- acryl_datahub-1.2.0.7rc4.dist-info/RECORD,,
1117
+ acryl_datahub-1.2.0.8rc1.dist-info/METADATA,sha256=_TXC2AAKI66LHx6fTnBdVxsJBwedMdTKiIhnrBVSTQk,186651
1118
+ acryl_datahub-1.2.0.8rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
+ acryl_datahub-1.2.0.8rc1.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
+ acryl_datahub-1.2.0.8rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
+ acryl_datahub-1.2.0.8rc1.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.7rc4"
3
+ __version__ = "1.2.0.8rc1"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/cli/delete_cli.py CHANGED
@@ -469,6 +469,7 @@ def by_filter(
469
469
  query=query,
470
470
  status=soft_delete_filter,
471
471
  batch_size=batch_size,
472
+ skip_cache=True,
472
473
  )
473
474
  )
474
475
  if len(urns) == 0:
@@ -204,6 +204,7 @@ class ExamplesReport(Report, Closeable):
204
204
  samples: Dict[str, Dict[str, List[str]]] = field(
205
205
  default_factory=lambda: defaultdict(lambda: defaultdict(list))
206
206
  )
207
+ compute_stats_time_seconds: float = 0.0
207
208
  _file_based_dict: Optional[FileBackedDict[SourceReportSubtypes]] = None
208
209
 
209
210
  # We are adding this to make querying easier for fine-grained lineage
@@ -405,6 +406,7 @@ class ExamplesReport(Report, Closeable):
405
406
  self._update_file_based_dict(urn, entityType, aspectName, mcp)
406
407
 
407
408
  def compute_stats(self) -> None:
409
+ start_time = datetime.now()
408
410
  if self._file_based_dict is None:
409
411
  return
410
412
 
@@ -466,6 +468,8 @@ class ExamplesReport(Report, Closeable):
466
468
  list(self._lineage_aspects_seen), "lineage"
467
469
  )
468
470
  self._collect_samples_with_all_conditions("all_3")
471
+ end_time = datetime.now()
472
+ self.compute_stats_time_seconds += (end_time - start_time).total_seconds()
469
473
 
470
474
 
471
475
  class EntityFilterReport(ReportAttribute):
@@ -2678,7 +2678,7 @@
2678
2678
  },
2679
2679
  {
2680
2680
  "capability": "USAGE_STATS",
2681
- "description": "Enabled by default, can be disabled via configuration `include_usage_statistics`",
2681
+ "description": "Optionally enabled via `include_usage_statistics`",
2682
2682
  "subtype_modifier": null,
2683
2683
  "supported": true
2684
2684
  },
@@ -971,7 +971,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
971
971
  $orFilters: [AndFilterInput!],
972
972
  $batchSize: Int!,
973
973
  $scrollId: String,
974
- $skipCache: Boolean!) {
974
+ $skipCache: Boolean!,
975
+ $includeSoftDeleted: Boolean) {
975
976
 
976
977
  scrollAcrossEntities(input: {
977
978
  query: $query,
@@ -983,6 +984,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
983
984
  skipHighlighting: true
984
985
  skipAggregates: true
985
986
  skipCache: $skipCache
987
+ includeSoftDeleted: $includeSoftDeleted
986
988
  }
987
989
  }) {
988
990
  nextScrollId
@@ -1002,6 +1004,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1002
1004
  "orFilters": orFilters,
1003
1005
  "batchSize": batch_size,
1004
1006
  "skipCache": skip_cache,
1007
+ "includeSoftDeleted": (
1008
+ None
1009
+ if status is None
1010
+ else status != RemovedStatusFilter.NOT_SOFT_DELETED
1011
+ ),
1005
1012
  }
1006
1013
 
1007
1014
  for entity in self._scroll_across_entities(graphql_query, variables):
@@ -129,6 +129,10 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
129
129
  description="Timeout for each query in seconds. ",
130
130
  )
131
131
 
132
+ preserve_system_metadata: bool = Field(
133
+ default=True, description="Copy system metadata from the source system"
134
+ )
135
+
132
136
  @root_validator(skip_on_failure=True)
133
137
  def check_ingesting_data(cls, values):
134
138
  if (
@@ -380,7 +380,12 @@ class DataHubDatabaseReader:
380
380
  json_metadata = post_json_transform(
381
381
  json.loads(row["systemmetadata"] or "{}")
382
382
  )
383
- system_metadata = SystemMetadataClass.from_obj(json_metadata)
383
+ system_metadata = None
384
+ if self.config.preserve_system_metadata:
385
+ system_metadata = SystemMetadataClass.from_obj(json_metadata)
386
+ if system_metadata.properties:
387
+ is_no_op = system_metadata.properties.pop("isNoOp", None)
388
+ logger.debug(f"Removed potential value for is_no_op={is_no_op}")
384
389
  return MetadataChangeProposalWrapper(
385
390
  entityUrn=row["urn"],
386
391
  aspect=ASPECT_MAP[row["aspect"]].from_obj(json_aspect),
@@ -37,9 +37,9 @@ from datahub.ingestion.api.source_helpers import (
37
37
  from datahub.ingestion.api.workunit import MetadataWorkUnit
38
38
  from datahub.ingestion.graph.client import get_default_graph
39
39
  from datahub.ingestion.graph.config import ClientMode
40
- from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
41
- FineGrainedLineageDownstreamType,
42
- FineGrainedLineageUpstreamType,
40
+ from datahub.metadata.schema_classes import (
41
+ FineGrainedLineageDownstreamTypeClass,
42
+ FineGrainedLineageUpstreamTypeClass,
43
43
  )
44
44
 
45
45
  logger = logging.getLogger(__name__)
@@ -80,9 +80,9 @@ class FineGrainedLineageConfig(ConfigModel):
80
80
  @validator("upstreamType")
81
81
  def upstream_type_must_be_supported(cls, v: str) -> str:
82
82
  allowed_types = [
83
- FineGrainedLineageUpstreamType.FIELD_SET,
84
- FineGrainedLineageUpstreamType.DATASET,
85
- FineGrainedLineageUpstreamType.NONE,
83
+ FineGrainedLineageUpstreamTypeClass.FIELD_SET,
84
+ FineGrainedLineageUpstreamTypeClass.DATASET,
85
+ FineGrainedLineageUpstreamTypeClass.NONE,
86
86
  ]
87
87
  if v not in allowed_types:
88
88
  raise ValueError(
@@ -93,8 +93,8 @@ class FineGrainedLineageConfig(ConfigModel):
93
93
  @validator("downstreamType")
94
94
  def downstream_type_must_be_supported(cls, v: str) -> str:
95
95
  allowed_types = [
96
- FineGrainedLineageDownstreamType.FIELD_SET,
97
- FineGrainedLineageDownstreamType.FIELD,
96
+ FineGrainedLineageDownstreamTypeClass.FIELD_SET,
97
+ FineGrainedLineageDownstreamTypeClass.FIELD,
98
98
  ]
99
99
  if v not in allowed_types:
100
100
  raise ValueError(
@@ -143,7 +143,7 @@ logger: logging.Logger = logging.getLogger(__name__)
143
143
  @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
144
144
  @capability(
145
145
  SourceCapability.USAGE_STATS,
146
- "Enabled by default, can be disabled via configuration `include_usage_statistics`",
146
+ "Optionally enabled via `include_usage_statistics`",
147
147
  )
148
148
  @capability(
149
149
  SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
@@ -73,6 +73,11 @@ except ImportError:
73
73
 
74
74
  logger = logging.getLogger(__name__)
75
75
 
76
+ # Precompiled regex for SQL identifier validation
77
+ # The pattern accepts ASCII letters (upper or lower case), digits, underscores, and periods (periods are needed for complex types)
78
+ # Note: Athena automatically converts uppercase to lowercase, so uppercase input is tolerated; every other character is rejected for security
79
+ _IDENTIFIER_PATTERN = re.compile(r"^[a-zA-Z0-9_.]+$")
80
+
76
81
  assert STRUCT, "required type modules are not available"
77
82
  register_custom_type(STRUCT, RecordTypeClass)
78
83
  register_custom_type(MapType, MapTypeClass)
@@ -510,20 +515,76 @@ class AthenaSource(SQLAlchemySource):
510
515
  return [schema for schema in schemas if schema == athena_config.database]
511
516
  return schemas
512
517
 
518
@classmethod
def _sanitize_identifier(cls, identifier: str) -> str:
    """Validate a SQL identifier before it is interpolated into a query.

    The identifier is never rewritten or escaped: it is either returned
    unchanged or rejected.

    Args:
        identifier: The SQL identifier to validate.

    Returns:
        The same identifier, once it has passed validation.

    Raises:
        ValueError: If the identifier is empty, or contains any character
            outside the allow-list defined by ``_IDENTIFIER_PATTERN``.
    """
    if not identifier:
        raise ValueError("Identifier cannot be empty")

    # fullmatch() is deliberate: with match(), the trailing "$" of the
    # pattern also matches just before a final newline, so a value such as
    # "name\n" would slip through the allow-list.
    if not _IDENTIFIER_PATTERN.fullmatch(identifier):
        raise ValueError(
            f"Identifier '{identifier}' contains unsafe characters. Only alphanumeric characters, underscores, and periods are allowed."
        )

    return identifier
542
+
513
543
  @classmethod
514
544
  def _casted_partition_key(cls, key: str) -> str:
515
545
  # We need to cast the partition keys to a VARCHAR, since otherwise
516
546
  # Athena may throw an error during concatenation / comparison.
517
- return f"CAST({key} as VARCHAR)"
547
+ sanitized_key = cls._sanitize_identifier(key)
548
+ return f"CAST({sanitized_key} as VARCHAR)"
549
+
550
@classmethod
def _build_max_partition_query(
    cls, schema: str, table: str, partitions: List[str]
) -> str:
    """Compose the query that selects the row holding the maximum partition.

    Every identifier is validated before it is placed into the SQL text.
    The partition keys are each cast to VARCHAR and, when there is more
    than one, joined with a '-' separator into a single comparable value.

    Args:
        schema: Database schema name.
        table: Table name.
        partitions: Partition column names.

    Returns:
        The SQL text querying the table's ``$partitions`` view.

    Raises:
        ValueError: If any identifier is rejected by validation.
    """
    # Validation order (schema, table, then columns) determines which
    # identifier is reported first when several are invalid.
    safe_schema = cls._sanitize_identifier(schema)
    safe_table = cls._sanitize_identifier(table)
    safe_columns = []
    for column in partitions:
        safe_columns.append(cls._sanitize_identifier(column))

    cast_exprs = [cls._casted_partition_key(column) for column in partitions]
    if len(cast_exprs) == 1:
        # A single key needs no concatenation.
        partition_expr = cast_exprs[0]
    else:
        joiner = ", CAST('-' AS VARCHAR), "
        partition_expr = "CONCAT(" + joiner.join(cast_exprs) + ")"

    select_list = ",".join(safe_columns)
    partitions_view = f'"{safe_schema}"."{safe_table}$partitions"'
    return f"select {select_list} from {partitions_view} where {partition_expr} = (select max({partition_expr}) from {partitions_view})"
518
582
 
519
583
  @override
520
584
  def get_partitions(
521
585
  self, inspector: Inspector, schema: str, table: str
522
586
  ) -> Optional[List[str]]:
523
- if (
524
- not self.config.extract_partitions
525
- and not self.config.extract_partitions_using_create_statements
526
- ):
587
+ if not self.config.extract_partitions:
527
588
  return None
528
589
 
529
590
  if not self.cursor:
@@ -557,11 +618,9 @@ class AthenaSource(SQLAlchemySource):
557
618
  context=f"{schema}.{table}",
558
619
  level=StructuredLogLevel.WARN,
559
620
  ):
560
- # We create an artifical concatenated partition key to be able to query max partition easier
561
- part_concat = " || '-' || ".join(
562
- self._casted_partition_key(key) for key in partitions
621
+ max_partition_query = self._build_max_partition_query(
622
+ schema, table, partitions
563
623
  )
564
- max_partition_query = f'select {",".join(partitions)} from "{schema}"."{table}$partitions" where {part_concat} = (select max({part_concat}) from "{schema}"."{table}$partitions")'
565
624
  ret = self.cursor.execute(max_partition_query)
566
625
  max_partition: Dict[str, str] = {}
567
626
  if ret:
@@ -678,16 +737,34 @@ class AthenaSource(SQLAlchemySource):
678
737
  ).get(table, None)
679
738
 
680
739
  if partition and partition.max_partition:
681
- max_partition_filters = []
682
- for key, value in partition.max_partition.items():
683
- max_partition_filters.append(
684
- f"{self._casted_partition_key(key)} = '{value}'"
740
+ try:
741
+ # Sanitize identifiers to prevent SQL injection
742
+ sanitized_schema = self._sanitize_identifier(schema)
743
+ sanitized_table = self._sanitize_identifier(table)
744
+
745
+ max_partition_filters = []
746
+ for key, value in partition.max_partition.items():
747
+ # Sanitize partition key and properly escape the value
748
+ sanitized_key = self._sanitize_identifier(key)
749
+ # Escape single quotes in the value to prevent injection
750
+ escaped_value = value.replace("'", "''") if value else ""
751
+ max_partition_filters.append(
752
+ f"{self._casted_partition_key(sanitized_key)} = '{escaped_value}'"
753
+ )
754
+ max_partition = str(partition.max_partition)
755
+ return (
756
+ max_partition,
757
+ f'SELECT * FROM "{sanitized_schema}"."{sanitized_table}" WHERE {" AND ".join(max_partition_filters)}',
685
758
  )
686
- max_partition = str(partition.max_partition)
687
- return (
688
- max_partition,
689
- f'SELECT * FROM "{schema}"."{table}" WHERE {" AND ".join(max_partition_filters)}',
690
- )
759
+ except ValueError as e:
760
+ # If sanitization fails due to malicious identifiers,
761
+ # return None to disable partition profiling for this table
762
+ # rather than crashing the entire ingestion
763
+ logger.warning(
764
+ f"Failed to generate partition profiler query for {schema}.{table} due to unsafe identifiers: {e}. "
765
+ f"Partition profiling disabled for this table."
766
+ )
767
+ return None, None
691
768
  return None, None
692
769
 
693
770
  def close(self):