acryl-datahub 1.3.0.1rc5__py3-none-any.whl → 1.3.0.1rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (23)
  1. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/METADATA +2439 -2439
  2. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/RECORD +23 -20
  3. datahub/_version.py +1 -1
  4. datahub/ingestion/source/bigquery_v2/bigquery.py +17 -1
  5. datahub/ingestion/source/bigquery_v2/bigquery_config.py +16 -0
  6. datahub/ingestion/source/bigquery_v2/queries_extractor.py +41 -4
  7. datahub/ingestion/source/snowflake/snowflake_config.py +16 -0
  8. datahub/ingestion/source/snowflake/snowflake_queries.py +46 -6
  9. datahub/ingestion/source/snowflake/snowflake_v2.py +14 -1
  10. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  11. datahub/ingestion/source/state/stateful_ingestion_base.py +30 -2
  12. datahub/metadata/_internal_schema_classes.py +223 -0
  13. datahub/metadata/_urns/urn_defs.py +56 -0
  14. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  15. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  16. datahub/metadata/schema.avsc +206 -0
  17. datahub/metadata/schemas/DataHubFileInfo.avsc +228 -0
  18. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  19. datahub/sql_parsing/sql_parsing_aggregator.py +18 -4
  20. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/WHEEL +0 -0
  21. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/entry_points.txt +0 -0
  22. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/licenses/LICENSE +0 -0
  23. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.3.0.1rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.3.0.1rc6.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=GFe5nZs9PKs-LLDaT1H1D9udtmOwDf_NsyGlgBGOywE,323
4
+ datahub/_version.py,sha256=PW9A4Uazfqf_qZ54rH_cG6i8GhrmicechIGtJYipA8Q,323
5
5
  datahub/entrypoints.py,sha256=VcbU6Z47b_JKW1zI-WJMYIngm05FSogKLiuvFNtyNcI,9088
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -260,10 +260,10 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
260
260
  datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
261
261
  datahub/ingestion/source/azure/azure_common.py,sha256=DvPrLpjQSJ1USB_myGmg8lGkRW-WAl2GIZMcEkBFjOs,4063
262
262
  datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
263
- datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=u4-LLt6ZDe3hKqLWqEByYpc0z-UcEZf85uok9qNEFko,15321
263
+ datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=Z5QsyvBNDWEf9kME_zBRw2oLIh3rD5zafpvuYB0p4ow,15972
264
264
  datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=kEwWhq3ch6WT4q4hcX8-fvQh28KgrNfspFwIytO3vQA,25103
265
265
  datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
266
- datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=3u5p6RM4-q651fXrDTDsB5jY29lK_usr6OWI9N1LjuQ,22977
266
+ datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=A5pLaTm4WCTndhGmGBGrjc05LHtC7C5-FrE3vEMc1ik,23880
267
267
  datahub/ingestion/source/bigquery_v2/bigquery_connection.py,sha256=6XFCc0oxxU3R4IPyYHaf3YMETlMD4ztkNpkf4kf1Elw,3171
268
268
  datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
269
269
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
@@ -277,7 +277,7 @@ datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672B
277
277
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
278
278
  datahub/ingestion/source/bigquery_v2/profiler.py,sha256=oLf5jMjJf-ShNny9Dll2tCsOoPMF1DxAh7e7etpeLq4,10821
279
279
  datahub/ingestion/source/bigquery_v2/queries.py,sha256=gDvvgajptmNn5AiBglmDhGAC9LBh8fzw56_d8ewLbxA,20222
280
- datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=7mObcHn6mpZRoO4QnJ0QuZ8AS_MsdPLwb-cLRyP-W6k,19531
280
+ datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=bSYusyf-xnhs_1WURsQ2YmMxRn3J5HCp_UKChsxbWIw,21015
281
281
  datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
282
282
  datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
283
283
  datahub/ingestion/source/cassandra/cassandra.py,sha256=pNy61Z4kTqL_wGcWIYee5fnZiuJDseDcRcQwsxeAssk,14487
@@ -496,12 +496,12 @@ datahub/ingestion/source/snowflake/constants.py,sha256=iDTamMozHwLYyglpRfqwTbxPx
496
496
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
497
497
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
498
498
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
499
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=tpNJvPZYUb6pZWSqh-fRgpIeSx20hkDCLYW_EJbkIlk,23536
499
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=HoDzaG3TlP1ui5qY2PZUcu83wOjvJ96Z9fFYC4GmCko,24439
500
500
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=uSHdPqigRzjeNxtn0_m5i57X7X8LBZIpHzDcWIoovyA,19005
501
501
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
502
502
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=nam-bYV6wL9LfR-Tt50Qe_Kea61IuWS-lLu5__aDxk8,21853
503
503
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=PmQi-qDlRhdJ-PsJ7x-EScIiswWRAxDDOKHydvN3mTY,7404
504
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=lAMA--X3nbWFdNs1DTHNm7crctB3RilX_pB-zy47piI,45528
504
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=AiLEbCs6zXbIoejFdLsbSupkvqHj5BOtPz9lubGKLv8,47142
505
505
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=wLDaYZrWJ0794KKn69rB_QF0_8Bzu5l_7L6mD77KVc4,40469
506
506
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=fA6C-p9wM-jyTsXE_suTbCtrE_lle-5LI52S7wFYf00,6701
507
507
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=a6avRJXbj2qwnu28oK1YotmJo68zEG-1S7vonsUUJy4,41473
@@ -511,7 +511,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=5Li4H8KuS4qBKR98L
511
511
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
512
512
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=mM0v9b4PHRJAT-SdRids3wdzc5O96gWCCww3e42itV8,24982
513
513
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=1c1YNmAxxOwAKy8IEFqVdp6x-EvCYJkN6UZ_RwUUVv0,15062
514
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=c6wg_s97Hrckqi0BgAbmnnRQRDDda1-BHFLlnRx0xuw,35753
514
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=Tpx_d8UuO4wD9sIL9nMDR4GtCvVc6KbF82nlBdFRtEI,36408
515
515
  datahub/ingestion/source/snowflake/stored_proc_lineage.py,sha256=rOb78iHiWiK8v8WdVs1xDwVut4Y0OHmszej6IopQfCo,5341
516
516
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
517
517
  datahub/ingestion/source/sql/athena.py,sha256=ZvWGuAPRUeUkE-7N6B3RyCkQp7JZKnLVXTnnR200gls,31532
@@ -552,10 +552,10 @@ datahub/ingestion/source/state/checkpoint.py,sha256=ob8wtC0zOgTdc_-cVAI3MUKcQaN4
552
552
  datahub/ingestion/source/state/entity_removal_state.py,sha256=dIG1HnueaRTsAu8kYjYiPFPpkZ4WwbraDF6oFDsts2E,6720
553
553
  datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-LjS4XF0p6BxSAcLY-xKRzM,512
554
554
  datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
555
- datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
555
+ datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=UENQDZ1Hacd8Hg3jC6okomG9V4EMLfL-Zz60aU6jzyc,10260
556
556
  datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
557
557
  datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Lr2HYGx_b2FQ8A36s7s11tl-4-mGIM13bfy5JbQ3LtM,14890
558
- datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=FusdOZBvx-Kdc0A8cxEDZ4RfxjmG6MN02PCQImjTyBg,17305
558
+ datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=j78BN_uSBpOJRi19kosZGPsgolKm9i-40MmSzDGeaFs,18837
559
559
  datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
560
560
  datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
561
561
  datahub/ingestion/source/state_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -646,12 +646,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
646
646
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
647
647
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
648
648
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
649
- datahub/metadata/_internal_schema_classes.py,sha256=M3j1TDrK43RYlhKUsaqeHgjbw5ERkAzKxdC4pD4dLEg,1077060
650
- datahub/metadata/schema.avsc,sha256=mUP8XLRosJg0WlBymK0-EAlGqrC-j7bQIDbOVtODrR8,775591
649
+ datahub/metadata/_internal_schema_classes.py,sha256=O8MG_weYzOYtGIZsOr7c6EO33NDo9liE8D8kyotoq8Q,1084754
650
+ datahub/metadata/schema.avsc,sha256=YMMQSOELeCC4ZptYnoEfMFHRL-DguCR9KmjZ6NYWMlQ,780967
651
651
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
652
652
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
653
653
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
654
- datahub/metadata/_urns/urn_defs.py,sha256=_LgqKLHrmHHxpvrP-93NMJSLEnoFI8q72lkX17mK1XA,143257
654
+ datahub/metadata/_urns/urn_defs.py,sha256=GRvs_XhEQ8Xc6abC7B7FwTIMb18R3VeXrjV1_sRi_DE,145429
655
655
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
656
656
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
657
657
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -688,6 +688,7 @@ datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.
688
688
  datahub/metadata/com/linkedin/pegasus2avro/events/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
689
689
  datahub/metadata/com/linkedin/pegasus2avro/events/metadata/__init__.py,sha256=a1FI_2VZ9Ejc9AIVztO-B5kLPR6VwlOgdFlv4PTCTYs,282
690
690
  datahub/metadata/com/linkedin/pegasus2avro/execution/__init__.py,sha256=O5XAXnGzDnWv8nbqRHxLPPXUbrIu_pn76WUK_hhkHmg,775
691
+ datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py,sha256=wS-GU8YjGghMQcQ42BYv7Rh_-8h-yPiS_QMzJI34yJM,507
691
692
  datahub/metadata/com/linkedin/pegasus2avro/form/__init__.py,sha256=rGDmWiKm6qpXiipZ5veCHqBJGSAryAqnSzRPlwcmLnA,845
692
693
  datahub/metadata/com/linkedin/pegasus2avro/glossary/__init__.py,sha256=fa1QNv08O3TqXqZ14bkJerGho_t-8DPHFdcWKiXkkUA,501
693
694
  datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py,sha256=EGxkzJgQMASL_aUmgjHE3bo8qRTSbAbM_8gUccZblX0,1603
@@ -695,7 +696,7 @@ datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py,sha256=LfB7ytT1u
695
696
  datahub/metadata/com/linkedin/pegasus2avro/ingestion/__init__.py,sha256=1bfG2naq4iS_pwU4J-BVer_gfL0hDbJbnH0gh1MPNgA,871
696
697
  datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py,sha256=7SHiR-KzV1CkAimFy94SkcY0Xg0RlsIlLTUTGmGAW_U,290
697
698
  datahub/metadata/com/linkedin/pegasus2avro/metadata/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
698
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py,sha256=G9CI1UqSXGzselvjnlOI7Obzjn5ZTQVzohRGBZHdnZk,5151
699
+ datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py,sha256=U7nFcTKOeiqyZVxD53p2vcKBj6tupuCJH9I3yz6hFDg,5241
699
700
  datahub/metadata/com/linkedin/pegasus2avro/metadata/query/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
700
701
  datahub/metadata/com/linkedin/pegasus2avro/metadata/query/filter/__init__.py,sha256=DBP_QtxkFmC5q_kuk4dGjb4uOKbB4xKgqTWXGxmNbBQ,532
701
702
  datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py,sha256=OPboF8SV11wGnjvWQB-rxtB0otMdCsE7Tcy7xkOUgz8,2358
@@ -778,6 +779,8 @@ datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4N
778
779
  datahub/metadata/schemas/DataHubActionKey.avsc,sha256=bjiKcoyvUPQKaGUi2ICBMJ_ukwnt7dh0szJS4WBZE0A,448
779
780
  datahub/metadata/schemas/DataHubConnectionDetails.avsc,sha256=IvZj6OA7HRvy-ZIIn0UbXdJNnyt_oTn16XIe5ZlcqGk,1661
780
781
  datahub/metadata/schemas/DataHubConnectionKey.avsc,sha256=VwbamVFoEdp6epz1lJm_UShBl6ksBxoA7jAYuPI5u3M,522
782
+ datahub/metadata/schemas/DataHubFileInfo.avsc,sha256=Vt1t5L9QyaIkSh3GUkrQs4I81o2MYAJJu194h1M97tw,6314
783
+ datahub/metadata/schemas/DataHubFileKey.avsc,sha256=-qxMFdpRPSHpA88adIFIJ35PZrN5qXOEo8RK-xTzkSQ,422
781
784
  datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjODE5SmuVKuQeW8ajLJNRpqEBRyio,4601
782
785
  datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
783
786
  datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
@@ -1017,7 +1020,7 @@ datahub/sql_parsing/fingerprint_utils.py,sha256=3hGiexaQXnE7eZLxo-t7hlTyVQz7womb
1017
1020
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
1018
1021
  datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
1019
1022
  datahub/sql_parsing/split_statements.py,sha256=doCACwQ_Fx6m1djo7t3BnU9ZHki4EV2KJUQkFMGv7lg,10101
1020
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=NTWrsScS4d8CYLlBdMYo0b6ecuFpMfiwHJ7kMAh9fQg,72685
1023
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=chRTe5eZfxsOd2vP_IrO_j3KhFUGLe2sPt0WXQ6yt5M,73320
1021
1024
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
1022
1025
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
1023
1026
  datahub/sql_parsing/sqlglot_lineage.py,sha256=l4LZMiaeTARjJG76Uun_yNtFHdSj3yi8zO1XvAQtxl0,66944
@@ -1128,8 +1131,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1128
1131
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1129
1132
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1130
1133
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1131
- acryl_datahub-1.3.0.1rc5.dist-info/METADATA,sha256=ckjfgTlPEUgZH1sYvZm3MLupBZvBbsC-zokD3Q2ekno,184688
1132
- acryl_datahub-1.3.0.1rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1133
- acryl_datahub-1.3.0.1rc5.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1134
- acryl_datahub-1.3.0.1rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1135
- acryl_datahub-1.3.0.1rc5.dist-info/RECORD,,
1134
+ acryl_datahub-1.3.0.1rc6.dist-info/METADATA,sha256=JfkdfNc1P5L7jaGrlIUM-DqN9KcYjFuKfxwfg-IvTQI,184688
1135
+ acryl_datahub-1.3.0.1rc6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1136
+ acryl_datahub-1.3.0.1rc6.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1137
+ acryl_datahub-1.3.0.1rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1138
+ acryl_datahub-1.3.0.1rc6.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.3.0.1rc5"
3
+ __version__ = "1.3.0.1rc6"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/ingestion/source/bigquery_v2/bigquery.py CHANGED
@@ -49,6 +49,7 @@ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
49
49
  from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
50
50
  from datahub.ingestion.source.state.redundant_run_skip_handler import (
51
51
  RedundantLineageRunSkipHandler,
52
+ RedundantQueriesRunSkipHandler,
52
53
  RedundantUsageRunSkipHandler,
53
54
  )
54
55
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -145,7 +146,10 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
145
146
  redundant_lineage_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = (
146
147
  None
147
148
  )
148
- if self.config.enable_stateful_lineage_ingestion:
149
+ if (
150
+ self.config.enable_stateful_lineage_ingestion
151
+ and not self.config.use_queries_v2
152
+ ):
149
153
  redundant_lineage_run_skip_handler = RedundantLineageRunSkipHandler(
150
154
  source=self,
151
155
  config=self.config,
@@ -296,6 +300,17 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
296
300
  ):
297
301
  return
298
302
 
303
+ redundant_queries_run_skip_handler: Optional[
304
+ RedundantQueriesRunSkipHandler
305
+ ] = None
306
+ if self.config.enable_stateful_time_window:
307
+ redundant_queries_run_skip_handler = RedundantQueriesRunSkipHandler(
308
+ source=self,
309
+ config=self.config,
310
+ pipeline_name=self.ctx.pipeline_name,
311
+ run_id=self.ctx.run_id,
312
+ )
313
+
299
314
  with (
300
315
  self.report.new_stage(f"*: {QUERIES_EXTRACTION}"),
301
316
  BigQueryQueriesExtractor(
@@ -315,6 +330,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
315
330
  structured_report=self.report,
316
331
  filters=self.filters,
317
332
  identifiers=self.identifiers,
333
+ redundant_run_skip_handler=redundant_queries_run_skip_handler,
318
334
  schema_resolver=self.sql_parser_schema_resolver,
319
335
  discovered_tables=self.bq_schema_extractor.table_refs,
320
336
  ) as queries_extractor,
datahub/ingestion/source/bigquery_v2/bigquery_config.py CHANGED
@@ -25,6 +25,7 @@ from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterCo
25
25
  from datahub.ingestion.source.state.stateful_ingestion_base import (
26
26
  StatefulLineageConfigMixin,
27
27
  StatefulProfilingConfigMixin,
28
+ StatefulTimeWindowConfigMixin,
28
29
  StatefulUsageConfigMixin,
29
30
  )
30
31
  from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
@@ -271,6 +272,7 @@ class BigQueryV2Config(
271
272
  SQLCommonConfig,
272
273
  StatefulUsageConfigMixin,
273
274
  StatefulLineageConfigMixin,
275
+ StatefulTimeWindowConfigMixin,
274
276
  StatefulProfilingConfigMixin,
275
277
  ClassificationSourceConfigMixin,
276
278
  ):
@@ -527,6 +529,20 @@ class BigQueryV2Config(
527
529
 
528
530
  return v
529
531
 
532
+ @root_validator(pre=False, skip_on_failure=True)
533
+ def validate_queries_v2_stateful_ingestion(cls, values: Dict) -> Dict:
534
+ if values.get("use_queries_v2"):
535
+ if values.get("enable_stateful_lineage_ingestion") or values.get(
536
+ "enable_stateful_usage_ingestion"
537
+ ):
538
+ logger.warning(
539
+ "enable_stateful_lineage_ingestion and enable_stateful_usage_ingestion are deprecated "
540
+ "when using use_queries_v2=True. These configs only work with the legacy (non-queries v2) extraction path. "
541
+ "For queries v2, use enable_stateful_time_window instead to enable stateful ingestion "
542
+ "for the unified time window extraction (lineage + usage + operations + queries)."
543
+ )
544
+ return values
545
+
530
546
  def get_table_pattern(self, pattern: List[str]) -> str:
531
547
  return "|".join(pattern) if pattern else ""
532
548
 
datahub/ingestion/source/bigquery_v2/queries_extractor.py CHANGED
@@ -36,6 +36,9 @@ from datahub.ingestion.source.bigquery_v2.common import (
36
36
  BigQueryFilter,
37
37
  BigQueryIdentifierBuilder,
38
38
  )
39
+ from datahub.ingestion.source.state.redundant_run_skip_handler import (
40
+ RedundantQueriesRunSkipHandler,
41
+ )
39
42
  from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
40
43
  from datahub.metadata.urns import CorpUserUrn
41
44
  from datahub.sql_parsing.schema_resolver import SchemaResolver
@@ -135,6 +138,7 @@ class BigQueryQueriesExtractor(Closeable):
135
138
  structured_report: SourceReport,
136
139
  filters: BigQueryFilter,
137
140
  identifiers: BigQueryIdentifierBuilder,
141
+ redundant_run_skip_handler: Optional[RedundantQueriesRunSkipHandler] = None,
138
142
  graph: Optional[DataHubGraph] = None,
139
143
  schema_resolver: Optional[SchemaResolver] = None,
140
144
  discovered_tables: Optional[Collection[str]] = None,
@@ -158,6 +162,9 @@ class BigQueryQueriesExtractor(Closeable):
158
162
  )
159
163
 
160
164
  self.structured_report = structured_report
165
+ self.redundant_run_skip_handler = redundant_run_skip_handler
166
+
167
+ self.start_time, self.end_time = self._get_time_window()
161
168
 
162
169
  self.aggregator = SqlParsingAggregator(
163
170
  platform=self.identifiers.platform,
@@ -172,8 +179,8 @@ class BigQueryQueriesExtractor(Closeable):
172
179
  generate_query_usage_statistics=self.config.include_query_usage_statistics,
173
180
  usage_config=BaseUsageConfig(
174
181
  bucket_duration=self.config.window.bucket_duration,
175
- start_time=self.config.window.start_time,
176
- end_time=self.config.window.end_time,
182
+ start_time=self.start_time,
183
+ end_time=self.end_time,
177
184
  user_email_pattern=self.config.user_email_pattern,
178
185
  top_n_queries=self.config.top_n_queries,
179
186
  ),
@@ -199,6 +206,34 @@ class BigQueryQueriesExtractor(Closeable):
199
206
  logger.info(f"Using local temp path: {path}")
200
207
  return path
201
208
 
209
+ def _get_time_window(self) -> tuple[datetime, datetime]:
210
+ if self.redundant_run_skip_handler:
211
+ start_time, end_time = (
212
+ self.redundant_run_skip_handler.suggest_run_time_window(
213
+ self.config.window.start_time,
214
+ self.config.window.end_time,
215
+ )
216
+ )
217
+ else:
218
+ start_time = self.config.window.start_time
219
+ end_time = self.config.window.end_time
220
+
221
+ # Usage statistics are aggregated per bucket (typically per day).
222
+ # To ensure accurate aggregated metrics, we need to align the start_time
223
+ # to the beginning of a bucket so that we include complete bucket periods.
224
+ if self.config.include_usage_statistics:
225
+ start_time = get_time_bucket(start_time, self.config.window.bucket_duration)
226
+
227
+ return start_time, end_time
228
+
229
+ def _update_state(self) -> None:
230
+ if self.redundant_run_skip_handler:
231
+ self.redundant_run_skip_handler.update_state(
232
+ self.config.window.start_time,
233
+ self.config.window.end_time,
234
+ self.config.window.bucket_duration,
235
+ )
236
+
202
237
  def is_temp_table(self, name: str) -> bool:
203
238
  try:
204
239
  table = BigqueryTableIdentifier.from_string_name(name)
@@ -299,6 +334,8 @@ class BigQueryQueriesExtractor(Closeable):
299
334
  shared_connection.close()
300
335
  audit_log_file.unlink(missing_ok=True)
301
336
 
337
+ self._update_state()
338
+
302
339
  def deduplicate_queries(
303
340
  self, queries: FileBackedList[ObservedQuery]
304
341
  ) -> FileBackedDict[Dict[int, ObservedQuery]]:
@@ -355,8 +392,8 @@ class BigQueryQueriesExtractor(Closeable):
355
392
  query_log_query = _build_enriched_query_log_query(
356
393
  project_id=project.id,
357
394
  region=region,
358
- start_time=self.config.window.start_time,
359
- end_time=self.config.window.end_time,
395
+ start_time=self.start_time,
396
+ end_time=self.end_time,
360
397
  )
361
398
 
362
399
  logger.info(f"Fetching query log from BQ Project {project.id} for {region}")
datahub/ingestion/source/snowflake/snowflake_config.py CHANGED
@@ -31,6 +31,7 @@ from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterCo
31
31
  from datahub.ingestion.source.state.stateful_ingestion_base import (
32
32
  StatefulLineageConfigMixin,
33
33
  StatefulProfilingConfigMixin,
34
+ StatefulTimeWindowConfigMixin,
34
35
  StatefulUsageConfigMixin,
35
36
  )
36
37
  from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
@@ -199,6 +200,7 @@ class SnowflakeV2Config(
199
200
  SnowflakeUsageConfig,
200
201
  StatefulLineageConfigMixin,
201
202
  StatefulUsageConfigMixin,
203
+ StatefulTimeWindowConfigMixin,
202
204
  StatefulProfilingConfigMixin,
203
205
  ClassificationSourceConfigMixin,
204
206
  IncrementalPropertiesConfigMixin,
@@ -477,6 +479,20 @@ class SnowflakeV2Config(
477
479
 
478
480
  return shares
479
481
 
482
+ @root_validator(pre=False, skip_on_failure=True)
483
+ def validate_queries_v2_stateful_ingestion(cls, values: Dict) -> Dict:
484
+ if values.get("use_queries_v2"):
485
+ if values.get("enable_stateful_lineage_ingestion") or values.get(
486
+ "enable_stateful_usage_ingestion"
487
+ ):
488
+ logger.warning(
489
+ "enable_stateful_lineage_ingestion and enable_stateful_usage_ingestion are deprecated "
490
+ "when using use_queries_v2=True. These configs only work with the legacy (non-queries v2) extraction path. "
491
+ "For queries v2, use enable_stateful_time_window instead to enable stateful ingestion "
492
+ "for the unified time window extraction (lineage + usage + operations + queries)."
493
+ )
494
+ return values
495
+
480
496
  def outbounds(self) -> Dict[str, Set[DatabaseId]]:
481
497
  """
482
498
  Returns mapping of
datahub/ingestion/source/snowflake/snowflake_queries.py CHANGED
@@ -17,6 +17,7 @@ from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFr
17
17
  from datahub.configuration.time_window_config import (
18
18
  BaseTimeWindowConfig,
19
19
  BucketDuration,
20
+ get_time_bucket,
20
21
  )
21
22
  from datahub.ingestion.api.closeable import Closeable
22
23
  from datahub.ingestion.api.common import PipelineContext
@@ -50,6 +51,9 @@ from datahub.ingestion.source.snowflake.stored_proc_lineage import (
50
51
  StoredProcLineageReport,
51
52
  StoredProcLineageTracker,
52
53
  )
54
+ from datahub.ingestion.source.state.redundant_run_skip_handler import (
55
+ RedundantQueriesRunSkipHandler,
56
+ )
53
57
  from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
54
58
  from datahub.metadata.urns import CorpUserUrn
55
59
  from datahub.sql_parsing.schema_resolver import SchemaResolver
@@ -180,6 +184,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
180
184
  structured_report: SourceReport,
181
185
  filters: SnowflakeFilter,
182
186
  identifiers: SnowflakeIdentifierBuilder,
187
+ redundant_run_skip_handler: Optional[RedundantQueriesRunSkipHandler] = None,
183
188
  graph: Optional[DataHubGraph] = None,
184
189
  schema_resolver: Optional[SchemaResolver] = None,
185
190
  discovered_tables: Optional[List[str]] = None,
@@ -191,9 +196,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
191
196
  self.filters = filters
192
197
  self.identifiers = identifiers
193
198
  self.discovered_tables = set(discovered_tables) if discovered_tables else None
199
+ self.redundant_run_skip_handler = redundant_run_skip_handler
194
200
 
195
201
  self._structured_report = structured_report
196
202
 
203
+ # Adjust time window based on stateful ingestion state
204
+ self.start_time, self.end_time = self._get_time_window()
205
+
197
206
  # The exit stack helps ensure that we close all the resources we open.
198
207
  self._exit_stack = contextlib.ExitStack()
199
208
 
@@ -211,8 +220,8 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
211
220
  generate_query_usage_statistics=self.config.include_query_usage_statistics,
212
221
  usage_config=BaseUsageConfig(
213
222
  bucket_duration=self.config.window.bucket_duration,
214
- start_time=self.config.window.start_time,
215
- end_time=self.config.window.end_time,
223
+ start_time=self.start_time,
224
+ end_time=self.end_time,
216
225
  user_email_pattern=self.config.user_email_pattern,
217
226
  # TODO make the rest of the fields configurable
218
227
  ),
@@ -228,6 +237,34 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
228
237
  def structured_reporter(self) -> SourceReport:
229
238
  return self._structured_report
230
239
 
240
+ def _get_time_window(self) -> tuple[datetime, datetime]:
241
+ if self.redundant_run_skip_handler:
242
+ start_time, end_time = (
243
+ self.redundant_run_skip_handler.suggest_run_time_window(
244
+ self.config.window.start_time,
245
+ self.config.window.end_time,
246
+ )
247
+ )
248
+ else:
249
+ start_time = self.config.window.start_time
250
+ end_time = self.config.window.end_time
251
+
252
+ # Usage statistics are aggregated per bucket (typically per day).
253
+ # To ensure accurate aggregated metrics, we need to align the start_time
254
+ # to the beginning of a bucket so that we include complete bucket periods.
255
+ if self.config.include_usage_statistics:
256
+ start_time = get_time_bucket(start_time, self.config.window.bucket_duration)
257
+
258
+ return start_time, end_time
259
+
260
+ def _update_state(self) -> None:
261
+ if self.redundant_run_skip_handler:
262
+ self.redundant_run_skip_handler.update_state(
263
+ self.config.window.start_time,
264
+ self.config.window.end_time,
265
+ self.config.window.bucket_duration,
266
+ )
267
+
231
268
  @functools.cached_property
232
269
  def local_temp_path(self) -> pathlib.Path:
233
270
  if self.config.local_temp_path:
@@ -355,6 +392,9 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
355
392
  with self.report.aggregator_generate_timer:
356
393
  yield from auto_workunit(self.aggregator.gen_metadata())
357
394
 
395
+ # Update the stateful ingestion state after successful extraction
396
+ self._update_state()
397
+
358
398
  def fetch_users(self) -> UsersMapping:
359
399
  users: UsersMapping = dict()
360
400
  with self.structured_reporter.report_exc("Error fetching users from Snowflake"):
@@ -378,8 +418,8 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
378
418
  # Derived from _populate_external_lineage_from_copy_history.
379
419
 
380
420
  query: str = SnowflakeQuery.copy_lineage_history(
381
- start_time_millis=int(self.config.window.start_time.timestamp() * 1000),
382
- end_time_millis=int(self.config.window.end_time.timestamp() * 1000),
421
+ start_time_millis=int(self.start_time.timestamp() * 1000),
422
+ end_time_millis=int(self.end_time.timestamp() * 1000),
383
423
  downstreams_deny_pattern=self.config.temporary_tables_pattern,
384
424
  )
385
425
 
@@ -414,8 +454,8 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
414
454
  Union[PreparsedQuery, TableRename, TableSwap, ObservedQuery, StoredProcCall]
415
455
  ]:
416
456
  query_log_query = QueryLogQueryBuilder(
417
- start_time=self.config.window.start_time,
418
- end_time=self.config.window.end_time,
457
+ start_time=self.start_time,
458
+ end_time=self.end_time,
419
459
  bucket_duration=self.config.window.bucket_duration,
420
460
  deny_usernames=self.config.pushdown_deny_usernames,
421
461
  allow_usernames=self.config.pushdown_allow_usernames,
@@ -73,6 +73,7 @@ from datahub.ingestion.source.snowflake.snowflake_utils import (
73
73
  from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
74
74
  from datahub.ingestion.source.state.redundant_run_skip_handler import (
75
75
  RedundantLineageRunSkipHandler,
76
+ RedundantQueriesRunSkipHandler,
76
77
  RedundantUsageRunSkipHandler,
77
78
  )
78
79
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -207,7 +208,7 @@ class SnowflakeV2Source(
207
208
  )
208
209
  self.report.sql_aggregator = self.aggregator.report
209
210
 
210
- if self.config.include_table_lineage:
211
+ if self.config.include_table_lineage and not self.config.use_queries_v2:
211
212
  redundant_lineage_run_skip_handler: Optional[
212
213
  RedundantLineageRunSkipHandler
213
214
  ] = None
@@ -589,6 +590,17 @@ class SnowflakeV2Source(
589
590
  with self.report.new_stage(f"*: {QUERIES_EXTRACTION}"):
590
591
  schema_resolver = self.aggregator._schema_resolver
591
592
 
593
+ redundant_queries_run_skip_handler: Optional[
594
+ RedundantQueriesRunSkipHandler
595
+ ] = None
596
+ if self.config.enable_stateful_time_window:
597
+ redundant_queries_run_skip_handler = RedundantQueriesRunSkipHandler(
598
+ source=self,
599
+ config=self.config,
600
+ pipeline_name=self.ctx.pipeline_name,
601
+ run_id=self.ctx.run_id,
602
+ )
603
+
592
604
  queries_extractor = SnowflakeQueriesExtractor(
593
605
  connection=self.connection,
594
606
  # TODO: this should be its own section in main recipe
@@ -614,6 +626,7 @@ class SnowflakeV2Source(
614
626
  structured_report=self.report,
615
627
  filters=self.filters,
616
628
  identifiers=self.identifiers,
629
+ redundant_run_skip_handler=redundant_queries_run_skip_handler,
617
630
  schema_resolver=schema_resolver,
618
631
  discovered_tables=self.discovered_datasets,
619
632
  graph=self.ctx.graph,
@@ -244,3 +244,24 @@ class RedundantUsageRunSkipHandler(RedundantRunSkipHandler):
244
244
  cur_state.begin_timestamp_millis = datetime_to_ts_millis(start_time)
245
245
  cur_state.end_timestamp_millis = datetime_to_ts_millis(end_time)
246
246
  cur_state.bucket_duration = bucket_duration
247
+
248
+
249
class RedundantQueriesRunSkipHandler(RedundantRunSkipHandler):
    """
    Redundant-run skip handler for the queries v2 extraction path.

    Tracks the time window of the audit log extraction, which combines
    lineage, usage, operations, and queries.
    """

    def get_job_name_suffix(self):
        """Return the job-name suffix used by this handler."""
        return "_audit_window"

    def update_state(
        self, start_time: datetime, end_time: datetime, bucket_duration: BucketDuration
    ) -> None:
        """
        Write the given window into the current checkpoint's state.

        The start and end times are stored as epoch milliseconds alongside
        the bucket duration. When there is no current checkpoint, nothing
        is written.
        """
        checkpoint = self.get_current_checkpoint()
        if not checkpoint:
            return

        window_state = cast(BaseTimeWindowCheckpointState, checkpoint.state)
        window_state.begin_timestamp_millis = datetime_to_ts_millis(start_time)
        window_state.end_timestamp_millis = datetime_to_ts_millis(end_time)
        window_state.bucket_duration = bucket_duration
@@ -101,7 +101,9 @@ class StatefulLineageConfigMixin(ConfigModel):
101
101
  default=True,
102
102
  description="Enable stateful lineage ingestion."
103
103
  " This will store lineage window timestamps after successful lineage ingestion. "
104
- "and will not run lineage ingestion for same timestamps in subsequent run. ",
104
+ "and will not run lineage ingestion for same timestamps in subsequent run. "
105
+ "NOTE: This only works with use_queries_v2=False (legacy extraction path). "
106
+ "For queries v2, use enable_stateful_time_window instead.",
105
107
  )
106
108
 
107
109
  _store_last_lineage_extraction_timestamp = pydantic_renamed_field(
@@ -150,7 +152,9 @@ class StatefulUsageConfigMixin(BaseTimeWindowConfig):
150
152
  default=True,
151
153
  description="Enable stateful lineage ingestion."
152
154
  " This will store usage window timestamps after successful usage ingestion. "
153
- "and will not run usage ingestion for same timestamps in subsequent run. ",
155
+ "and will not run usage ingestion for same timestamps in subsequent run. "
156
+ "NOTE: This only works with use_queries_v2=False (legacy extraction path). "
157
+ "For queries v2, use enable_stateful_time_window instead.",
154
158
  )
155
159
 
156
160
  _store_last_usage_extraction_timestamp = pydantic_renamed_field(
@@ -169,6 +173,30 @@ class StatefulUsageConfigMixin(BaseTimeWindowConfig):
169
173
  return values
170
174
 
171
175
 
176
# Config mixin that adds the `enable_stateful_time_window` option and a
# validator that switches it off when stateful ingestion is not enabled.
class StatefulTimeWindowConfigMixin(BaseTimeWindowConfig):
    # Off by default; per its description, only meaningful for queries v2.
    enable_stateful_time_window: bool = Field(
        default=False,
        description=(
            "Enable stateful time window tracking."
            " This will store the time window after successful extraction "
            "and adjust the time window in subsequent runs to avoid reprocessing. "
            "NOTE: This is ONLY applicable when using queries v2 (use_queries_v2=True). "
            "This replaces enable_stateful_lineage_ingestion and enable_stateful_usage_ingestion "
            "for the queries v2 extraction path, since queries v2 extracts lineage, usage, operations, "
            "and queries together from a single audit log and uses a unified time window."
        ),
    )

    @root_validator(skip_on_failure=True)
    def time_window_stateful_option_validator(cls, values: Dict) -> Dict:
        """Turn the option off, with a warning, if stateful ingestion is missing or disabled."""
        sti = values.get("stateful_ingestion")
        stateful_disabled = not sti or not sti.enabled
        if stateful_disabled and values.get("enable_stateful_time_window"):
            logger.warning(
                "Stateful ingestion is disabled, disabling enable_stateful_time_window config option as well"
            )
            values["enable_stateful_time_window"] = False
        return values
198
+
199
+
172
200
  @dataclass
173
201
  class StatefulIngestionReport(SourceReport):
174
202
  pass