acryl-datahub 1.3.0.1rc4__py3-none-any.whl → 1.3.0.1rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/METADATA +2637 -2633
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/RECORD +31 -28
- datahub/_version.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +161 -0
- datahub/ingestion/source/bigquery_v2/bigquery.py +17 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +16 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +5 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +41 -4
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/usage.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_config.py +16 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +46 -6
- datahub/ingestion/source/snowflake/snowflake_v2.py +14 -1
- datahub/ingestion/source/sql/mysql.py +101 -4
- datahub/ingestion/source/sql/postgres.py +81 -4
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +30 -2
- datahub/metadata/_internal_schema_classes.py +772 -546
- datahub/metadata/_urns/urn_defs.py +1751 -1695
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +18450 -18242
- datahub/metadata/schemas/DataHubFileInfo.avsc +228 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +3 -1
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -4
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc4.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.3.0.
|
|
1
|
+
acryl_datahub-1.3.0.1rc6.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=PW9A4Uazfqf_qZ54rH_cG6i8GhrmicechIGtJYipA8Q,323
|
|
5
5
|
datahub/entrypoints.py,sha256=VcbU6Z47b_JKW1zI-WJMYIngm05FSogKLiuvFNtyNcI,9088
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -241,7 +241,7 @@ datahub/ingestion/source/abs/source.py,sha256=z86K5_P_gu8kTytLOAYyQqqD2g14JGSrv1
|
|
|
241
241
|
datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
242
242
|
datahub/ingestion/source/apply/datahub_apply.py,sha256=xTD-Iq3UHhxcz61RwNuI2kJjRrnQEfZFSgvS1X6loV4,7703
|
|
243
243
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
244
|
-
datahub/ingestion/source/aws/aws_common.py,sha256=
|
|
244
|
+
datahub/ingestion/source/aws/aws_common.py,sha256=Va9uxo5aKsAR7qIC625VpRO3XDqzNIg4SfK_eFg25Rw,23781
|
|
245
245
|
datahub/ingestion/source/aws/glue.py,sha256=dUaMWcI5Ed-TzbbSrF6suT4L1vcRHoHfFCdTvAINc4w,67423
|
|
246
246
|
datahub/ingestion/source/aws/platform_resource_repository.py,sha256=0eUfGy1FbaBltCSNTtXyLrkrdqTc1KkTgDJB1Gd-Ydk,853
|
|
247
247
|
datahub/ingestion/source/aws/s3_boto_utils.py,sha256=rGlWAkKZpkeA1_wMvcJvSDvobvduShszowU-KcrQudg,7011
|
|
@@ -260,10 +260,10 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
|
|
|
260
260
|
datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
|
|
261
261
|
datahub/ingestion/source/azure/azure_common.py,sha256=DvPrLpjQSJ1USB_myGmg8lGkRW-WAl2GIZMcEkBFjOs,4063
|
|
262
262
|
datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
263
|
-
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=
|
|
263
|
+
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=Z5QsyvBNDWEf9kME_zBRw2oLIh3rD5zafpvuYB0p4ow,15972
|
|
264
264
|
datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=kEwWhq3ch6WT4q4hcX8-fvQh28KgrNfspFwIytO3vQA,25103
|
|
265
265
|
datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
|
|
266
|
-
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=
|
|
266
|
+
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=A5pLaTm4WCTndhGmGBGrjc05LHtC7C5-FrE3vEMc1ik,23880
|
|
267
267
|
datahub/ingestion/source/bigquery_v2/bigquery_connection.py,sha256=6XFCc0oxxU3R4IPyYHaf3YMETlMD4ztkNpkf4kf1Elw,3171
|
|
268
268
|
datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
|
|
269
269
|
datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
|
|
@@ -271,13 +271,13 @@ datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256
|
|
|
271
271
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=2syDMaRpYEbtGUVejVAK5d6g8HqM54ZyEM908uLJ55o,3393
|
|
272
272
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=zlTkqOmt5zxnO40rVTYHF3fclj4OVlLtqUXwW5WIIcM,7855
|
|
273
273
|
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=zbYb1EYnCJxgvsU8oT_76l0q_BW1exVjMWM1GAgd1nc,32600
|
|
274
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
274
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=PbSCMj5ACwEu_HQNe29IHs4y1bn15_nnz6ZW1Yt17wI,51796
|
|
275
275
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
276
276
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
|
|
277
277
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
|
|
278
278
|
datahub/ingestion/source/bigquery_v2/profiler.py,sha256=oLf5jMjJf-ShNny9Dll2tCsOoPMF1DxAh7e7etpeLq4,10821
|
|
279
279
|
datahub/ingestion/source/bigquery_v2/queries.py,sha256=gDvvgajptmNn5AiBglmDhGAC9LBh8fzw56_d8ewLbxA,20222
|
|
280
|
-
datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=
|
|
280
|
+
datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=bSYusyf-xnhs_1WURsQ2YmMxRn3J5HCp_UKChsxbWIw,21015
|
|
281
281
|
datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
|
|
282
282
|
datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
283
283
|
datahub/ingestion/source/cassandra/cassandra.py,sha256=pNy61Z4kTqL_wGcWIYee5fnZiuJDseDcRcQwsxeAssk,14487
|
|
@@ -456,9 +456,9 @@ datahub/ingestion/source/redshift/profile.py,sha256=H1Xtc2rXScUv4w0b2BbM7POjYEwq
|
|
|
456
456
|
datahub/ingestion/source/redshift/query.py,sha256=HKobQ-0crARgT8Mkfe-WBqVR9ZadYCZ9DGaUoEHHHww,48234
|
|
457
457
|
datahub/ingestion/source/redshift/redshift.py,sha256=RN8rao3j7nocnnD6oPcEju09-8mOZTE4vFkgy_13Az8,41293
|
|
458
458
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
459
|
-
datahub/ingestion/source/redshift/redshift_schema.py,sha256=
|
|
459
|
+
datahub/ingestion/source/redshift/redshift_schema.py,sha256=2U8IIPRJkL-HWUeWswOzvcT1hdTBQgPMhr6tYCDuqrM,25226
|
|
460
460
|
datahub/ingestion/source/redshift/report.py,sha256=aCFDFUbz5xde8b_eRIHSBiELoo9LZFtDpp2lSadiPHU,2937
|
|
461
|
-
datahub/ingestion/source/redshift/usage.py,sha256=
|
|
461
|
+
datahub/ingestion/source/redshift/usage.py,sha256=szdg3cUw4UpZ8rXMZufIAVTq2iiI1VsmrFgjAEH8Dv4,17521
|
|
462
462
|
datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
|
|
463
463
|
datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
|
|
464
464
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
@@ -496,12 +496,12 @@ datahub/ingestion/source/snowflake/constants.py,sha256=iDTamMozHwLYyglpRfqwTbxPx
|
|
|
496
496
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
497
497
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
498
498
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
499
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
499
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=HoDzaG3TlP1ui5qY2PZUcu83wOjvJ96Z9fFYC4GmCko,24439
|
|
500
500
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=uSHdPqigRzjeNxtn0_m5i57X7X8LBZIpHzDcWIoovyA,19005
|
|
501
501
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
502
502
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=nam-bYV6wL9LfR-Tt50Qe_Kea61IuWS-lLu5__aDxk8,21853
|
|
503
503
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=PmQi-qDlRhdJ-PsJ7x-EScIiswWRAxDDOKHydvN3mTY,7404
|
|
504
|
-
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=
|
|
504
|
+
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=AiLEbCs6zXbIoejFdLsbSupkvqHj5BOtPz9lubGKLv8,47142
|
|
505
505
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=wLDaYZrWJ0794KKn69rB_QF0_8Bzu5l_7L6mD77KVc4,40469
|
|
506
506
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=fA6C-p9wM-jyTsXE_suTbCtrE_lle-5LI52S7wFYf00,6701
|
|
507
507
|
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=a6avRJXbj2qwnu28oK1YotmJo68zEG-1S7vonsUUJy4,41473
|
|
@@ -511,7 +511,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=5Li4H8KuS4qBKR98L
|
|
|
511
511
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
|
|
512
512
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=mM0v9b4PHRJAT-SdRids3wdzc5O96gWCCww3e42itV8,24982
|
|
513
513
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=1c1YNmAxxOwAKy8IEFqVdp6x-EvCYJkN6UZ_RwUUVv0,15062
|
|
514
|
-
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=
|
|
514
|
+
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=Tpx_d8UuO4wD9sIL9nMDR4GtCvVc6KbF82nlBdFRtEI,36408
|
|
515
515
|
datahub/ingestion/source/snowflake/stored_proc_lineage.py,sha256=rOb78iHiWiK8v8WdVs1xDwVut4Y0OHmszej6IopQfCo,5341
|
|
516
516
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
517
517
|
datahub/ingestion/source/sql/athena.py,sha256=ZvWGuAPRUeUkE-7N6B3RyCkQp7JZKnLVXTnnR200gls,31532
|
|
@@ -523,9 +523,9 @@ datahub/ingestion/source/sql/hana.py,sha256=V6bGVLVjI1VL0deebg8VxIL8Ls-oxUvpSvX9
|
|
|
523
523
|
datahub/ingestion/source/sql/hive.py,sha256=SPmAWlk63V-s-loBTU2hXsQA7xA4sa0iPK6pCbF-AJ8,31600
|
|
524
524
|
datahub/ingestion/source/sql/hive_metastore.py,sha256=UBB7mV2eKuCxv3voi0F3tqF2MyRObSYxArAxETZfO4E,35997
|
|
525
525
|
datahub/ingestion/source/sql/mariadb.py,sha256=om6QoG5UtDldt1N6AfIWp3T-HXNaaqFmpz2i0JAemfM,654
|
|
526
|
-
datahub/ingestion/source/sql/mysql.py,sha256=
|
|
526
|
+
datahub/ingestion/source/sql/mysql.py,sha256=h0kv86-8SxBTmaEhmcyqGcKoaWQ4peUdiE9sfhrTuqY,9734
|
|
527
527
|
datahub/ingestion/source/sql/oracle.py,sha256=nKMM1O67SkxCgT781eENl5xXpIR8_p5joTSdAYzQwHY,29988
|
|
528
|
-
datahub/ingestion/source/sql/postgres.py,sha256=
|
|
528
|
+
datahub/ingestion/source/sql/postgres.py,sha256=Vk1NVI0zJPzMj4SKJ9jfyGu4DY3bow724BDn10BaxzU,17478
|
|
529
529
|
datahub/ingestion/source/sql/presto.py,sha256=58py4M3UYxkGpbBFA1o96H154eUhD2dBm1hpxxYlYYM,4256
|
|
530
530
|
datahub/ingestion/source/sql/sql_common.py,sha256=EZGoeGlOYZoOrXOiKDI-S1mw-sPVV33PZQ_mPJlEvRc,57759
|
|
531
531
|
datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fxk9xei_CUIAXB0,8222
|
|
@@ -535,7 +535,7 @@ datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6Bpkwekx
|
|
|
535
535
|
datahub/ingestion/source/sql/sql_types.py,sha256=AVeBBXw8aKB1_jw6Wtg58miu-YUfN_-7ZcXwSF-ESgA,16021
|
|
536
536
|
datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
|
|
537
537
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
538
|
-
datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=
|
|
538
|
+
datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=wE_GX2TtkFEvscC_Epy5hUfBxtE6mUQoVPb7fUea0jk,1882
|
|
539
539
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
540
540
|
datahub/ingestion/source/sql/teradata.py,sha256=YydlPGndFGZcpvlmim3T-1yaAmsFt08TZVOTo1R3GLo,66871
|
|
541
541
|
datahub/ingestion/source/sql/trino.py,sha256=o5hm84iwRHO59TD2LaEqYgF2LYIcSUIKmlgu1VudGBY,19254
|
|
@@ -552,10 +552,10 @@ datahub/ingestion/source/state/checkpoint.py,sha256=ob8wtC0zOgTdc_-cVAI3MUKcQaN4
|
|
|
552
552
|
datahub/ingestion/source/state/entity_removal_state.py,sha256=dIG1HnueaRTsAu8kYjYiPFPpkZ4WwbraDF6oFDsts2E,6720
|
|
553
553
|
datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-LjS4XF0p6BxSAcLY-xKRzM,512
|
|
554
554
|
datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
|
|
555
|
-
datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=
|
|
555
|
+
datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=UENQDZ1Hacd8Hg3jC6okomG9V4EMLfL-Zz60aU6jzyc,10260
|
|
556
556
|
datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
|
|
557
557
|
datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Lr2HYGx_b2FQ8A36s7s11tl-4-mGIM13bfy5JbQ3LtM,14890
|
|
558
|
-
datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=
|
|
558
|
+
datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=j78BN_uSBpOJRi19kosZGPsgolKm9i-40MmSzDGeaFs,18837
|
|
559
559
|
datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
|
|
560
560
|
datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
|
|
561
561
|
datahub/ingestion/source/state_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -646,12 +646,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
646
646
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
647
647
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
648
648
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
649
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
650
|
-
datahub/metadata/schema.avsc,sha256=
|
|
649
|
+
datahub/metadata/_internal_schema_classes.py,sha256=O8MG_weYzOYtGIZsOr7c6EO33NDo9liE8D8kyotoq8Q,1084754
|
|
650
|
+
datahub/metadata/schema.avsc,sha256=YMMQSOELeCC4ZptYnoEfMFHRL-DguCR9KmjZ6NYWMlQ,780967
|
|
651
651
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
652
652
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
653
653
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
654
|
-
datahub/metadata/_urns/urn_defs.py,sha256=
|
|
654
|
+
datahub/metadata/_urns/urn_defs.py,sha256=GRvs_XhEQ8Xc6abC7B7FwTIMb18R3VeXrjV1_sRi_DE,145429
|
|
655
655
|
datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
656
656
|
datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
657
657
|
datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
|
|
@@ -688,6 +688,7 @@ datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.
|
|
|
688
688
|
datahub/metadata/com/linkedin/pegasus2avro/events/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
689
689
|
datahub/metadata/com/linkedin/pegasus2avro/events/metadata/__init__.py,sha256=a1FI_2VZ9Ejc9AIVztO-B5kLPR6VwlOgdFlv4PTCTYs,282
|
|
690
690
|
datahub/metadata/com/linkedin/pegasus2avro/execution/__init__.py,sha256=O5XAXnGzDnWv8nbqRHxLPPXUbrIu_pn76WUK_hhkHmg,775
|
|
691
|
+
datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py,sha256=wS-GU8YjGghMQcQ42BYv7Rh_-8h-yPiS_QMzJI34yJM,507
|
|
691
692
|
datahub/metadata/com/linkedin/pegasus2avro/form/__init__.py,sha256=rGDmWiKm6qpXiipZ5veCHqBJGSAryAqnSzRPlwcmLnA,845
|
|
692
693
|
datahub/metadata/com/linkedin/pegasus2avro/glossary/__init__.py,sha256=fa1QNv08O3TqXqZ14bkJerGho_t-8DPHFdcWKiXkkUA,501
|
|
693
694
|
datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py,sha256=EGxkzJgQMASL_aUmgjHE3bo8qRTSbAbM_8gUccZblX0,1603
|
|
@@ -695,7 +696,7 @@ datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py,sha256=LfB7ytT1u
|
|
|
695
696
|
datahub/metadata/com/linkedin/pegasus2avro/ingestion/__init__.py,sha256=1bfG2naq4iS_pwU4J-BVer_gfL0hDbJbnH0gh1MPNgA,871
|
|
696
697
|
datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py,sha256=7SHiR-KzV1CkAimFy94SkcY0Xg0RlsIlLTUTGmGAW_U,290
|
|
697
698
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
698
|
-
datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py,sha256=
|
|
699
|
+
datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py,sha256=U7nFcTKOeiqyZVxD53p2vcKBj6tupuCJH9I3yz6hFDg,5241
|
|
699
700
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/query/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
700
701
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/query/filter/__init__.py,sha256=DBP_QtxkFmC5q_kuk4dGjb4uOKbB4xKgqTWXGxmNbBQ,532
|
|
701
702
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py,sha256=OPboF8SV11wGnjvWQB-rxtB0otMdCsE7Tcy7xkOUgz8,2358
|
|
@@ -778,11 +779,13 @@ datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4N
|
|
|
778
779
|
datahub/metadata/schemas/DataHubActionKey.avsc,sha256=bjiKcoyvUPQKaGUi2ICBMJ_ukwnt7dh0szJS4WBZE0A,448
|
|
779
780
|
datahub/metadata/schemas/DataHubConnectionDetails.avsc,sha256=IvZj6OA7HRvy-ZIIn0UbXdJNnyt_oTn16XIe5ZlcqGk,1661
|
|
780
781
|
datahub/metadata/schemas/DataHubConnectionKey.avsc,sha256=VwbamVFoEdp6epz1lJm_UShBl6ksBxoA7jAYuPI5u3M,522
|
|
782
|
+
datahub/metadata/schemas/DataHubFileInfo.avsc,sha256=Vt1t5L9QyaIkSh3GUkrQs4I81o2MYAJJu194h1M97tw,6314
|
|
783
|
+
datahub/metadata/schemas/DataHubFileKey.avsc,sha256=-qxMFdpRPSHpA88adIFIJ35PZrN5qXOEo8RK-xTzkSQ,422
|
|
781
784
|
datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjODE5SmuVKuQeW8ajLJNRpqEBRyio,4601
|
|
782
785
|
datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
|
|
783
786
|
datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
|
|
784
787
|
datahub/metadata/schemas/DataHubPageModuleKey.avsc,sha256=NyFN8cVO6s6rtgoLGJJGfcPfpGr5PfmZlIhM6ajldfQ,460
|
|
785
|
-
datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=
|
|
788
|
+
datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=hbIEkpjQxVxSZOQmTVIjOGnGTNeaA0r0oYNKPpwuddg,10443
|
|
786
789
|
datahub/metadata/schemas/DataHubPageTemplateKey.avsc,sha256=0sVqwL97Rp8YHPytp2RqUP5hIW048hmT2hPNP5k6arc,472
|
|
787
790
|
datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=FyNcZIniQy9m6yN9DT4XsPkDrxUsU7tRTqmfdGoEtMU,8565
|
|
788
791
|
datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
|
|
@@ -1017,7 +1020,7 @@ datahub/sql_parsing/fingerprint_utils.py,sha256=3hGiexaQXnE7eZLxo-t7hlTyVQz7womb
|
|
|
1017
1020
|
datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
|
|
1018
1021
|
datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
|
|
1019
1022
|
datahub/sql_parsing/split_statements.py,sha256=doCACwQ_Fx6m1djo7t3BnU9ZHki4EV2KJUQkFMGv7lg,10101
|
|
1020
|
-
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=
|
|
1023
|
+
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=chRTe5eZfxsOd2vP_IrO_j3KhFUGLe2sPt0WXQ6yt5M,73320
|
|
1021
1024
|
datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
|
|
1022
1025
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
1023
1026
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=l4LZMiaeTARjJG76Uun_yNtFHdSj3yi8zO1XvAQtxl0,66944
|
|
@@ -1128,8 +1131,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1128
1131
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1129
1132
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1130
1133
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1131
|
-
acryl_datahub-1.3.0.
|
|
1132
|
-
acryl_datahub-1.3.0.
|
|
1133
|
-
acryl_datahub-1.3.0.
|
|
1134
|
-
acryl_datahub-1.3.0.
|
|
1135
|
-
acryl_datahub-1.3.0.
|
|
1134
|
+
acryl_datahub-1.3.0.1rc6.dist-info/METADATA,sha256=JfkdfNc1P5L7jaGrlIUM-DqN9KcYjFuKfxwfg-IvTQI,184688
|
|
1135
|
+
acryl_datahub-1.3.0.1rc6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1136
|
+
acryl_datahub-1.3.0.1rc6.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
|
|
1137
|
+
acryl_datahub-1.3.0.1rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1138
|
+
acryl_datahub-1.3.0.1rc6.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -3,11 +3,13 @@ from datetime import datetime, timedelta, timezone
|
|
|
3
3
|
from enum import Enum
|
|
4
4
|
from http import HTTPStatus
|
|
5
5
|
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
|
|
6
|
+
from urllib.parse import parse_qs, urlparse
|
|
6
7
|
|
|
7
8
|
import boto3
|
|
8
9
|
import requests
|
|
9
10
|
from boto3.session import Session
|
|
10
11
|
from botocore.config import DEFAULT_TIMEOUT, Config
|
|
12
|
+
from botocore.exceptions import ClientError, NoCredentialsError
|
|
11
13
|
from botocore.utils import fix_s3_host
|
|
12
14
|
from pydantic.fields import Field
|
|
13
15
|
|
|
@@ -465,6 +467,165 @@ class AwsConnectionConfig(ConfigModel):
|
|
|
465
467
|
def get_lakeformation_client(self) -> "LakeFormationClient":
|
|
466
468
|
return self.get_session().client("lakeformation", config=self._aws_config())
|
|
467
469
|
|
|
470
|
+
def get_rds_client(self):
|
|
471
|
+
"""Get an RDS client for generating IAM auth tokens."""
|
|
472
|
+
return self.get_session().client("rds", config=self._aws_config())
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def generate_rds_iam_token(
|
|
476
|
+
endpoint: str,
|
|
477
|
+
username: str,
|
|
478
|
+
port: int,
|
|
479
|
+
aws_config: AwsConnectionConfig,
|
|
480
|
+
) -> str:
|
|
481
|
+
"""
|
|
482
|
+
Generate an AWS RDS IAM authentication token.
|
|
483
|
+
|
|
484
|
+
boto3's generate_db_auth_token() returns a presigned URL in the format:
|
|
485
|
+
"hostname:port/?Action=connect&DBUser=username&X-Amz-Date=...&X-Amz-Expires=..."
|
|
486
|
+
|
|
487
|
+
This token should be used as-is by pymysql/psycopg2 drivers.
|
|
488
|
+
|
|
489
|
+
Args:
|
|
490
|
+
endpoint: RDS endpoint hostname
|
|
491
|
+
username: Database username for IAM authentication
|
|
492
|
+
port: Database port (5432 for PostgreSQL, 3306 for MySQL)
|
|
493
|
+
aws_config: AwsConnectionConfig for session management and credentials
|
|
494
|
+
|
|
495
|
+
Returns:
|
|
496
|
+
Authentication token (presigned URL format)
|
|
497
|
+
|
|
498
|
+
Raises:
|
|
499
|
+
ValueError: If AWS credentials are not found or token generation fails
|
|
500
|
+
|
|
501
|
+
"""
|
|
502
|
+
try:
|
|
503
|
+
client = aws_config.get_rds_client()
|
|
504
|
+
token = client.generate_db_auth_token(
|
|
505
|
+
DBHostname=endpoint, Port=port, DBUsername=username
|
|
506
|
+
)
|
|
507
|
+
logger.debug(f"Generated RDS IAM token for {username}@{endpoint}:{port}")
|
|
508
|
+
return token
|
|
509
|
+
except NoCredentialsError as e:
|
|
510
|
+
raise ValueError("AWS credentials not found") from e
|
|
511
|
+
except ClientError as e:
|
|
512
|
+
raise ValueError(f"Failed to generate RDS IAM token: {e}") from e
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
class RDSIAMTokenManager:
|
|
516
|
+
"""
|
|
517
|
+
Manages RDS IAM token lifecycle with automatic refresh.
|
|
518
|
+
|
|
519
|
+
RDS IAM tokens include expiration information in the URL parameters.
|
|
520
|
+
This manager parses the token expiry and refreshes before expiration
|
|
521
|
+
to ensure uninterrupted database access.
|
|
522
|
+
"""
|
|
523
|
+
|
|
524
|
+
def __init__(
|
|
525
|
+
self,
|
|
526
|
+
endpoint: str,
|
|
527
|
+
username: str,
|
|
528
|
+
port: int,
|
|
529
|
+
aws_config: AwsConnectionConfig,
|
|
530
|
+
refresh_threshold_minutes: int = 5,
|
|
531
|
+
):
|
|
532
|
+
"""
|
|
533
|
+
Initialize the token manager.
|
|
534
|
+
|
|
535
|
+
Args:
|
|
536
|
+
endpoint: RDS endpoint hostname
|
|
537
|
+
username: Database username for IAM authentication
|
|
538
|
+
port: Database port
|
|
539
|
+
aws_config: AwsConnectionConfig for session management and credentials
|
|
540
|
+
refresh_threshold_minutes: Refresh token when this many minutes remain before expiry
|
|
541
|
+
"""
|
|
542
|
+
self.endpoint = endpoint
|
|
543
|
+
self.username = username
|
|
544
|
+
self.port = port
|
|
545
|
+
self.aws_config = aws_config
|
|
546
|
+
self.refresh_threshold = timedelta(minutes=refresh_threshold_minutes)
|
|
547
|
+
|
|
548
|
+
self._current_token: Optional[str] = None
|
|
549
|
+
self._token_expires_at: Optional[datetime] = None
|
|
550
|
+
|
|
551
|
+
def get_token(self) -> str:
|
|
552
|
+
"""
|
|
553
|
+
Get current token, refreshing if necessary.
|
|
554
|
+
|
|
555
|
+
Returns:
|
|
556
|
+
Valid authentication token
|
|
557
|
+
|
|
558
|
+
Raises:
|
|
559
|
+
RuntimeError: If token generation or refresh fails
|
|
560
|
+
"""
|
|
561
|
+
if self._needs_refresh():
|
|
562
|
+
self._refresh_token()
|
|
563
|
+
|
|
564
|
+
assert self._current_token is not None
|
|
565
|
+
return self._current_token
|
|
566
|
+
|
|
567
|
+
def _needs_refresh(self) -> bool:
|
|
568
|
+
"""Check if token needs to be refreshed."""
|
|
569
|
+
if self._current_token is None or self._token_expires_at is None:
|
|
570
|
+
return True
|
|
571
|
+
|
|
572
|
+
time_until_expiry = self._token_expires_at - datetime.now(timezone.utc)
|
|
573
|
+
return time_until_expiry <= self.refresh_threshold
|
|
574
|
+
|
|
575
|
+
def _parse_token_expiry(self, token: str) -> datetime:
|
|
576
|
+
"""
|
|
577
|
+
Parse token expiry from X-Amz-Date and X-Amz-Expires URL parameters.
|
|
578
|
+
|
|
579
|
+
Args:
|
|
580
|
+
token: RDS IAM authentication token (presigned URL)
|
|
581
|
+
|
|
582
|
+
Returns:
|
|
583
|
+
Expiration datetime in UTC
|
|
584
|
+
|
|
585
|
+
Raises:
|
|
586
|
+
ValueError: If token URL format is invalid or missing required parameters
|
|
587
|
+
"""
|
|
588
|
+
try:
|
|
589
|
+
parsed_url = urlparse(token)
|
|
590
|
+
query_params = parse_qs(parsed_url.query)
|
|
591
|
+
|
|
592
|
+
# Extract X-Amz-Date (ISO 8601 format: YYYYMMDDTHHMMSSZ)
|
|
593
|
+
amz_date_list = query_params.get("X-Amz-Date")
|
|
594
|
+
if not amz_date_list:
|
|
595
|
+
raise ValueError("Missing X-Amz-Date parameter in RDS IAM token")
|
|
596
|
+
amz_date_str = amz_date_list[0]
|
|
597
|
+
|
|
598
|
+
# Extract X-Amz-Expires (duration in seconds)
|
|
599
|
+
amz_expires_list = query_params.get("X-Amz-Expires")
|
|
600
|
+
if not amz_expires_list:
|
|
601
|
+
raise ValueError("Missing X-Amz-Expires parameter in RDS IAM token")
|
|
602
|
+
amz_expires_seconds = int(amz_expires_list[0])
|
|
603
|
+
|
|
604
|
+
# Parse X-Amz-Date to datetime
|
|
605
|
+
token_issued_at = datetime.strptime(amz_date_str, "%Y%m%dT%H%M%SZ").replace(
|
|
606
|
+
tzinfo=timezone.utc
|
|
607
|
+
)
|
|
608
|
+
|
|
609
|
+
# Calculate expiration
|
|
610
|
+
return token_issued_at + timedelta(seconds=amz_expires_seconds)
|
|
611
|
+
|
|
612
|
+
except (ValueError, KeyError, IndexError) as e:
|
|
613
|
+
raise ValueError(
|
|
614
|
+
f"Failed to parse RDS IAM token expiry: {e}. Token format may be invalid."
|
|
615
|
+
) from e
|
|
616
|
+
|
|
617
|
+
def _refresh_token(self) -> None:
|
|
618
|
+
"""Generate and store a new token with parsed expiry."""
|
|
619
|
+
logger.info("Refreshing RDS IAM authentication token")
|
|
620
|
+
self._current_token = generate_rds_iam_token(
|
|
621
|
+
endpoint=self.endpoint,
|
|
622
|
+
username=self.username,
|
|
623
|
+
port=self.port,
|
|
624
|
+
aws_config=self.aws_config,
|
|
625
|
+
)
|
|
626
|
+
self._token_expires_at = self._parse_token_expiry(self._current_token)
|
|
627
|
+
logger.debug(f"Token will expire at {self._token_expires_at}")
|
|
628
|
+
|
|
468
629
|
|
|
469
630
|
class AwsSourceConfig(EnvConfigMixin, AwsConnectionConfig):
|
|
470
631
|
"""
|
|
@@ -49,6 +49,7 @@ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
|
49
49
|
from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
|
|
50
50
|
from datahub.ingestion.source.state.redundant_run_skip_handler import (
|
|
51
51
|
RedundantLineageRunSkipHandler,
|
|
52
|
+
RedundantQueriesRunSkipHandler,
|
|
52
53
|
RedundantUsageRunSkipHandler,
|
|
53
54
|
)
|
|
54
55
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
@@ -145,7 +146,10 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
145
146
|
redundant_lineage_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = (
|
|
146
147
|
None
|
|
147
148
|
)
|
|
148
|
-
if
|
|
149
|
+
if (
|
|
150
|
+
self.config.enable_stateful_lineage_ingestion
|
|
151
|
+
and not self.config.use_queries_v2
|
|
152
|
+
):
|
|
149
153
|
redundant_lineage_run_skip_handler = RedundantLineageRunSkipHandler(
|
|
150
154
|
source=self,
|
|
151
155
|
config=self.config,
|
|
@@ -296,6 +300,17 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
296
300
|
):
|
|
297
301
|
return
|
|
298
302
|
|
|
303
|
+
redundant_queries_run_skip_handler: Optional[
|
|
304
|
+
RedundantQueriesRunSkipHandler
|
|
305
|
+
] = None
|
|
306
|
+
if self.config.enable_stateful_time_window:
|
|
307
|
+
redundant_queries_run_skip_handler = RedundantQueriesRunSkipHandler(
|
|
308
|
+
source=self,
|
|
309
|
+
config=self.config,
|
|
310
|
+
pipeline_name=self.ctx.pipeline_name,
|
|
311
|
+
run_id=self.ctx.run_id,
|
|
312
|
+
)
|
|
313
|
+
|
|
299
314
|
with (
|
|
300
315
|
self.report.new_stage(f"*: {QUERIES_EXTRACTION}"),
|
|
301
316
|
BigQueryQueriesExtractor(
|
|
@@ -315,6 +330,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
315
330
|
structured_report=self.report,
|
|
316
331
|
filters=self.filters,
|
|
317
332
|
identifiers=self.identifiers,
|
|
333
|
+
redundant_run_skip_handler=redundant_queries_run_skip_handler,
|
|
318
334
|
schema_resolver=self.sql_parser_schema_resolver,
|
|
319
335
|
discovered_tables=self.bq_schema_extractor.table_refs,
|
|
320
336
|
) as queries_extractor,
|
|
@@ -25,6 +25,7 @@ from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterCo
|
|
|
25
25
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
26
26
|
StatefulLineageConfigMixin,
|
|
27
27
|
StatefulProfilingConfigMixin,
|
|
28
|
+
StatefulTimeWindowConfigMixin,
|
|
28
29
|
StatefulUsageConfigMixin,
|
|
29
30
|
)
|
|
30
31
|
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
|
|
@@ -271,6 +272,7 @@ class BigQueryV2Config(
|
|
|
271
272
|
SQLCommonConfig,
|
|
272
273
|
StatefulUsageConfigMixin,
|
|
273
274
|
StatefulLineageConfigMixin,
|
|
275
|
+
StatefulTimeWindowConfigMixin,
|
|
274
276
|
StatefulProfilingConfigMixin,
|
|
275
277
|
ClassificationSourceConfigMixin,
|
|
276
278
|
):
|
|
@@ -527,6 +529,20 @@ class BigQueryV2Config(
|
|
|
527
529
|
|
|
528
530
|
return v
|
|
529
531
|
|
|
532
|
+
@root_validator(pre=False, skip_on_failure=True)
|
|
533
|
+
def validate_queries_v2_stateful_ingestion(cls, values: Dict) -> Dict:
|
|
534
|
+
if values.get("use_queries_v2"):
|
|
535
|
+
if values.get("enable_stateful_lineage_ingestion") or values.get(
|
|
536
|
+
"enable_stateful_usage_ingestion"
|
|
537
|
+
):
|
|
538
|
+
logger.warning(
|
|
539
|
+
"enable_stateful_lineage_ingestion and enable_stateful_usage_ingestion are deprecated "
|
|
540
|
+
"when using use_queries_v2=True. These configs only work with the legacy (non-queries v2) extraction path. "
|
|
541
|
+
"For queries v2, use enable_stateful_time_window instead to enable stateful ingestion "
|
|
542
|
+
"for the unified time window extraction (lineage + usage + operations + queries)."
|
|
543
|
+
)
|
|
544
|
+
return values
|
|
545
|
+
|
|
530
546
|
def get_table_pattern(self, pattern: List[str]) -> str:
|
|
531
547
|
return "|".join(pattern) if pattern else ""
|
|
532
548
|
|
|
@@ -449,10 +449,12 @@ class BigQuerySchemaGenerator:
|
|
|
449
449
|
):
|
|
450
450
|
yield wu
|
|
451
451
|
except Exception as e:
|
|
452
|
-
|
|
453
|
-
|
|
452
|
+
# If configuration indicates we need table data access (for profiling or use_tables_list_query_v2),
|
|
453
|
+
# include bigquery.tables.getData in the error message since that's likely the missing permission
|
|
454
|
+
if self.config.have_table_data_read_permission:
|
|
455
|
+
action_mesage = "Does your service account have bigquery.tables.list, bigquery.routines.get, bigquery.routines.list, bigquery.tables.getData permissions?"
|
|
454
456
|
else:
|
|
455
|
-
action_mesage = "Does your service account have bigquery.tables.list, bigquery.routines.get, bigquery.routines.list
|
|
457
|
+
action_mesage = "Does your service account have bigquery.tables.list, bigquery.routines.get, bigquery.routines.list permissions?"
|
|
456
458
|
|
|
457
459
|
self.report.failure(
|
|
458
460
|
title="Unable to get tables for dataset",
|
|
@@ -36,6 +36,9 @@ from datahub.ingestion.source.bigquery_v2.common import (
|
|
|
36
36
|
BigQueryFilter,
|
|
37
37
|
BigQueryIdentifierBuilder,
|
|
38
38
|
)
|
|
39
|
+
from datahub.ingestion.source.state.redundant_run_skip_handler import (
|
|
40
|
+
RedundantQueriesRunSkipHandler,
|
|
41
|
+
)
|
|
39
42
|
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
|
|
40
43
|
from datahub.metadata.urns import CorpUserUrn
|
|
41
44
|
from datahub.sql_parsing.schema_resolver import SchemaResolver
|
|
@@ -135,6 +138,7 @@ class BigQueryQueriesExtractor(Closeable):
|
|
|
135
138
|
structured_report: SourceReport,
|
|
136
139
|
filters: BigQueryFilter,
|
|
137
140
|
identifiers: BigQueryIdentifierBuilder,
|
|
141
|
+
redundant_run_skip_handler: Optional[RedundantQueriesRunSkipHandler] = None,
|
|
138
142
|
graph: Optional[DataHubGraph] = None,
|
|
139
143
|
schema_resolver: Optional[SchemaResolver] = None,
|
|
140
144
|
discovered_tables: Optional[Collection[str]] = None,
|
|
@@ -158,6 +162,9 @@ class BigQueryQueriesExtractor(Closeable):
|
|
|
158
162
|
)
|
|
159
163
|
|
|
160
164
|
self.structured_report = structured_report
|
|
165
|
+
self.redundant_run_skip_handler = redundant_run_skip_handler
|
|
166
|
+
|
|
167
|
+
self.start_time, self.end_time = self._get_time_window()
|
|
161
168
|
|
|
162
169
|
self.aggregator = SqlParsingAggregator(
|
|
163
170
|
platform=self.identifiers.platform,
|
|
@@ -172,8 +179,8 @@ class BigQueryQueriesExtractor(Closeable):
|
|
|
172
179
|
generate_query_usage_statistics=self.config.include_query_usage_statistics,
|
|
173
180
|
usage_config=BaseUsageConfig(
|
|
174
181
|
bucket_duration=self.config.window.bucket_duration,
|
|
175
|
-
start_time=self.
|
|
176
|
-
end_time=self.
|
|
182
|
+
start_time=self.start_time,
|
|
183
|
+
end_time=self.end_time,
|
|
177
184
|
user_email_pattern=self.config.user_email_pattern,
|
|
178
185
|
top_n_queries=self.config.top_n_queries,
|
|
179
186
|
),
|
|
@@ -199,6 +206,34 @@ class BigQueryQueriesExtractor(Closeable):
|
|
|
199
206
|
logger.info(f"Using local temp path: {path}")
|
|
200
207
|
return path
|
|
201
208
|
|
|
209
|
+
def _get_time_window(self) -> tuple[datetime, datetime]:
|
|
210
|
+
if self.redundant_run_skip_handler:
|
|
211
|
+
start_time, end_time = (
|
|
212
|
+
self.redundant_run_skip_handler.suggest_run_time_window(
|
|
213
|
+
self.config.window.start_time,
|
|
214
|
+
self.config.window.end_time,
|
|
215
|
+
)
|
|
216
|
+
)
|
|
217
|
+
else:
|
|
218
|
+
start_time = self.config.window.start_time
|
|
219
|
+
end_time = self.config.window.end_time
|
|
220
|
+
|
|
221
|
+
# Usage statistics are aggregated per bucket (typically per day).
|
|
222
|
+
# To ensure accurate aggregated metrics, we need to align the start_time
|
|
223
|
+
# to the beginning of a bucket so that we include complete bucket periods.
|
|
224
|
+
if self.config.include_usage_statistics:
|
|
225
|
+
start_time = get_time_bucket(start_time, self.config.window.bucket_duration)
|
|
226
|
+
|
|
227
|
+
return start_time, end_time
|
|
228
|
+
|
|
229
|
+
def _update_state(self) -> None:
|
|
230
|
+
if self.redundant_run_skip_handler:
|
|
231
|
+
self.redundant_run_skip_handler.update_state(
|
|
232
|
+
self.config.window.start_time,
|
|
233
|
+
self.config.window.end_time,
|
|
234
|
+
self.config.window.bucket_duration,
|
|
235
|
+
)
|
|
236
|
+
|
|
202
237
|
def is_temp_table(self, name: str) -> bool:
|
|
203
238
|
try:
|
|
204
239
|
table = BigqueryTableIdentifier.from_string_name(name)
|
|
@@ -299,6 +334,8 @@ class BigQueryQueriesExtractor(Closeable):
|
|
|
299
334
|
shared_connection.close()
|
|
300
335
|
audit_log_file.unlink(missing_ok=True)
|
|
301
336
|
|
|
337
|
+
self._update_state()
|
|
338
|
+
|
|
302
339
|
def deduplicate_queries(
|
|
303
340
|
self, queries: FileBackedList[ObservedQuery]
|
|
304
341
|
) -> FileBackedDict[Dict[int, ObservedQuery]]:
|
|
@@ -355,8 +392,8 @@ class BigQueryQueriesExtractor(Closeable):
|
|
|
355
392
|
query_log_query = _build_enriched_query_log_query(
|
|
356
393
|
project_id=project.id,
|
|
357
394
|
region=region,
|
|
358
|
-
start_time=self.
|
|
359
|
-
end_time=self.
|
|
395
|
+
start_time=self.start_time,
|
|
396
|
+
end_time=self.end_time,
|
|
360
397
|
)
|
|
361
398
|
|
|
362
399
|
logger.info(f"Fetching query log from BQ Project {project.id} for {region}")
|
|
@@ -15,6 +15,7 @@ from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable
|
|
|
15
15
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
|
|
16
16
|
from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
|
|
17
17
|
from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column
|
|
18
|
+
from datahub.utilities.perf_timer import PerfTimer
|
|
18
19
|
|
|
19
20
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
@@ -243,9 +244,13 @@ class RedshiftDataDictionary:
|
|
|
243
244
|
conn: redshift_connector.Connection, query: str
|
|
244
245
|
) -> redshift_connector.Cursor:
|
|
245
246
|
cursor: redshift_connector.Cursor = conn.cursor()
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
247
|
+
with PerfTimer() as timer:
|
|
248
|
+
query_hash_id = hash(query)
|
|
249
|
+
logger.info(f"Executing query [{query_hash_id}]\n{query}")
|
|
250
|
+
cursor.execute(query)
|
|
251
|
+
logger.info(
|
|
252
|
+
f"Time taken query [{query_hash_id}: {timer.elapsed_seconds():.3f} seconds"
|
|
253
|
+
)
|
|
249
254
|
return cursor
|
|
250
255
|
|
|
251
256
|
@staticmethod
|
|
@@ -545,8 +550,7 @@ class RedshiftDataDictionary:
|
|
|
545
550
|
conn: redshift_connector.Connection,
|
|
546
551
|
query: str,
|
|
547
552
|
) -> Iterable[LineageRow]:
|
|
548
|
-
cursor =
|
|
549
|
-
cursor.execute(query)
|
|
553
|
+
cursor = RedshiftDataDictionary.get_query_result(conn=conn, query=query)
|
|
550
554
|
field_names = [i[0] for i in cursor.description]
|
|
551
555
|
|
|
552
556
|
rows = cursor.fetchmany()
|
|
@@ -603,9 +607,7 @@ class RedshiftDataDictionary:
|
|
|
603
607
|
conn: redshift_connector.Connection,
|
|
604
608
|
query: str,
|
|
605
609
|
) -> Iterable[TempTableRow]:
|
|
606
|
-
cursor =
|
|
607
|
-
|
|
608
|
-
cursor.execute(query)
|
|
610
|
+
cursor = RedshiftDataDictionary.get_query_result(conn=conn, query=query)
|
|
609
611
|
|
|
610
612
|
field_names = [i[0] for i in cursor.description]
|
|
611
613
|
|
|
@@ -662,8 +664,9 @@ class RedshiftDataDictionary:
|
|
|
662
664
|
def get_outbound_datashares(
|
|
663
665
|
conn: redshift_connector.Connection,
|
|
664
666
|
) -> Iterable[OutboundDatashare]:
|
|
665
|
-
cursor =
|
|
666
|
-
|
|
667
|
+
cursor = RedshiftDataDictionary.get_query_result(
|
|
668
|
+
conn=conn, query=RedshiftCommonQuery.list_outbound_datashares()
|
|
669
|
+
)
|
|
667
670
|
for item in cursor.fetchall():
|
|
668
671
|
yield OutboundDatashare(
|
|
669
672
|
share_name=item[1],
|
|
@@ -678,8 +681,10 @@ class RedshiftDataDictionary:
|
|
|
678
681
|
conn: redshift_connector.Connection,
|
|
679
682
|
database: str,
|
|
680
683
|
) -> Optional[InboundDatashare]:
|
|
681
|
-
cursor =
|
|
682
|
-
|
|
684
|
+
cursor = RedshiftDataDictionary.get_query_result(
|
|
685
|
+
conn=conn,
|
|
686
|
+
query=RedshiftCommonQuery.get_inbound_datashare(database),
|
|
687
|
+
)
|
|
683
688
|
item = cursor.fetchone()
|
|
684
689
|
if item:
|
|
685
690
|
return InboundDatashare(
|