acryl-datahub 0.15.0rc13__py3-none-any.whl → 0.15.0rc14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0rc13.dist-info → acryl_datahub-0.15.0rc14.dist-info}/METADATA +2464 -2464
- {acryl_datahub-0.15.0rc13.dist-info → acryl_datahub-0.15.0rc14.dist-info}/RECORD +22 -22
- datahub/__init__.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +13 -1
- datahub/ingestion/source/aws/sagemaker.py +8 -0
- datahub/ingestion/source/aws/sagemaker_processors/common.py +6 -0
- datahub/ingestion/source/aws/sagemaker_processors/jobs.py +12 -1
- datahub/ingestion/source/aws/sagemaker_processors/lineage.py +11 -4
- datahub/ingestion/source/gc/dataprocess_cleanup.py +20 -11
- datahub/ingestion/source/powerbi/m_query/data_classes.py +2 -13
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +19 -27
- datahub/ingestion/source/powerbi/m_query/resolver.py +8 -10
- datahub/ingestion/source/powerbi/m_query/tree_function.py +3 -3
- datahub/ingestion/source/preset.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +4 -3
- datahub/ingestion/source/snowflake/snowflake_query.py +2 -2
- datahub/ingestion/source/sql/mssql/source.py +0 -2
- datahub/ingestion/source/sql/sql_common.py +32 -21
- datahub/ingestion/source/superset.py +215 -65
- {acryl_datahub-0.15.0rc13.dist-info → acryl_datahub-0.15.0rc14.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc13.dist-info → acryl_datahub-0.15.0rc14.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc13.dist-info → acryl_datahub-0.15.0rc14.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0rc13.dist-info → acryl_datahub-0.15.0rc14.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=
+datahub/__init__.py,sha256=TT6lQ2hGZTB3ZKUJKer0P4dfP-zsktBs_ZiZhHSPcV8,575
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -200,13 +200,13 @@ datahub/ingestion/source/mongodb.py,sha256=vZue4Nz0xaBoCUsQr3_0OIRkWRxeE_IH_Y_QK
 datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPGPWQ,56146
 datahub/ingestion/source/openapi.py,sha256=3ea2ORz1cuq4e7L2hSjxG9Cw3__pVoJ5UNYTJS3EnKU,17386
 datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
-datahub/ingestion/source/preset.py,sha256=
+datahub/ingestion/source/preset.py,sha256=fByqamRLnXxsfCGdLPzWN_5LJR_s2_G2f_zwSKUc8EA,3981
 datahub/ingestion/source/pulsar.py,sha256=H8XJC7xIX8Kdkd7006PxllAGVO_Pjza8Xx9VUBOvpPc,19827
 datahub/ingestion/source/redash.py,sha256=E-a14X19zppPun7_-S-pZ2lRiw1-68QiT-jL7bDzG10,32057
 datahub/ingestion/source/salesforce.py,sha256=S6LSM6mzl8-zKbrJPoINhM1SCpYfM244Xb74pbEI-J0,31792
 datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
 datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
-datahub/ingestion/source/superset.py,sha256=
+datahub/ingestion/source/superset.py,sha256=5hUI83QEArHoDy5tb8rx5P6t-1louxX1Ki1XIplIuFo,24777
 datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/abs/config.py,sha256=Doecl1mA6JshJTNar7oTVR7wnWl4gMu64MBHp3hIVJc,6737
 datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=qh38q-Zw8TUTZD5RF0_hSoEfR6BilNGXyKPRsq1KQKE,3600
@@ -214,17 +214,17 @@ datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBo
 datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
 datahub/ingestion/source/abs/source.py,sha256=eH3SIWnbJH30VGNsdNOExFfjfyR9bLuS6KKzMsi6vz4,24339
 datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/aws/aws_common.py,sha256=
+datahub/ingestion/source/aws/aws_common.py,sha256=BqDe19yqHdwj6_sjIryGZ9_5lsAJ0PZhfPfGqLZrCcE,10649
 datahub/ingestion/source/aws/glue.py,sha256=fX0dtaVVq174ZS0aBJvZFYK8ligfZX5EU3pdS3j1KQs,56215
 datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Wyp9k9tapsCuw9dyH4FCXJr_wmeLaYFoCtKvrV6SEDk,3892
 datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
-datahub/ingestion/source/aws/sagemaker.py,sha256=
+datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
 datahub/ingestion/source/aws/sagemaker_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=
+datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=NvYfI8LHgDvhEZE7qp6qF1NSZ0_SQKhg3ivtdjsdpFg,2172
 datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py,sha256=bnx6uKwXvzafYhcIl112INTMmotu6xy8FjFNhTO4b6c,10384
 datahub/ingestion/source/aws/sagemaker_processors/job_classes.py,sha256=CfJkzjZU2uvZvw7qvmxfNgeWI1EvgHFY-7bn5Ih71no,9154
-datahub/ingestion/source/aws/sagemaker_processors/jobs.py,sha256=
-datahub/ingestion/source/aws/sagemaker_processors/lineage.py,sha256=
+datahub/ingestion/source/aws/sagemaker_processors/jobs.py,sha256=aHgQ4QMufdWAA62TNBoEPT3YSQKXg39IJ2-6MZXs8sw,32915
+datahub/ingestion/source/aws/sagemaker_processors/lineage.py,sha256=TcT8xmVuQDQdlRKYaCRXbFjMcW5brfSWFBNpoRdPx1o,9789
 datahub/ingestion/source/aws/sagemaker_processors/models.py,sha256=6Ltmy6MAwbexN_JRYu7LXlAKpihXGlW4WXxo7qdwEF8,19845
 datahub/ingestion/source/azure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0ch1oF2SJSYDZ1JMB_Onso,7605
@@ -301,7 +301,7 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
 datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
 datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
-datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
+datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=ficLiRb9DEx4YFXZqWO8o-6ndVIrNW_yR-Yn2SXfDxc,15836
 datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
 datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=_tms5AqNAJRDRzQmyN_VydzXbdME2lkvTwa5u1La5z8,7353
 datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -355,12 +355,12 @@ datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=AIU89l
 datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=PRWzuZMMhKdOVoAaE8csHvUFbZHxYe5meJHgrqlgiuw,19795
 datahub/ingestion/source/powerbi/powerbi.py,sha256=7UsAEqaFlkWONcXJdQ2hotUYYn46ks6Fe71KXEMh7lI,54495
 datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=
+datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
 datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
 datahub/ingestion/source/powerbi/m_query/parser.py,sha256=pB1LGdb02Ryf8Pr8JPSgiOmLE6mEAgDbKodtKOOY6LU,5782
-datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=
-datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=
-datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=
+datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=h1gBh9Gz1zF3m-hA0WMQaBdEtcZUW9vScNKEvqIWCfk,32442
+datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=TMt84_JaCazpP7vcneW0O3cUjtbIuh8Yid78JWfDxsI,16953
+datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
 datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
 datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=xqAsnNUCP44Wd1rE1m_phbKtNCMJTFJfOX4_2varadg,8298
@@ -423,13 +423,13 @@ datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_G
 datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
 datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
 datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
-datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
+datahub/ingestion/source/snowflake/snowflake_config.py,sha256=BzxM1EYIV2-KdZz3jphCAhitjpMUadiLLEbL6s-Z2J4,18707
 datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
 datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
 datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=w2CPm5XEU-KMUSIpb58aKOaxTDHfM5NvghutCVRicy4,23247
 datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
 datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
-datahub/ingestion/source/snowflake/snowflake_query.py,sha256=
+datahub/ingestion/source/snowflake/snowflake_query.py,sha256=e6WodpmNto-I8lmexRd7VO0lxJDxM66MCGnG5dzr1Dk,38067
 datahub/ingestion/source/snowflake/snowflake_report.py,sha256=KjNvYufQMVkFP7F5sEFumKorkiFAmFVCQ1jYqXr0ev0,6419
 datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fatrKpBUY9CnzXhLJcFlHkHGt0QWFhkYH9ZXwWoQCLA,20392
 datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=JjzhhyEN9QBUv-64sHhkq-4Vq1XhDtz9npLMiqlSICo,38893
@@ -452,7 +452,7 @@ datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_
 datahub/ingestion/source/sql/oracle.py,sha256=ibBtjaneCFto-Rw3k2OxsbT3YHgux1aCtPtv5oA8St4,24533
 datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
 datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
-datahub/ingestion/source/sql/sql_common.py,sha256=
+datahub/ingestion/source/sql/sql_common.py,sha256=FMLbRQVSxLWjLNTiBBQVb9SG4F7h9lwsfNzAoKhOyAU,51356
 datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxvJ6cLbCIea7Mww,9449
 datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
 datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=6QbhkQH_F13GV1HsavVTq3BE9F7Pr_vfGOjCX2o2c60,11675
@@ -467,7 +467,7 @@ datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5f
 datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
 datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
 datahub/ingestion/source/sql/mssql/job_models.py,sha256=eMyR0Efl5kvi7QNgNXzd5_6PdDKYly_552Y8OGSj9PY,6012
-datahub/ingestion/source/sql/mssql/source.py,sha256
+datahub/ingestion/source/sql/mssql/source.py,sha256=-B0bnFKEReciYzQ4p_2xJJzdn-H8vYz2MQ_h-1B0ibs,30329
 datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
 datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/state/checkpoint.py,sha256=x9Xww-MIFXSKjeg1tOZXE72LehCm5OfKy3HfucgIRWM,8833
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
+acryl_datahub-0.15.0rc14.dist-info/METADATA,sha256=GAon0PKaDuM17zZePUxkbcfczQk1bvUN_FVAcWCPVgI,174408
+acryl_datahub-0.15.0rc14.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+acryl_datahub-0.15.0rc14.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
+acryl_datahub-0.15.0rc14.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.15.0rc14.dist-info/RECORD,,
datahub/__init__.py
CHANGED
datahub/ingestion/source/aws/aws_common.py
CHANGED
@@ -1,5 +1,5 @@
 from datetime import datetime, timedelta, timezone
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union

 import boto3
 from boto3.session import Session
@@ -107,6 +107,14 @@ class AwsConnectionConfig(ConfigModel):
         default=None,
         description="A set of proxy configs to use with AWS. See the [botocore.config](https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html) docs for details.",
     )
+    aws_retry_num: int = Field(
+        default=5,
+        description="Number of times to retry failed AWS requests. See the [botocore.retry](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html) docs for details.",
+    )
+    aws_retry_mode: Literal["legacy", "standard", "adaptive"] = Field(
+        default="standard",
+        description="Retry mode to use for failed AWS requests. See the [botocore.retry](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html) docs for details.",
+    )

     read_timeout: float = Field(
         default=DEFAULT_TIMEOUT,
@@ -199,6 +207,10 @@ class AwsConnectionConfig(ConfigModel):
         return Config(
             proxies=self.aws_proxy,
             read_timeout=self.read_timeout,
+            retries={
+                "max_attempts": self.aws_retry_num,
+                "mode": self.aws_retry_mode,
+            },
             **self.aws_advanced_config,
         )
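The two new options feed botocore's standard retry configuration. A minimal standalone sketch of what the generated client config amounts to (the S3 client below is illustrative, not part of the diff):

import boto3
from botocore.config import Config

# Equivalent of AwsConnectionConfig(aws_retry_num=5, aws_retry_mode="standard"):
# at most 5 attempts per request, using the "standard" retry mode
# ("legacy" and "adaptive" are the other accepted values).
retry_config = Config(
    retries={
        "max_attempts": 5,
        "mode": "standard",
    },
)

# Any client built with this config inherits the retry behavior.
s3 = boto3.client("s3", config=retry_config)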
datahub/ingestion/source/aws/sagemaker.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 from collections import defaultdict
 from typing import TYPE_CHECKING, DefaultDict, Dict, Iterable, List, Optional

@@ -36,6 +37,8 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
 if TYPE_CHECKING:
     from mypy_boto3_sagemaker import SageMakerClient

+logger = logging.getLogger(__name__)
+

 @platform_name("SageMaker")
 @config_class(SagemakerSourceConfig)
@@ -75,6 +78,7 @@ class SagemakerSource(StatefulIngestionSourceBase):
         ]

     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+        logger.info("Starting SageMaker ingestion...")
         # get common lineage graph
         lineage_processor = LineageProcessor(
             sagemaker_client=self.sagemaker_client, env=self.env, report=self.report
@@ -83,6 +87,7 @@ class SagemakerSource(StatefulIngestionSourceBase):

         # extract feature groups if specified
         if self.source_config.extract_feature_groups:
+            logger.info("Extracting feature groups...")
             feature_group_processor = FeatureGroupProcessor(
                 sagemaker_client=self.sagemaker_client, env=self.env, report=self.report
             )
@@ -95,6 +100,7 @@ class SagemakerSource(StatefulIngestionSourceBase):

         # extract jobs if specified
         if self.source_config.extract_jobs is not False:
+            logger.info("Extracting jobs...")
             job_processor = JobProcessor(
                 sagemaker_client=self.client_factory.get_client,
                 env=self.env,
@@ -109,6 +115,8 @@ class SagemakerSource(StatefulIngestionSourceBase):

         # extract models if specified
         if self.source_config.extract_models:
+            logger.info("Extracting models...")
+
             model_processor = ModelProcessor(
                 sagemaker_client=self.sagemaker_client,
                 env=self.env,
datahub/ingestion/source/aws/sagemaker_processors/common.py
CHANGED
@@ -40,8 +40,11 @@ class SagemakerSourceReport(StaleEntityRemovalSourceReport):
     groups_scanned = 0
     models_scanned = 0
     jobs_scanned = 0
+    jobs_processed = 0
     datasets_scanned = 0
     filtered: List[str] = field(default_factory=list)
+    model_endpoint_lineage = 0
+    model_group_lineage = 0

     def report_feature_group_scanned(self) -> None:
         self.feature_groups_scanned += 1
@@ -58,6 +61,9 @@ class SagemakerSourceReport(StaleEntityRemovalSourceReport):
     def report_model_scanned(self) -> None:
         self.models_scanned += 1

+    def report_job_processed(self) -> None:
+        self.jobs_processed += 1
+
     def report_job_scanned(self) -> None:
         self.jobs_scanned += 1
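The report gains plain counters plus a report_* increment helper, matching the pattern of the existing fields. A reduced sketch of that shape (base class and unrelated fields omitted):

from dataclasses import dataclass, field
from typing import List

@dataclass
class ReportSketch:
    # Mirrors the new fields added to SagemakerSourceReport.
    jobs_scanned: int = 0
    jobs_processed: int = 0
    model_endpoint_lineage: int = 0
    model_group_lineage: int = 0
    filtered: List[str] = field(default_factory=list)

    def report_job_processed(self) -> None:
        # Called once per job that makes it into processed_jobs.
        self.jobs_processed += 1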
datahub/ingestion/source/aws/sagemaker_processors/jobs.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 from collections import defaultdict
 from dataclasses import dataclass, field
 from enum import Enum
@@ -49,6 +50,8 @@ from datahub.metadata.schema_classes import (
 if TYPE_CHECKING:
     from mypy_boto3_sagemaker import SageMakerClient

+logger = logging.getLogger(__name__)
+
 JobInfo = TypeVar(
     "JobInfo",
     AutoMlJobInfo,
@@ -274,15 +277,18 @@ class JobProcessor:
         )

     def get_workunits(self) -> Iterable[MetadataWorkUnit]:
+        logger.info("Getting all SageMaker jobs")
         jobs = self.get_all_jobs()

         processed_jobs: Dict[str, SageMakerJob] = {}

+        logger.info("Processing SageMaker jobs")
         # first pass: process jobs and collect datasets used
+        logger.info("first pass: process jobs and collect datasets used")
         for job in jobs:
             job_type = job_type_to_info[job["type"]]
             job_name = job[job_type.list_name_key]
-
+            logger.debug(f"Processing job {job_name} with type {job_type}")
             job_details = self.get_job_details(job_name, job["type"])

             processed_job = getattr(self, job_type.processor)(job_details)
@@ -293,6 +299,9 @@ class JobProcessor:
         # second pass:
         # - move output jobs to inputs
         # - aggregate i/o datasets
+        logger.info(
+            "second pass: move output jobs to inputs and aggregate i/o datasets"
+        )
         for job_urn in sorted(processed_jobs):
             processed_job = processed_jobs[job_urn]

@@ -301,6 +310,7 @@ class JobProcessor:

             all_datasets.update(processed_job.input_datasets)
             all_datasets.update(processed_job.output_datasets)
+            self.report.report_job_processed()

         # yield datasets
         for dataset_urn, dataset in all_datasets.items():
@@ -322,6 +332,7 @@ class JobProcessor:
             self.report.report_dataset_scanned()

         # third pass: construct and yield MCEs
+        logger.info("third pass: construct and yield MCEs")
         for job_urn in sorted(processed_jobs):
             processed_job = processed_jobs[job_urn]
             job_snapshot = processed_job.job_snapshot
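For context on the passes being logged: the processor aggregates every input/output dataset across all jobs before yielding, so a dataset shared by several jobs is emitted once. A toy sketch of that aggregate-then-emit shape (job and dataset names are illustrative):

from typing import Dict, List

jobs: Dict[str, Dict[str, List[str]]] = {
    "job_a": {"inputs": ["s3://bucket/train"], "outputs": ["s3://bucket/model"]},
    "job_b": {"inputs": ["s3://bucket/model"], "outputs": ["s3://bucket/preds"]},
}

# Aggregate every dataset exactly once across jobs.
all_datasets: Dict[str, bool] = {}
for job in jobs.values():
    for urn in job["inputs"] + job["outputs"]:
        all_datasets[urn] = True

# Emit datasets, then jobs, in a stable order.
for urn in sorted(all_datasets):
    print("emit dataset", urn)
for job_name in sorted(jobs):
    print("emit job", job_name)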
datahub/ingestion/source/aws/sagemaker_processors/lineage.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 from collections import defaultdict
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Set
@@ -6,6 +7,8 @@ from datahub.ingestion.source.aws.sagemaker_processors.common import (
     SagemakerSourceReport,
 )

+logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     from mypy_boto3_sagemaker import SageMakerClient
     from mypy_boto3_sagemaker.type_defs import (
@@ -88,7 +91,6 @@ class LineageProcessor:
         paginator = self.sagemaker_client.get_paginator("list_contexts")
         for page in paginator.paginate():
             contexts += page["ContextSummaries"]
-
         return contexts

     def get_incoming_edges(self, node_arn: str) -> List["AssociationSummaryTypeDef"]:
@@ -225,27 +227,32 @@ class LineageProcessor:
         """
         Get the lineage of all artifacts in SageMaker.
         """
-
+        logger.info("Getting lineage for SageMaker artifacts...")
+        logger.info("Getting all actions")
         for action in self.get_all_actions():
             self.nodes[action["ActionArn"]] = {**action, "node_type": "action"}
+        logger.info("Getting all artifacts")
         for artifact in self.get_all_artifacts():
             self.nodes[artifact["ArtifactArn"]] = {**artifact, "node_type": "artifact"}
+        logger.info("Getting all contexts")
         for context in self.get_all_contexts():
             self.nodes[context["ContextArn"]] = {**context, "node_type": "context"}

+        logger.info("Getting lineage for model deployments and model groups")
         for node_arn, node in self.nodes.items():
+            logger.debug(f"Getting lineage for node {node_arn}")
             # get model-endpoint lineage
             if (
                 node["node_type"] == "action"
                 and node.get("ActionType") == "ModelDeployment"
             ):
                 self.get_model_deployment_lineage(node_arn)
-
+                self.report.model_endpoint_lineage += 1
             # get model-group lineage
             if (
                 node["node_type"] == "context"
                 and node.get("ContextType") == "ModelGroup"
             ):
                 self.get_model_group_lineage(node_arn, node)
-
+                self.report.model_group_lineage += 1
         return self.lineage_info
datahub/ingestion/source/gc/dataprocess_cleanup.py
CHANGED
@@ -207,6 +207,9 @@ class DataProcessCleanup:
         assert self.ctx.graph
         dpis = []
        start = 0
+        # This graphql endpoint doesn't support scrolling and therefore after 10k DPIs it causes performance issues on ES
+        # Therefore, we are limiting the max DPIs to 9000
+        max_item = 9000
         while True:
             try:
                 job_query_result = self.ctx.graph.execute_graphql(
@@ -226,10 +229,12 @@ class DataProcessCleanup:
                 runs = runs_data.get("runs")
                 dpis.extend(runs)
                 start += batch_size
-                if len(runs) < batch_size:
+                if len(runs) < batch_size or start >= max_item:
                     break
             except Exception as e:
-
+                self.report.failure(
+                    f"Exception while fetching DPIs for job {job_urn}:", exc=e
+                )
                 break
         return dpis

@@ -254,8 +259,9 @@ class DataProcessCleanup:
                         deleted_count_last_n += 1
                         futures[future]["deleted"] = True
                     except Exception as e:
-
-
+                        self.report.report_failure(
+                            f"Exception while deleting DPI: {e}", exc=e
+                        )
                 if deleted_count_last_n % self.config.batch_size == 0:
                     logger.info(f"Deleted {deleted_count_last_n} DPIs from {job.urn}")
                     if self.config.delay:
@@ -289,7 +295,7 @@ class DataProcessCleanup:
         dpis = self.fetch_dpis(job.urn, self.config.batch_size)
         dpis.sort(
             key=lambda x: x["created"]["time"]
-            if "created"
+            if x.get("created") and x["created"].get("time")
             else 0,
             reverse=True,
         )
@@ -325,8 +331,8 @@ class DataProcessCleanup:
                     continue

                 if (
-                    "created"
-                    or
+                    not dpi.get("created")
+                    or not dpi["created"].get("time")
                     or dpi["created"]["time"] < retention_time * 1000
                 ):
                     future = executor.submit(
@@ -340,7 +346,7 @@ class DataProcessCleanup:
                        deleted_count_retention += 1
                        futures[future]["deleted"] = True
                    except Exception as e:
-
+                        self.report.report_failure(f"Exception while deleting DPI: {e}", exc=e)

                 if deleted_count_retention % self.config.batch_size == 0:
                     logger.info(
@@ -351,9 +357,12 @@ class DataProcessCleanup:
                         logger.info(f"Sleeping for {self.config.delay} seconds")
                         time.sleep(self.config.delay)

-
-
-
+        if deleted_count_retention > 0:
+            logger.info(
+                f"Deleted {deleted_count_retention} DPIs from {job.urn} due to retention"
+            )
+        else:
+            logger.debug(f"No DPIs to delete from {job.urn} due to retention")

     def get_data_flows(self) -> Iterable[DataFlowEntity]:
         assert self.ctx.graph
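Two of the fixes here are defensive dict access: both the sort key and the retention check now tolerate runs whose created or created.time fields are missing. A small sketch of the guarded pattern (the run records are illustrative):

# Illustrative DPI records; "created" or its "time" may be absent.
dpis = [
    {"urn": "urn:li:dpi:1", "created": {"time": 1700000000000}},
    {"urn": "urn:li:dpi:2", "created": {}},
    {"urn": "urn:li:dpi:3"},
]

# Guarded sort key: records without a timestamp sort as 0 instead of raising KeyError.
dpis.sort(
    key=lambda x: x["created"]["time"]
    if x.get("created") and x["created"].get("time")
    else 0,
    reverse=True,
)

retention_time = 1690000000  # seconds; illustrative cutoff
for dpi in dpis:
    # A record with no timestamp is treated as expired, matching the new condition.
    if (
        not dpi.get("created")
        or not dpi["created"].get("time")
        or dpi["created"]["time"] < retention_time * 1000
    ):
        print("would delete", dpi["urn"])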
datahub/ingestion/source/powerbi/m_query/data_classes.py
CHANGED
@@ -1,5 +1,4 @@
 import os
-from abc import ABC
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Dict, List, Optional
@@ -12,18 +11,8 @@ from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
 TRACE_POWERBI_MQUERY_PARSER = os.getenv("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", False)


-class AbstractIdentifierAccessor(ABC):  # To pass lint
-    pass
-
-
-# @dataclass
-# class ItemSelector:
-#     items: Dict[str, Any]
-#     next: Optional[AbstractIdentifierAccessor]
-
-
 @dataclass
-class IdentifierAccessor(AbstractIdentifierAccessor):
+class IdentifierAccessor:
     """
     statement
         public_order_date = Source{[Schema="public",Item="order_date"]}[Data]
@@ -40,7 +29,7 @@ class IdentifierAccessor(AbstractIdentifierAccessor):

     identifier: str
     items: Dict[str, Any]
-    next: Optional[AbstractIdentifierAccessor]
+    next: Optional["IdentifierAccessor"]


 @dataclass
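IdentifierAccessor is now a plain self-referential dataclass. A sketch of a chain for the docstring's example expression (in practice the M-query parser builds this; the values here are illustrative):

from dataclasses import dataclass
from typing import Any, Dict, Optional

@dataclass
class IdentifierAccessor:
    identifier: str
    items: Dict[str, Any]
    next: Optional["IdentifierAccessor"]

# Roughly what
#   public_order_date = Source{[Schema="public",Item="order_date"]}[Data]
# parses into: a single accessor node with no further hop.
accessor = IdentifierAccessor(
    identifier="Source",
    items={"Schema": "public", "Item": "order_date"},
    next=None,
)
print(accessor.items["Item"])  # order_date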
datahub/ingestion/source/powerbi/m_query/pattern_handler.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Dict, List, Optional, Tuple, Type,
+from typing import Dict, List, Optional, Tuple, Type, cast

 from lark import Tree

@@ -22,7 +22,6 @@ from datahub.ingestion.source.powerbi.dataplatform_instance_resolver import (
 )
 from datahub.ingestion.source.powerbi.m_query import native_sql_parser, tree_function
 from datahub.ingestion.source.powerbi.m_query.data_classes import (
-    AbstractIdentifierAccessor,
     DataAccessFunctionDetail,
     DataPlatformTable,
     FunctionName,
@@ -412,33 +411,25 @@ class DatabricksLineage(AbstractLineage):
         )
         table_detail: Dict[str, str] = {}
         temp_accessor: Optional[
-
+            IdentifierAccessor
         ] = data_access_func_detail.identifier_accessor

         while temp_accessor:
-
-
-
-
-
-
-
-
-                table_detail["Table"] = temp_accessor.items["Item"]
-            else:
-                table_detail[temp_accessor.items["Kind"]] = temp_accessor.items[
-                    "Name"
-                ]
-
-            if temp_accessor.next is not None:
-                temp_accessor = temp_accessor.next
-            else:
-                break
+            # Condition to handle databricks M-query pattern where table, schema and database all are present in
+            # the same invoke statement
+            if all(
+                element in temp_accessor.items
+                for element in ["Item", "Schema", "Catalog"]
+            ):
+                table_detail["Schema"] = temp_accessor.items["Schema"]
+                table_detail["Table"] = temp_accessor.items["Item"]
             else:
-
-
-
-
+                table_detail[temp_accessor.items["Kind"]] = temp_accessor.items["Name"]
+
+            if temp_accessor.next is not None:
+                temp_accessor = temp_accessor.next
+            else:
+                break

         table_reference = self.create_reference_table(
             arg_list=data_access_func_detail.arg_list,
@@ -786,9 +777,10 @@ class NativeQueryLineage(AbstractLineage):
     def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
     ) -> Lineage:
-        t1: Tree =
-
+        t1: Optional[Tree] = tree_function.first_arg_list_func(
+            data_access_func_detail.arg_list
         )
+        assert t1 is not None
         flat_argument_list: List[Tree] = tree_function.flat_argument_list(t1)

         if len(flat_argument_list) != 2:
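The rewritten loop flattens the accessor chain into a table_detail dict: when Item, Schema and Catalog all arrive in one node it reads them directly, otherwise it falls back to the node's Kind/Name pair. A condensed sketch of the traversal (input values are illustrative; the advance is folded into the loop tail for brevity):

from dataclasses import dataclass
from typing import Any, Dict, Optional

@dataclass
class IdentifierAccessor:
    identifier: str
    items: Dict[str, Any]
    next: Optional["IdentifierAccessor"]

# Illustrative chain for a Databricks-style reference.
chain = IdentifierAccessor(
    identifier="Source",
    items={"Item": "orders", "Schema": "sales", "Catalog": "hive_metastore"},
    next=None,
)

table_detail: Dict[str, str] = {}
temp_accessor: Optional[IdentifierAccessor] = chain
while temp_accessor:
    # Single-invoke pattern: table, schema and catalog in the same node.
    if all(k in temp_accessor.items for k in ["Item", "Schema", "Catalog"]):
        table_detail["Schema"] = temp_accessor.items["Schema"]
        table_detail["Table"] = temp_accessor.items["Item"]
    else:
        table_detail[temp_accessor.items["Kind"]] = temp_accessor.items["Name"]
    temp_accessor = temp_accessor.next

print(table_detail)  # {'Schema': 'sales', 'Table': 'orders'}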
datahub/ingestion/source/powerbi/m_query/resolver.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union

 from lark import Tree

@@ -95,14 +95,12 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
         # remove whitespaces and quotes from token
         tokens: List[str] = tree_function.strip_char_from_list(
             tree_function.remove_whitespaces_from_list(
-                tree_function.token_values(
-                    cast(Tree, item_selector), parameters=self.parameters
-                )
+                tree_function.token_values(item_selector, parameters=self.parameters)
             ),
         )
         identifier: List[str] = tree_function.token_values(
-
-        )
+            identifier_tree, parameters={}
+        )

         # convert tokens to dict
         iterator = iter(tokens)
@@ -238,10 +236,10 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
     def _process_item_selector_expression(
         self, rh_tree: Tree
     ) -> Tuple[Optional[str], Optional[Dict[str, str]]]:
-
-
-        )
+        first_expression: Optional[Tree] = tree_function.first_expression_func(rh_tree)
+        assert first_expression is not None

+        new_identifier, key_vs_value = self.get_item_selector_tokens(first_expression)
         return new_identifier, key_vs_value

     @staticmethod
@@ -327,7 +325,7 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
             # The first argument can be a single table argument or list of table.
             # For example Table.Combine({t1,t2},....), here first argument is list of table.
             # Table.AddColumn(t1,....), here first argument is single table.
-            for token in
+            for token in result:
                 internal(token, identifier_accessor)

         else:
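Both resolver fixes swap a cast for an Optional return followed by an assert, which narrows the type for checkers and fails loudly when a lookup finds nothing. A generic sketch of the pattern (find_subtree is an illustrative stand-in for the tree_function helpers):

from typing import Optional

def find_subtree(name: str) -> Optional[str]:
    # Stand-in for a lookup that may legitimately return None.
    return name if name.startswith("expression") else None

result: Optional[str] = find_subtree("expression_root")
assert result is not None  # narrows Optional[str] to str
print(result.upper())  # safe to use without a cast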
datahub/ingestion/source/powerbi/m_query/tree_function.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 from functools import partial
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union

 from lark import Token, Tree

@@ -58,7 +58,7 @@ def get_first_rule(tree: Tree, rule: str) -> Optional[Tree]:
         if isinstance(node, Token):
             return None

-        for child in
+        for child in node.children:
             child_node: Optional[Tree] = internal(child)
             if child_node is not None:
                 return child_node
@@ -99,7 +99,7 @@ def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
             logger.debug(f"Unable to resolve parameter reference to {ref}")
             values.append(ref)
         elif isinstance(node, Token):
-            values.append(
+            values.append(node.value)
             return
         else:
             for child in node.children:
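token_values walks a lark parse tree, appending each Token's .value and recursing through Tree children, which is what the restored lines do. A self-contained sketch of the same walk (the grammar is illustrative):

from typing import List

from lark import Lark, Token, Tree

grammar = """
start: WORD ("," WORD)*
%import common.WORD
%import common.WS
%ignore WS
"""

def token_values_sketch(node) -> List[str]:
    # Tokens contribute their value; Trees are walked via .children,
    # mirroring the shape of the helper in tree_function.py.
    values: List[str] = []
    if isinstance(node, Token):
        values.append(node.value)
    elif isinstance(node, Tree):
        for child in node.children:
            values.extend(token_values_sketch(child))
    return values

tree = Lark(grammar).parse("alpha, beta, gamma")
print(token_values_sketch(tree))  # ['alpha', 'beta', 'gamma']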
datahub/ingestion/source/snowflake/snowflake_config.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Set
+from typing import Dict, List, Optional, Set

 import pydantic
 from pydantic import Field, SecretStr, root_validator, validator
@@ -118,9 +118,10 @@ class SnowflakeFilterConfig(SQLFilterConfig):
         )

         # Always exclude reporting metadata for INFORMATION_SCHEMA schema
-        if schema_pattern
+        if schema_pattern:
             logger.debug("Adding deny for INFORMATION_SCHEMA to schema_pattern.")
-
+            assert isinstance(schema_pattern, AllowDenyPattern)
+            schema_pattern.deny.append(r".*INFORMATION_SCHEMA$")

         return values
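The validator now always appends a deny regex so INFORMATION_SCHEMA never survives filtering. A standalone sketch of the resulting behavior using plain regexes (a minimal stand-in for DataHub's AllowDenyPattern semantics):

import re
from typing import List

deny: List[str] = []
# What the validator appends for every config with a schema_pattern:
deny.append(r".*INFORMATION_SCHEMA$")

def allowed(schema: str) -> bool:
    return not any(re.match(pattern, schema) for pattern in deny)

print(allowed("MYDB.PUBLIC"))              # True
print(allowed("MYDB.INFORMATION_SCHEMA"))  # False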