acryl-datahub 0.14.1.13rc5__py3-none-any.whl → 0.14.1.13rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (27):
  1. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/METADATA +2305 -2305
  2. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/RECORD +27 -26
  3. datahub/__init__.py +1 -1
  4. datahub/configuration/kafka_consumer_config.py +4 -1
  5. datahub/ingestion/source/bigquery_v2/bigquery_report.py +2 -2
  6. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +35 -12
  7. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +12 -11
  8. datahub/ingestion/source/dremio/dremio_reporting.py +2 -2
  9. datahub/ingestion/source/ge_data_profiler.py +1 -1
  10. datahub/ingestion/source/ge_profiling_config.py +6 -2
  11. datahub/ingestion/source/redshift/report.py +2 -2
  12. datahub/ingestion/source/snowflake/snowflake_report.py +2 -2
  13. datahub/ingestion/source/sql/oracle.py +50 -0
  14. datahub/ingestion/source/sql/sql_common.py +18 -52
  15. datahub/ingestion/source/sql/sql_generic_profiler.py +3 -32
  16. datahub/ingestion/source/sql/sql_report.py +75 -0
  17. datahub/ingestion/source/sql/teradata.py +2 -2
  18. datahub/ingestion/source/sql/vertica.py +2 -2
  19. datahub/ingestion/source/unity/report.py +2 -2
  20. datahub/metadata/schema.avsc +1 -1
  21. datahub/metadata/schemas/AssertionInfo.avsc +1 -1
  22. datahub/metadata/schemas/InputFields.avsc +1 -1
  23. datahub/metadata/schemas/MetadataChangeEvent.avsc +1 -1
  24. datahub/metadata/schemas/SchemaMetadata.avsc +1 -1
  25. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/WHEEL +0 -0
  26. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/entry_points.txt +0 -0
  27. {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc6.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=sacWO6qm2tPLBhc25GFoFnt_AeiT0Qk9ZiibpHJbPhQ,577
1
+ datahub/__init__.py,sha256=JvS6bVIee0O3OSc7IBHb7mEeuHYesjHcY2dgF-FuZMs,577
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -97,7 +97,7 @@ datahub/configuration/git.py,sha256=s55eUHxKqVZgtVsISaDyS-1F4iZBiybbjYsjbp5LU5o,
97
97
  datahub/configuration/import_resolver.py,sha256=b4Ie9L7knN1LALEVMxTcNFSklDD6CVE-4Ipy4ZYhNYA,369
98
98
  datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7xGHzg84,302
99
99
  datahub/configuration/kafka.py,sha256=MlIwpd5FFyOyjdDXW_X9JTLNk7f988sPMgevkcZYVgI,2579
100
- datahub/configuration/kafka_consumer_config.py,sha256=DSwUU4HoqNyK4CNk9eIbX3eYsJMGQvORDiy1ZxkjlRc,1022
100
+ datahub/configuration/kafka_consumer_config.py,sha256=14UWK6kTAnKTgHM43aeWjg67fddACsSLzhO6wgp6cm4,1175
101
101
  datahub/configuration/pattern_utils.py,sha256=Q5IB9RfWOOo5FvRVBU7XkhiwHCxSQ1NTMfUlWtWI9qc,699
102
102
  datahub/configuration/pydantic_migration_helpers.py,sha256=4C_COAVZ5iJ8yxcWNgXZNWsY7ULogICNZ368oNF7zWg,1462
103
103
  datahub/configuration/source_common.py,sha256=68LZOuB23zSEcfgQJE1wZQnyYQHVVnEZK3Sniv_nEQs,2107
@@ -189,8 +189,8 @@ datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7Ek
189
189
  datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
190
190
  datahub/ingestion/source/feast.py,sha256=NYaAjzLVRhmMKDawBwN0OL8AMyKDLsxOwEj3YFX0wIA,14244
191
191
  datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
192
- datahub/ingestion/source/ge_data_profiler.py,sha256=7oUvvADX4t1WiXwOruCURh3sNEY_7I41wbwXFvKaKWM,63587
193
- datahub/ingestion/source/ge_profiling_config.py,sha256=WusEMGFPu17y99jTT-1tTOiN87Q1sY7nyxXvXnPs1-E,10489
192
+ datahub/ingestion/source/ge_data_profiler.py,sha256=jORUlsmN2XtHm3QyltENvhEyt-CwbF2O548mFxtisxY,63587
193
+ datahub/ingestion/source/ge_profiling_config.py,sha256=E65adlsUrs17mQB7WQnoe3QCjvbGaGoNNPMf8szNK6s,10648
194
194
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
195
195
  datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
196
196
  datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
@@ -239,9 +239,9 @@ datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8
239
239
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
240
240
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
241
241
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
242
- datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=00GMkriZy7Iz46fXKe9SEIScvCKQEb0kgogY7GVwCSM,8114
243
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=rSG_tDmTg9qXVEkaVGG8X3zw0pKtk9TWDYmGPrD3M4o,31456
244
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=vJt2Z573M0Hmdkb8LaNo5bIoqmkbkI29KFDFT8b70cE,51995
242
+ datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
243
+ datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=QQk2xnyLCpywwRA3a2Pm95zJd0LgJUGbe5ht-5yadmQ,32352
244
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=JkQqG8GrMnsp6efUCSv1Efc0ZUmdC6q5_M6wWIyg_dQ,51774
245
245
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
246
246
  datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
247
247
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Jg_pwnaj7l_KEcgq0enJXwrKh5jyUfBl4YB05YpkIVg,45415
@@ -287,7 +287,7 @@ datahub/ingestion/source/dremio/dremio_config.py,sha256=Mu9LjohVwVPdVgM53FhBDquJ
287
287
  datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=YkYC3-TB-Jn65z2GN_NMErQDovwU7krQ9b92DBh4uvY,3021
288
288
  datahub/ingestion/source/dremio/dremio_entities.py,sha256=cIupn6fRxYX3FrcsdoDTSmvOTHLuaoyuNEkHV70-ve0,14902
289
289
  datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
290
- datahub/ingestion/source/dremio/dremio_reporting.py,sha256=Lb60cv_4IIJSIJmHmCl2FlO57VFYM152WONMsSFUAF4,1289
290
+ datahub/ingestion/source/dremio/dremio_reporting.py,sha256=IPgv7lOnhK6mQeqwRsPscKnXhzgVZG8Id3yNcsmG7nw,1273
291
291
  datahub/ingestion/source/dremio/dremio_source.py,sha256=vQTYxB-PQdoMsoEY69ewtfraDloKypHF6pXv8jbVlG4,25940
292
292
  datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
293
293
  datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -387,7 +387,7 @@ datahub/ingestion/source/redshift/query.py,sha256=bY1D9RoOHaw89LgcXal7GYlJN0RG7P
387
387
  datahub/ingestion/source/redshift/redshift.py,sha256=j3yz9cct77IU2RaiaXDWiTZIGxoZGFUxGFWvNEVaw7E,44234
388
388
  datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
389
389
  datahub/ingestion/source/redshift/redshift_schema.py,sha256=9IYeUsnISenq3eVB3k-s7zK8nInWDAYViFnDrNjtkb0,19149
390
- datahub/ingestion/source/redshift/report.py,sha256=RqoCB-K351J1_S55xDstnHsC-7cN7ngrruRBq85dn-A,2974
390
+ datahub/ingestion/source/redshift/report.py,sha256=M19aUHBkd9n-BVBX4fRhyRNdVkN2b9Es6ZqInRx5ZGI,2958
391
391
  datahub/ingestion/source/redshift/usage.py,sha256=1Zqa_1OQYRI1l-5cHfk-VW2ik1A2AuL3_wjJDuFGBzs,17373
392
392
  datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
393
393
  datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs96xf8g44,7847
@@ -427,7 +427,7 @@ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=w2CPm5XEU-KMUS
427
427
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
428
428
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
429
429
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=oNmtg-ZVcZ3-w1X5t-JGv2qTH64Z0qzEnaZaRxbRquo,38035
430
- datahub/ingestion/source/snowflake/snowflake_report.py,sha256=jQv1wnexD6bOkmM8HXpdg-aP4pRHZjxE6MV1qDqx2b8,6435
430
+ datahub/ingestion/source/snowflake/snowflake_report.py,sha256=KjNvYufQMVkFP7F5sEFumKorkiFAmFVCQ1jYqXr0ev0,6419
431
431
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fatrKpBUY9CnzXhLJcFlHkHGt0QWFhkYH9ZXwWoQCLA,20392
432
432
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=4A9DxtEoGtEeTEy2YbKxllLFjsRFWjFsFzWq2TqOASw,38838
433
433
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-gPdzwtON9dJa0eqHt-8Yr5h2Q,6366
@@ -446,21 +446,22 @@ datahub/ingestion/source/sql/hive.py,sha256=AgEo94zyBL-NLZxR5-jQlNwq_R9FQ4AUOe8w
446
446
  datahub/ingestion/source/sql/hive_metastore.py,sha256=PisLrswev583xW0xDJ5yfKWCWm_ZTl1OeuaMcv_SvXc,35865
447
447
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
448
448
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
449
- datahub/ingestion/source/sql/oracle.py,sha256=exJJvMSC42Oj0IiFtpAPFatWkRu9mVu8sd7ofOdanqU,22460
449
+ datahub/ingestion/source/sql/oracle.py,sha256=ibBtjaneCFto-Rw3k2OxsbT3YHgux1aCtPtv5oA8St4,24533
450
450
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
451
451
  datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
452
- datahub/ingestion/source/sql/sql_common.py,sha256=Qe481Pct1vfl-NVpgOufSEaNSMgJGD3bq1DPvjIkPTg,52164
452
+ datahub/ingestion/source/sql/sql_common.py,sha256=Ux50vDfO8er_T9KL0unjLkPqvl49aei5E-nNMUKbPL4,50989
453
453
  datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxvJ6cLbCIea7Mww,9449
454
454
  datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
455
- datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=vWna43rv9ClfK9MBNTx8tdoUr35nD5d8ppgeamEEhSQ,12528
455
+ datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=6QbhkQH_F13GV1HsavVTq3BE9F7Pr_vfGOjCX2o2c60,11675
456
+ datahub/ingestion/source/sql/sql_report.py,sha256=19YVvatcCZsBP533HWn0X9Y30jo4TUxSkQ9rYpMQpT4,2487
456
457
  datahub/ingestion/source/sql/sql_types.py,sha256=XcZo5CYo1kHVkvD8lDCFqWQxaLL_CzJC-kV1gvXaXiY,12676
457
458
  datahub/ingestion/source/sql/sql_utils.py,sha256=w9YFNm_qJNjOcWAWBI_lUoFMbd0wT8q0LoT7Ia71tIE,8100
458
459
  datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
459
460
  datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
460
- datahub/ingestion/source/sql/teradata.py,sha256=PE6UeDNe-LjVzphfUImQTwCnASxDOQkM0QV1qB6D84Q,32524
461
+ datahub/ingestion/source/sql/teradata.py,sha256=_MXXFgaVJWKR9dgGNka-CpzKYy919yDdex2qSrqeVdE,32508
461
462
  datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhllRJFUzfU8,17895
462
463
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
463
- datahub/ingestion/source/sql/vertica.py,sha256=pkx-1JDBmow7WhoEIEh0SLj7kff9L0zUOHDytoC43gk,33339
464
+ datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
464
465
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
465
466
  datahub/ingestion/source/sql/mssql/job_models.py,sha256=eMyR0Efl5kvi7QNgNXzd5_6PdDKYly_552Y8OGSj9PY,6012
466
467
  datahub/ingestion/source/sql/mssql/source.py,sha256=fzpWjwexGvJgpd6Z4DCsK6Ld2vQCfPkD2M1xE4pU9Ec,29542
@@ -493,7 +494,7 @@ datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se
493
494
  datahub/ingestion/source/unity/proxy.py,sha256=2-pYQ-3B9UVUwO1yB9iTdi3DqgqZ2JrpQknLodI7UjM,18976
494
495
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
495
496
  datahub/ingestion/source/unity/proxy_types.py,sha256=g4qhREewta0vapO7JJPHPli_ZXmtYVmGBNzt_TtfNAQ,9307
496
- datahub/ingestion/source/unity/report.py,sha256=Xi5wKqdJhEDRbq_hcvVEIxELeRj_iRqBw-rDV4owZVI,2804
497
+ datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
497
498
  datahub/ingestion/source/unity/source.py,sha256=i2WU0H6Gvce51I3qWVOoEp6lZ1FAxEm_9u2qS6zmsL8,41482
498
499
  datahub/ingestion/source/unity/usage.py,sha256=r91-ishhv9QTNLevVhQ9HPZ47CRvVeeAMBtWuRsONxk,11089
499
500
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -555,7 +556,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
555
556
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
556
557
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
557
558
  datahub/metadata/_schema_classes.py,sha256=g7jG02LNEUUFRP76CCJhdi9JIsvbanWivbeWjb9B0p8,950122
558
- datahub/metadata/schema.avsc,sha256=0EO3TupbhhCHiH_SkLQi4u4bVXv7Oka9wLTcvTI3070,675487
559
+ datahub/metadata/schema.avsc,sha256=YttbenOzBUCzHHXR3HztORZ746UckyicBk7IyXvG-yU,675487
559
560
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
560
561
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
561
562
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -633,7 +634,7 @@ datahub/metadata/com/linkedin/pegasus2avro/view/__init__.py,sha256=-Le-jOqUJKv3p
633
634
  datahub/metadata/schemas/Access.avsc,sha256=gdEfWJLkvjIz-jzlceK4Dl5pBDdCHG423Ba_EYGQgUk,1562
634
635
  datahub/metadata/schemas/Actors.avsc,sha256=M76L2_Dlp7VyhVtu9__jhnh8rBNvNobtNJUfvl7bcPE,1188
635
636
  datahub/metadata/schemas/AssertionActions.avsc,sha256=zrvXzX2Nv_dmK6I3ZXCVWQ1bVs1q01Gl9sDRJA8oSDU,1618
636
- datahub/metadata/schemas/AssertionInfo.avsc,sha256=Rg6bgc5zk1PsP23TDaj3dI7HgKQiH8RdrLUoZ3sPQ8E,125083
637
+ datahub/metadata/schemas/AssertionInfo.avsc,sha256=BRkjHzkTCjfqaYpm8EDiLtTAhN50yMkHH5eE3QLpGbY,125083
637
638
  datahub/metadata/schemas/AssertionKey.avsc,sha256=EjNaTyzGxtWzTsZd87P2ZSaGX5dn8Y7HGBdqvlQVrFI,638
638
639
  datahub/metadata/schemas/AssertionRunEvent.avsc,sha256=FUyV73bUliBC-a_XFUlfgh75o99-lu1fl36b2q8Pqx4,12886
639
640
  datahub/metadata/schemas/BrowsePaths.avsc,sha256=NR_4dKuJMk1X2RB4DLkHVSqyMQc4PvT7eR0n6lM5aOM,654
@@ -769,7 +770,7 @@ datahub/metadata/schemas/IncidentInfo.avsc,sha256=JCKSXCjTwwONte-GA4QhstlBn5Ditu
769
770
  datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
770
771
  datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
771
772
  datahub/metadata/schemas/IncidentsSummary.avsc,sha256=NTYp-6Oe92ALApbM3759TJ5pLXRArsSriIPq-f7w9vI,4514
772
- datahub/metadata/schemas/InputFields.avsc,sha256=YkwSoAOU45W3ELJxypZ2GOvk5FNvKgx8JT7WHu6Pf-0,33671
773
+ datahub/metadata/schemas/InputFields.avsc,sha256=jmmfYSqKTeyQGrIlgPxVMvTQmG4NYuriPMA8K14cM-4,33671
773
774
  datahub/metadata/schemas/InstitutionalMemory.avsc,sha256=vNeAoFSIH1jEDa2rEyTStUjWDvf53W9AvL7F8oh7isY,3785
774
775
  datahub/metadata/schemas/IntendedUse.avsc,sha256=IKZSWdvc0uAyyT-FtdQOGbMC-P7RS9cO0vOVKWT6fbw,1361
775
776
  datahub/metadata/schemas/InviteToken.avsc,sha256=8k_9MxHu9GVf7gvS0SlnQu7tJfpbXsRFdz6lQrFKPNc,737
@@ -789,7 +790,7 @@ datahub/metadata/schemas/MLModelKey.avsc,sha256=deK5u7b9S9-qYUNtlflj2OUxqS_PlUYW
789
790
  datahub/metadata/schemas/MLModelProperties.avsc,sha256=wGCQ-yMLnss-rRl-NigFPMLEesvSeK0t1iOblyG_m0U,8411
790
791
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
791
792
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=akhjegh2q_4pm4_C2mP0rWpCGVqmJ8Ta6X8lqNtbVbg,4468
792
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=tNPN762s7BwlrpoR389kKf8nEqZfAhmLade2pHmeJ7c,367471
793
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=y5J5nOdo_GJZnVkerZlbNJWfZqA_Uw-cW8GCCxI0J0s,367471
793
794
  datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
794
795
  datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
795
796
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
@@ -819,7 +820,7 @@ datahub/metadata/schemas/RoleProperties.avsc,sha256=tDw-WF1uBGIcrk38nOnXs3FCF_Yj
819
820
  datahub/metadata/schemas/SchemaFieldAliases.avsc,sha256=El_cxn0KUhMf2LGfMPzcZ6Xtths2wQOaF9fnM1KQmxQ,560
820
821
  datahub/metadata/schemas/SchemaFieldInfo.avsc,sha256=Gf9EGqrEf10554hd4Eut7T8ZdOR-9OHgDXVRhFo311o,800
821
822
  datahub/metadata/schemas/SchemaFieldKey.avsc,sha256=ClAugan-eR71rp38YJklEglca8EW5MMAxEQLoSX-L6Y,946
822
- datahub/metadata/schemas/SchemaMetadata.avsc,sha256=U2XEwkd-DV7TX8IA3dRebssv-xa9WJlGvlNQHWcBvnc,40849
823
+ datahub/metadata/schemas/SchemaMetadata.avsc,sha256=1rUs2G3tpG02FNxRGMlRaW0FDeTEwS36Rmh2Obz4h40,40849
823
824
  datahub/metadata/schemas/Siblings.avsc,sha256=NTktntlHuA1InH3TgrspWlFBntYlqmp3erUd-JFMsps,842
824
825
  datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
825
826
  datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
@@ -970,8 +971,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
970
971
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
971
972
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
972
973
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
973
- acryl_datahub-0.14.1.13rc5.dist-info/METADATA,sha256=9ZZBowQ_PYKf6d4V6EHqTTlJDY77BqxSX8uKfiIqFT4,171138
974
- acryl_datahub-0.14.1.13rc5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
975
- acryl_datahub-0.14.1.13rc5.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
976
- acryl_datahub-0.14.1.13rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
977
- acryl_datahub-0.14.1.13rc5.dist-info/RECORD,,
974
+ acryl_datahub-0.14.1.13rc6.dist-info/METADATA,sha256=i-L9n8PCRIcSK6UAfHiUJxDRVEom4BHtjGf2ZdGR-Hc,171138
975
+ acryl_datahub-0.14.1.13rc6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
976
+ acryl_datahub-0.14.1.13rc6.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
977
+ acryl_datahub-0.14.1.13rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
978
+ acryl_datahub-0.14.1.13rc6.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.14.1.13rc5"
6
+ __version__ = "0.14.1.13rc6"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -30,6 +30,9 @@ class CallableConsumerConfig:
30
30
 
31
31
  call_back = self.get_call_back_attribute()
32
32
 
33
- assert call_back # to silent lint
33
+ assert isinstance(call_back, str), (
34
+ "oauth_cb must be a string representing python function reference "
35
+ "in the format <python-module>:<function-name>."
36
+ )
34
37
  # Set the callback
35
38
  self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = import_path(call_back)
@@ -8,7 +8,7 @@ import pydantic
8
8
 
9
9
  from datahub.ingestion.api.report import Report
10
10
  from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin
11
- from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport
11
+ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
12
12
  from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
13
13
  from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
14
14
  from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
@@ -77,7 +77,7 @@ class BigQueryQueriesExtractorReport(Report):
77
77
 
78
78
  @dataclass
79
79
  class BigQueryV2Report(
80
- ProfilingSqlReport,
80
+ SQLSourceReport,
81
81
  IngestionStageReport,
82
82
  BaseTimeWindowReport,
83
83
  ClassificationReportMixin,
@@ -152,6 +152,21 @@ class BigqueryDataset:
152
152
  snapshots: List[BigqueryTableSnapshot] = field(default_factory=list)
153
153
  columns: List[BigqueryColumn] = field(default_factory=list)
154
154
 
155
+ # Some INFORMATION_SCHEMA views are not available for BigLake tables
156
+ # based on Amazon S3 and Blob Storage data.
157
+ # https://cloud.google.com/bigquery/docs/omni-introduction#limitations
158
+ # Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
159
+ def is_biglake_dataset(self) -> bool:
160
+ return self.location is not None and self.location.lower().startswith(
161
+ ("aws-", "azure-")
162
+ )
163
+
164
+ def supports_table_constraints(self) -> bool:
165
+ return not self.is_biglake_dataset()
166
+
167
+ def supports_table_partitions(self) -> bool:
168
+ return not self.is_biglake_dataset()
169
+
155
170
 
156
171
  @dataclass
157
172
  class BigqueryProject:
@@ -541,18 +556,26 @@ class BigQuerySchemaApi:
541
556
  table_name=constraint.table_name,
542
557
  type=constraint.constraint_type,
543
558
  field_path=constraint.column_name,
544
- referenced_project_id=constraint.referenced_catalog
545
- if constraint.constraint_type == "FOREIGN KEY"
546
- else None,
547
- referenced_dataset=constraint.referenced_schema
548
- if constraint.constraint_type == "FOREIGN KEY"
549
- else None,
550
- referenced_table_name=constraint.referenced_table
551
- if constraint.constraint_type == "FOREIGN KEY"
552
- else None,
553
- referenced_column_name=constraint.referenced_column
554
- if constraint.constraint_type == "FOREIGN KEY"
555
- else None,
559
+ referenced_project_id=(
560
+ constraint.referenced_catalog
561
+ if constraint.constraint_type == "FOREIGN KEY"
562
+ else None
563
+ ),
564
+ referenced_dataset=(
565
+ constraint.referenced_schema
566
+ if constraint.constraint_type == "FOREIGN KEY"
567
+ else None
568
+ ),
569
+ referenced_table_name=(
570
+ constraint.referenced_table
571
+ if constraint.constraint_type == "FOREIGN KEY"
572
+ else None
573
+ ),
574
+ referenced_column_name=(
575
+ constraint.referenced_column
576
+ if constraint.constraint_type == "FOREIGN KEY"
577
+ else None
578
+ ),
556
579
  )
557
580
  )
558
581
  self.report.num_get_table_constraints_for_dataset_api_requests += 1
@@ -498,7 +498,10 @@ class BigQuerySchemaGenerator:
498
498
  report=self.report,
499
499
  rate_limiter=rate_limiter,
500
500
  )
501
- if self.config.include_table_constraints:
501
+ if (
502
+ self.config.include_table_constraints
503
+ and bigquery_dataset.supports_table_constraints()
504
+ ):
502
505
  constraints = self.schema_api.get_table_constraints_for_dataset(
503
506
  project_id=project_id, dataset_name=dataset_name, report=self.report
504
507
  )
@@ -1157,9 +1160,11 @@ class BigQuerySchemaGenerator:
1157
1160
  # fields=[],
1158
1161
  fields=self.gen_schema_fields(
1159
1162
  columns,
1160
- table.constraints
1161
- if (isinstance(table, BigqueryTable) and table.constraints)
1162
- else [],
1163
+ (
1164
+ table.constraints
1165
+ if (isinstance(table, BigqueryTable) and table.constraints)
1166
+ else []
1167
+ ),
1163
1168
  ),
1164
1169
  foreignKeys=foreign_keys if foreign_keys else None,
1165
1170
  )
@@ -1180,13 +1185,9 @@ class BigQuerySchemaGenerator:
1180
1185
  ) -> Iterable[BigqueryTable]:
1181
1186
  # In bigquery there is no way to query all tables in a Project id
1182
1187
  with PerfTimer() as timer:
1183
- # PARTITIONS INFORMATION_SCHEMA view is not available for BigLake tables
1184
- # based on Amazon S3 and Blob Storage data.
1185
- # https://cloud.google.com/bigquery/docs/omni-introduction#limitations
1186
- # Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
1187
- with_partitions = self.config.have_table_data_read_permission and not (
1188
- dataset.location
1189
- and dataset.location.lower().startswith(("aws-", "azure-"))
1188
+ with_partitions = (
1189
+ self.config.have_table_data_read_permission
1190
+ and dataset.supports_table_partitions()
1190
1191
  )
1191
1192
 
1192
1193
  # Partitions view throw exception if we try to query partition info for too many tables
@@ -1,7 +1,7 @@
1
1
  from dataclasses import dataclass
2
2
  from datetime import datetime
3
3
 
4
- from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport
4
+ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
5
5
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
6
6
  StaleEntityRemovalSourceReport,
7
7
  )
@@ -10,7 +10,7 @@ from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
10
10
 
11
11
  @dataclass
12
12
  class DremioSourceReport(
13
- ProfilingSqlReport, StaleEntityRemovalSourceReport, IngestionStageReport
13
+ SQLSourceReport, StaleEntityRemovalSourceReport, IngestionStageReport
14
14
  ):
15
15
  num_containers_failed: int = 0
16
16
  num_datasets_failed: int = 0
@@ -55,7 +55,7 @@ from datahub.ingestion.source.profiling.common import (
55
55
  Cardinality,
56
56
  convert_to_cardinality,
57
57
  )
58
- from datahub.ingestion.source.sql.sql_common import SQLSourceReport
58
+ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
59
59
  from datahub.metadata.com.linkedin.pegasus2avro.schema import EditableSchemaMetadata
60
60
  from datahub.metadata.schema_classes import (
61
61
  DatasetFieldProfileClass,
@@ -125,12 +125,16 @@ class GEProfilingConfig(GEProfilingBaseConfig):
125
125
 
126
126
  profile_table_size_limit: Optional[int] = Field(
127
127
  default=5,
128
- description="Profile tables only if their size is less then specified GBs. If set to `null`, no limit on the size of tables to profile. Supported only in `snowflake` and `BigQuery`",
128
+ description="Profile tables only if their size is less than specified GBs. If set to `null`, "
129
+ "no limit on the size of tables to profile. Supported only in `snowflake` and `BigQuery`"
130
+ "Supported for `oracle` based on calculated size from gathered stats.",
129
131
  )
130
132
 
131
133
  profile_table_row_limit: Optional[int] = Field(
132
134
  default=5000000,
133
- description="Profile tables only if their row count is less then specified count. If set to `null`, no limit on the row count of tables to profile. Supported only in `snowflake` and `BigQuery`",
135
+ description="Profile tables only if their row count is less than specified count. If set to `null`, "
136
+ "no limit on the row count of tables to profile. Supported only in `snowflake` and `BigQuery`"
137
+ "Supported for `oracle` based on gathered stats.",
134
138
  )
135
139
 
136
140
  profile_table_row_count_estimate_only: bool = Field(
@@ -3,7 +3,7 @@ from datetime import datetime
3
3
  from typing import Dict, Optional
4
4
 
5
5
  from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin
6
- from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport
6
+ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
7
7
  from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
8
8
  from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
9
9
  from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
@@ -14,7 +14,7 @@ from datahub.utilities.stats_collections import TopKDict
14
14
 
15
15
  @dataclass
16
16
  class RedshiftReport(
17
- ProfilingSqlReport,
17
+ SQLSourceReport,
18
18
  IngestionStageReport,
19
19
  BaseTimeWindowReport,
20
20
  ClassificationReportMixin,
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Dict, List, MutableSet, Optional
5
5
  from datahub.ingestion.api.report import Report
6
6
  from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin
7
7
  from datahub.ingestion.source.snowflake.constants import SnowflakeEdition
8
- from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport
8
+ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
9
9
  from datahub.ingestion.source.state.stateful_ingestion_base import (
10
10
  StatefulIngestionReport,
11
11
  )
@@ -59,7 +59,7 @@ class SnowflakeUsageReport:
59
59
 
60
60
 
61
61
  @dataclass
62
- class SnowflakeReport(ProfilingSqlReport, BaseTimeWindowReport):
62
+ class SnowflakeReport(SQLSourceReport, BaseTimeWindowReport):
63
63
  num_table_to_table_edges_scanned: int = 0
64
64
  num_table_to_view_edges_scanned: int = 0
65
65
  num_view_to_table_edges_scanned: int = 0
@@ -1,3 +1,4 @@
1
+ import datetime
1
2
  import logging
2
3
  import re
3
4
 
@@ -631,3 +632,52 @@ class OracleSource(SQLAlchemySource):
631
632
  clear=False,
632
633
  ):
633
634
  return super().get_workunits()
635
+
636
+ def generate_profile_candidates(
637
+ self,
638
+ inspector: Inspector,
639
+ threshold_time: Optional[datetime.datetime],
640
+ schema: str,
641
+ ) -> Optional[List[str]]:
642
+ tables_table_name = (
643
+ "ALL_TABLES" if self.config.data_dictionary_mode == "ALL" else "DBA_TABLES"
644
+ )
645
+
646
+ # If stats are available , they are used even if they are stale.
647
+ # Assuming that the table would typically grow over time, this will ensure to filter
648
+ # large tables known at stats collection time from profiling candidates.
649
+ # If stats are not available (NULL), such tables are not filtered and are considered
650
+ # as profiling candidates.
651
+ cursor = inspector.bind.execute(
652
+ sql.text(
653
+ f"""SELECT
654
+ t.OWNER,
655
+ t.TABLE_NAME,
656
+ t.NUM_ROWS,
657
+ t.LAST_ANALYZED,
658
+ COALESCE(t.NUM_ROWS * t.AVG_ROW_LEN, 0) / (1024 * 1024 * 1024) AS SIZE_GB
659
+ FROM {tables_table_name} t
660
+ WHERE t.OWNER = :owner
661
+ AND (t.NUM_ROWS < :table_row_limit OR t.NUM_ROWS IS NULL)
662
+ AND COALESCE(t.NUM_ROWS * t.AVG_ROW_LEN, 0) / (1024 * 1024 * 1024) < :table_size_limit
663
+ """
664
+ ),
665
+ dict(
666
+ owner=inspector.dialect.denormalize_name(schema),
667
+ table_row_limit=self.config.profiling.profile_table_row_limit,
668
+ table_size_limit=self.config.profiling.profile_table_size_limit,
669
+ ),
670
+ )
671
+
672
+ TABLE_NAME_COL_LOC = 1
673
+ return [
674
+ self.get_identifier(
675
+ schema=schema,
676
+ entity=inspector.dialect.normalize_name(row[TABLE_NAME_COL_LOC])
677
+ or _raise_err(
678
+ ValueError(f"Invalid table name: {row[TABLE_NAME_COL_LOC]}")
679
+ ),
680
+ inspector=inspector,
681
+ )
682
+ for row in cursor
683
+ ]
@@ -51,7 +51,6 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
51
51
  from datahub.ingestion.glossary.classification_mixin import (
52
52
  SAMPLE_SIZE_MULTIPLIER,
53
53
  ClassificationHandler,
54
- ClassificationReportMixin,
55
54
  )
56
55
  from datahub.ingestion.source.common.data_reader import DataReader
57
56
  from datahub.ingestion.source.common.subtypes import (
@@ -59,6 +58,7 @@ from datahub.ingestion.source.common.subtypes import (
59
58
  DatasetSubTypes,
60
59
  )
61
60
  from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
61
+ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
62
62
  from datahub.ingestion.source.sql.sql_utils import (
63
63
  add_table_to_schema_container,
64
64
  downgrade_schema_from_v2,
@@ -74,7 +74,6 @@ from datahub.ingestion.source.sql.sqlalchemy_data_reader import (
74
74
  )
75
75
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
76
76
  StaleEntityRemovalHandler,
77
- StaleEntityRemovalSourceReport,
78
77
  )
79
78
  from datahub.ingestion.source.state.stateful_ingestion_base import (
80
79
  StatefulIngestionSourceBase,
@@ -118,9 +117,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
118
117
  )
119
118
  from datahub.telemetry import telemetry
120
119
  from datahub.utilities.file_backed_collections import FileBackedDict
121
- from datahub.utilities.lossy_collections import LossyList
122
120
  from datahub.utilities.registries.domain_registry import DomainRegistry
123
- from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport
124
121
  from datahub.utilities.sqlalchemy_type_converter import (
125
122
  get_native_data_type_for_sqlalchemy_type,
126
123
  )
@@ -134,43 +131,6 @@ if TYPE_CHECKING:
134
131
  logger: logging.Logger = logging.getLogger(__name__)
135
132
 
136
133
 
137
- @dataclass
138
- class SQLSourceReport(StaleEntityRemovalSourceReport, ClassificationReportMixin):
139
- tables_scanned: int = 0
140
- views_scanned: int = 0
141
- entities_profiled: int = 0
142
- filtered: LossyList[str] = field(default_factory=LossyList)
143
-
144
- query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None
145
-
146
- num_view_definitions_parsed: int = 0
147
- num_view_definitions_failed_parsing: int = 0
148
- num_view_definitions_failed_column_parsing: int = 0
149
- view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
150
-
151
- def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
152
- """
153
- Entity could be a view or a table
154
- """
155
- if ent_type == "table":
156
- self.tables_scanned += 1
157
- elif ent_type == "view":
158
- self.views_scanned += 1
159
- else:
160
- raise KeyError(f"Unknown entity {ent_type}.")
161
-
162
- def report_entity_profiled(self, name: str) -> None:
163
- self.entities_profiled += 1
164
-
165
- def report_dropped(self, ent_name: str) -> None:
166
- self.filtered.append(ent_name)
167
-
168
- def report_from_query_combiner(
169
- self, query_combiner_report: SQLAlchemyQueryCombinerReport
170
- ) -> None:
171
- self.query_combiner = query_combiner_report
172
-
173
-
174
134
  class SqlWorkUnit(MetadataWorkUnit):
175
135
  pass
176
136
 
@@ -352,7 +312,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
352
312
 
353
313
  def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str):
354
314
  super().__init__(config, ctx)
355
- self.config = config
315
+ self.config: SQLCommonConfig = config
356
316
  self.platform = platform
357
317
  self.report: SQLSourceReport = SQLSourceReport()
358
318
  self.profile_metadata_info: ProfileMetadata = ProfileMetadata()
@@ -1282,17 +1242,22 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
1282
1242
  def is_dataset_eligible_for_profiling(
1283
1243
  self,
1284
1244
  dataset_name: str,
1285
- sql_config: SQLCommonConfig,
1245
+ schema: str,
1286
1246
  inspector: Inspector,
1287
1247
  profile_candidates: Optional[List[str]],
1288
1248
  ) -> bool:
1289
- return (
1290
- sql_config.table_pattern.allowed(dataset_name)
1291
- and sql_config.profile_pattern.allowed(dataset_name)
1292
- ) and (
1293
- profile_candidates is None
1294
- or (profile_candidates is not None and dataset_name in profile_candidates)
1295
- )
1249
+ if not (
1250
+ self.config.table_pattern.allowed(dataset_name)
1251
+ and self.config.profile_pattern.allowed(dataset_name)
1252
+ ):
1253
+ self.report.profiling_skipped_table_profile_pattern[schema] += 1
1254
+ return False
1255
+
1256
+ if profile_candidates is not None and dataset_name not in profile_candidates:
1257
+ self.report.profiling_skipped_other[schema] += 1
1258
+ return False
1259
+
1260
+ return True
1296
1261
 
1297
1262
  def loop_profiler_requests(
1298
1263
  self,
@@ -1307,7 +1272,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
1307
1272
  if (
1308
1273
  sql_config.profiling.profile_if_updated_since_days is not None
1309
1274
  or sql_config.profiling.profile_table_size_limit is not None
1310
- or sql_config.profiling.profile_table_row_limit is None
1275
+ or sql_config.profiling.profile_table_row_limit is not None
1311
1276
  ):
1312
1277
  try:
1313
1278
  threshold_time: Optional[datetime.datetime] = None
@@ -1328,8 +1293,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
1328
1293
  schema=schema, entity=table, inspector=inspector
1329
1294
  )
1330
1295
  if not self.is_dataset_eligible_for_profiling(
1331
- dataset_name, sql_config, inspector, profile_candidates
1296
+ dataset_name, schema, inspector, profile_candidates
1332
1297
  ):
1298
+ self.report.num_tables_not_eligible_profiling[schema] += 1
1333
1299
  if self.config.profiling.report_dropped_profiles:
1334
1300
  self.report.report_dropped(f"profile of {dataset_name}")
1335
1301
  continue