acryl-datahub 0.14.1.13rc5__py3-none-any.whl → 0.14.1.13rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc7.dist-info}/METADATA +2332 -2332
- {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc7.dist-info}/RECORD +29 -28
- datahub/__init__.py +1 -1
- datahub/configuration/kafka_consumer_config.py +4 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +4 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +35 -12
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +12 -11
- datahub/ingestion/source/dremio/dremio_reporting.py +2 -2
- datahub/ingestion/source/ge_data_profiler.py +1 -1
- datahub/ingestion/source/ge_profiling_config.py +6 -2
- datahub/ingestion/source/redshift/report.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_report.py +2 -2
- datahub/ingestion/source/sql/oracle.py +50 -0
- datahub/ingestion/source/sql/sql_common.py +18 -52
- datahub/ingestion/source/sql/sql_generic_profiler.py +3 -32
- datahub/ingestion/source/sql/sql_report.py +75 -0
- datahub/ingestion/source/sql/teradata.py +2 -2
- datahub/ingestion/source/sql/vertica.py +2 -2
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/report.py +2 -2
- datahub/metadata/schema.avsc +1 -1
- datahub/metadata/schemas/AssertionInfo.avsc +1 -1
- datahub/metadata/schemas/InputFields.avsc +1 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +1 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +1 -1
- {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.14.1.13rc5.dist-info → acryl_datahub-0.14.1.13rc7.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=iJWtbGzvLO2Ab_1AQTeXndrbJHFOcpOmE_RHQzAvOEo,577
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -97,7 +97,7 @@ datahub/configuration/git.py,sha256=s55eUHxKqVZgtVsISaDyS-1F4iZBiybbjYsjbp5LU5o,
|
|
|
97
97
|
datahub/configuration/import_resolver.py,sha256=b4Ie9L7knN1LALEVMxTcNFSklDD6CVE-4Ipy4ZYhNYA,369
|
|
98
98
|
datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7xGHzg84,302
|
|
99
99
|
datahub/configuration/kafka.py,sha256=MlIwpd5FFyOyjdDXW_X9JTLNk7f988sPMgevkcZYVgI,2579
|
|
100
|
-
datahub/configuration/kafka_consumer_config.py,sha256=
|
|
100
|
+
datahub/configuration/kafka_consumer_config.py,sha256=14UWK6kTAnKTgHM43aeWjg67fddACsSLzhO6wgp6cm4,1175
|
|
101
101
|
datahub/configuration/pattern_utils.py,sha256=Q5IB9RfWOOo5FvRVBU7XkhiwHCxSQ1NTMfUlWtWI9qc,699
|
|
102
102
|
datahub/configuration/pydantic_migration_helpers.py,sha256=4C_COAVZ5iJ8yxcWNgXZNWsY7ULogICNZ368oNF7zWg,1462
|
|
103
103
|
datahub/configuration/source_common.py,sha256=68LZOuB23zSEcfgQJE1wZQnyYQHVVnEZK3Sniv_nEQs,2107
|
|
@@ -189,8 +189,8 @@ datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7Ek
|
|
|
189
189
|
datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
|
|
190
190
|
datahub/ingestion/source/feast.py,sha256=NYaAjzLVRhmMKDawBwN0OL8AMyKDLsxOwEj3YFX0wIA,14244
|
|
191
191
|
datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
|
|
192
|
-
datahub/ingestion/source/ge_data_profiler.py,sha256=
|
|
193
|
-
datahub/ingestion/source/ge_profiling_config.py,sha256=
|
|
192
|
+
datahub/ingestion/source/ge_data_profiler.py,sha256=jORUlsmN2XtHm3QyltENvhEyt-CwbF2O548mFxtisxY,63587
|
|
193
|
+
datahub/ingestion/source/ge_profiling_config.py,sha256=E65adlsUrs17mQB7WQnoe3QCjvbGaGoNNPMf8szNK6s,10648
|
|
194
194
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
195
195
|
datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
|
|
196
196
|
datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
|
|
@@ -231,7 +231,7 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
|
|
|
231
231
|
datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
|
|
232
232
|
datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
|
|
233
233
|
datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
234
|
-
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256
|
|
234
|
+
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=-12CZWeSIAkI6Kb4AY8NAF3wsC_2lxhPErm5o0oUUes,14116
|
|
235
235
|
datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=oFccDfFNU3vjjfe5QlV0EtSp8Ow4SBd6h2KdGgj7XW8,25115
|
|
236
236
|
datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
|
|
237
237
|
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=xnYWxbhvv-rJRHLGkOWIAn4Ir__hwinEZF1F7TWWirE,26086
|
|
@@ -239,9 +239,9 @@ datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8
|
|
|
239
239
|
datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
|
|
240
240
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
|
|
241
241
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
|
|
242
|
-
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=
|
|
243
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=
|
|
244
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
242
|
+
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
|
|
243
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=QQk2xnyLCpywwRA3a2Pm95zJd0LgJUGbe5ht-5yadmQ,32352
|
|
244
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=JkQqG8GrMnsp6efUCSv1Efc0ZUmdC6q5_M6wWIyg_dQ,51774
|
|
245
245
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
246
246
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
247
247
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Jg_pwnaj7l_KEcgq0enJXwrKh5jyUfBl4YB05YpkIVg,45415
|
|
@@ -287,7 +287,7 @@ datahub/ingestion/source/dremio/dremio_config.py,sha256=Mu9LjohVwVPdVgM53FhBDquJ
|
|
|
287
287
|
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=YkYC3-TB-Jn65z2GN_NMErQDovwU7krQ9b92DBh4uvY,3021
|
|
288
288
|
datahub/ingestion/source/dremio/dremio_entities.py,sha256=cIupn6fRxYX3FrcsdoDTSmvOTHLuaoyuNEkHV70-ve0,14902
|
|
289
289
|
datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
|
|
290
|
-
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=
|
|
290
|
+
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=IPgv7lOnhK6mQeqwRsPscKnXhzgVZG8Id3yNcsmG7nw,1273
|
|
291
291
|
datahub/ingestion/source/dremio/dremio_source.py,sha256=vQTYxB-PQdoMsoEY69ewtfraDloKypHF6pXv8jbVlG4,25940
|
|
292
292
|
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
|
|
293
293
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -387,7 +387,7 @@ datahub/ingestion/source/redshift/query.py,sha256=bY1D9RoOHaw89LgcXal7GYlJN0RG7P
|
|
|
387
387
|
datahub/ingestion/source/redshift/redshift.py,sha256=j3yz9cct77IU2RaiaXDWiTZIGxoZGFUxGFWvNEVaw7E,44234
|
|
388
388
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
389
389
|
datahub/ingestion/source/redshift/redshift_schema.py,sha256=9IYeUsnISenq3eVB3k-s7zK8nInWDAYViFnDrNjtkb0,19149
|
|
390
|
-
datahub/ingestion/source/redshift/report.py,sha256=
|
|
390
|
+
datahub/ingestion/source/redshift/report.py,sha256=M19aUHBkd9n-BVBX4fRhyRNdVkN2b9Es6ZqInRx5ZGI,2958
|
|
391
391
|
datahub/ingestion/source/redshift/usage.py,sha256=1Zqa_1OQYRI1l-5cHfk-VW2ik1A2AuL3_wjJDuFGBzs,17373
|
|
392
392
|
datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_3LM8PBjlJM,56
|
|
393
393
|
datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs96xf8g44,7847
|
|
@@ -427,7 +427,7 @@ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=w2CPm5XEU-KMUS
|
|
|
427
427
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
428
428
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
|
|
429
429
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=oNmtg-ZVcZ3-w1X5t-JGv2qTH64Z0qzEnaZaRxbRquo,38035
|
|
430
|
-
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=
|
|
430
|
+
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=KjNvYufQMVkFP7F5sEFumKorkiFAmFVCQ1jYqXr0ev0,6419
|
|
431
431
|
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fatrKpBUY9CnzXhLJcFlHkHGt0QWFhkYH9ZXwWoQCLA,20392
|
|
432
432
|
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=4A9DxtEoGtEeTEy2YbKxllLFjsRFWjFsFzWq2TqOASw,38838
|
|
433
433
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-gPdzwtON9dJa0eqHt-8Yr5h2Q,6366
|
|
@@ -446,21 +446,22 @@ datahub/ingestion/source/sql/hive.py,sha256=AgEo94zyBL-NLZxR5-jQlNwq_R9FQ4AUOe8w
|
|
|
446
446
|
datahub/ingestion/source/sql/hive_metastore.py,sha256=PisLrswev583xW0xDJ5yfKWCWm_ZTl1OeuaMcv_SvXc,35865
|
|
447
447
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
448
448
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
449
|
-
datahub/ingestion/source/sql/oracle.py,sha256=
|
|
449
|
+
datahub/ingestion/source/sql/oracle.py,sha256=ibBtjaneCFto-Rw3k2OxsbT3YHgux1aCtPtv5oA8St4,24533
|
|
450
450
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
451
451
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
452
|
-
datahub/ingestion/source/sql/sql_common.py,sha256=
|
|
452
|
+
datahub/ingestion/source/sql/sql_common.py,sha256=Ux50vDfO8er_T9KL0unjLkPqvl49aei5E-nNMUKbPL4,50989
|
|
453
453
|
datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxvJ6cLbCIea7Mww,9449
|
|
454
454
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
455
|
-
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=
|
|
455
|
+
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=6QbhkQH_F13GV1HsavVTq3BE9F7Pr_vfGOjCX2o2c60,11675
|
|
456
|
+
datahub/ingestion/source/sql/sql_report.py,sha256=19YVvatcCZsBP533HWn0X9Y30jo4TUxSkQ9rYpMQpT4,2487
|
|
456
457
|
datahub/ingestion/source/sql/sql_types.py,sha256=XcZo5CYo1kHVkvD8lDCFqWQxaLL_CzJC-kV1gvXaXiY,12676
|
|
457
458
|
datahub/ingestion/source/sql/sql_utils.py,sha256=w9YFNm_qJNjOcWAWBI_lUoFMbd0wT8q0LoT7Ia71tIE,8100
|
|
458
459
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
459
460
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
460
|
-
datahub/ingestion/source/sql/teradata.py,sha256=
|
|
461
|
+
datahub/ingestion/source/sql/teradata.py,sha256=_MXXFgaVJWKR9dgGNka-CpzKYy919yDdex2qSrqeVdE,32508
|
|
461
462
|
datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhllRJFUzfU8,17895
|
|
462
463
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
|
|
463
|
-
datahub/ingestion/source/sql/vertica.py,sha256=
|
|
464
|
+
datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
|
|
464
465
|
datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
|
|
465
466
|
datahub/ingestion/source/sql/mssql/job_models.py,sha256=eMyR0Efl5kvi7QNgNXzd5_6PdDKYly_552Y8OGSj9PY,6012
|
|
466
467
|
datahub/ingestion/source/sql/mssql/source.py,sha256=fzpWjwexGvJgpd6Z4DCsK6Ld2vQCfPkD2M1xE4pU9Ec,29542
|
|
@@ -482,7 +483,7 @@ datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py
|
|
|
482
483
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
483
484
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
484
485
|
datahub/ingestion/source/tableau/tableau.py,sha256=LgsVZPMRiJiyyB9-ljj53T8WGUyBSiQwByW_1TSuxC4,130417
|
|
485
|
-
datahub/ingestion/source/tableau/tableau_common.py,sha256=
|
|
486
|
+
datahub/ingestion/source/tableau/tableau_common.py,sha256=Dy_2pvkPucZJsG_LvQZLlxNEkjh-yOXHlZ4jurq9opM,26069
|
|
486
487
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=nWElhtDo5kj5mWivZFmtVF_4Ugw0-EatBYWyDVzu5hE,2501
|
|
487
488
|
datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
488
489
|
datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
|
|
@@ -493,7 +494,7 @@ datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se
|
|
|
493
494
|
datahub/ingestion/source/unity/proxy.py,sha256=2-pYQ-3B9UVUwO1yB9iTdi3DqgqZ2JrpQknLodI7UjM,18976
|
|
494
495
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
495
496
|
datahub/ingestion/source/unity/proxy_types.py,sha256=g4qhREewta0vapO7JJPHPli_ZXmtYVmGBNzt_TtfNAQ,9307
|
|
496
|
-
datahub/ingestion/source/unity/report.py,sha256=
|
|
497
|
+
datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
|
|
497
498
|
datahub/ingestion/source/unity/source.py,sha256=i2WU0H6Gvce51I3qWVOoEp6lZ1FAxEm_9u2qS6zmsL8,41482
|
|
498
499
|
datahub/ingestion/source/unity/usage.py,sha256=r91-ishhv9QTNLevVhQ9HPZ47CRvVeeAMBtWuRsONxk,11089
|
|
499
500
|
datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -555,7 +556,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
|
|
|
555
556
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
556
557
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
557
558
|
datahub/metadata/_schema_classes.py,sha256=g7jG02LNEUUFRP76CCJhdi9JIsvbanWivbeWjb9B0p8,950122
|
|
558
|
-
datahub/metadata/schema.avsc,sha256=
|
|
559
|
+
datahub/metadata/schema.avsc,sha256=YttbenOzBUCzHHXR3HztORZ746UckyicBk7IyXvG-yU,675487
|
|
559
560
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
560
561
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
561
562
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -633,7 +634,7 @@ datahub/metadata/com/linkedin/pegasus2avro/view/__init__.py,sha256=-Le-jOqUJKv3p
|
|
|
633
634
|
datahub/metadata/schemas/Access.avsc,sha256=gdEfWJLkvjIz-jzlceK4Dl5pBDdCHG423Ba_EYGQgUk,1562
|
|
634
635
|
datahub/metadata/schemas/Actors.avsc,sha256=M76L2_Dlp7VyhVtu9__jhnh8rBNvNobtNJUfvl7bcPE,1188
|
|
635
636
|
datahub/metadata/schemas/AssertionActions.avsc,sha256=zrvXzX2Nv_dmK6I3ZXCVWQ1bVs1q01Gl9sDRJA8oSDU,1618
|
|
636
|
-
datahub/metadata/schemas/AssertionInfo.avsc,sha256=
|
|
637
|
+
datahub/metadata/schemas/AssertionInfo.avsc,sha256=BRkjHzkTCjfqaYpm8EDiLtTAhN50yMkHH5eE3QLpGbY,125083
|
|
637
638
|
datahub/metadata/schemas/AssertionKey.avsc,sha256=EjNaTyzGxtWzTsZd87P2ZSaGX5dn8Y7HGBdqvlQVrFI,638
|
|
638
639
|
datahub/metadata/schemas/AssertionRunEvent.avsc,sha256=FUyV73bUliBC-a_XFUlfgh75o99-lu1fl36b2q8Pqx4,12886
|
|
639
640
|
datahub/metadata/schemas/BrowsePaths.avsc,sha256=NR_4dKuJMk1X2RB4DLkHVSqyMQc4PvT7eR0n6lM5aOM,654
|
|
@@ -769,7 +770,7 @@ datahub/metadata/schemas/IncidentInfo.avsc,sha256=JCKSXCjTwwONte-GA4QhstlBn5Ditu
|
|
|
769
770
|
datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
|
|
770
771
|
datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
|
|
771
772
|
datahub/metadata/schemas/IncidentsSummary.avsc,sha256=NTYp-6Oe92ALApbM3759TJ5pLXRArsSriIPq-f7w9vI,4514
|
|
772
|
-
datahub/metadata/schemas/InputFields.avsc,sha256=
|
|
773
|
+
datahub/metadata/schemas/InputFields.avsc,sha256=jmmfYSqKTeyQGrIlgPxVMvTQmG4NYuriPMA8K14cM-4,33671
|
|
773
774
|
datahub/metadata/schemas/InstitutionalMemory.avsc,sha256=vNeAoFSIH1jEDa2rEyTStUjWDvf53W9AvL7F8oh7isY,3785
|
|
774
775
|
datahub/metadata/schemas/IntendedUse.avsc,sha256=IKZSWdvc0uAyyT-FtdQOGbMC-P7RS9cO0vOVKWT6fbw,1361
|
|
775
776
|
datahub/metadata/schemas/InviteToken.avsc,sha256=8k_9MxHu9GVf7gvS0SlnQu7tJfpbXsRFdz6lQrFKPNc,737
|
|
@@ -789,7 +790,7 @@ datahub/metadata/schemas/MLModelKey.avsc,sha256=deK5u7b9S9-qYUNtlflj2OUxqS_PlUYW
|
|
|
789
790
|
datahub/metadata/schemas/MLModelProperties.avsc,sha256=wGCQ-yMLnss-rRl-NigFPMLEesvSeK0t1iOblyG_m0U,8411
|
|
790
791
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
|
|
791
792
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=akhjegh2q_4pm4_C2mP0rWpCGVqmJ8Ta6X8lqNtbVbg,4468
|
|
792
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=
|
|
793
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=y5J5nOdo_GJZnVkerZlbNJWfZqA_Uw-cW8GCCxI0J0s,367471
|
|
793
794
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
|
|
794
795
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
|
|
795
796
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -819,7 +820,7 @@ datahub/metadata/schemas/RoleProperties.avsc,sha256=tDw-WF1uBGIcrk38nOnXs3FCF_Yj
|
|
|
819
820
|
datahub/metadata/schemas/SchemaFieldAliases.avsc,sha256=El_cxn0KUhMf2LGfMPzcZ6Xtths2wQOaF9fnM1KQmxQ,560
|
|
820
821
|
datahub/metadata/schemas/SchemaFieldInfo.avsc,sha256=Gf9EGqrEf10554hd4Eut7T8ZdOR-9OHgDXVRhFo311o,800
|
|
821
822
|
datahub/metadata/schemas/SchemaFieldKey.avsc,sha256=ClAugan-eR71rp38YJklEglca8EW5MMAxEQLoSX-L6Y,946
|
|
822
|
-
datahub/metadata/schemas/SchemaMetadata.avsc,sha256=
|
|
823
|
+
datahub/metadata/schemas/SchemaMetadata.avsc,sha256=1rUs2G3tpG02FNxRGMlRaW0FDeTEwS36Rmh2Obz4h40,40849
|
|
823
824
|
datahub/metadata/schemas/Siblings.avsc,sha256=NTktntlHuA1InH3TgrspWlFBntYlqmp3erUd-JFMsps,842
|
|
824
825
|
datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
|
|
825
826
|
datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
|
|
@@ -970,8 +971,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
970
971
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
971
972
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
972
973
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
973
|
-
acryl_datahub-0.14.1.
|
|
974
|
-
acryl_datahub-0.14.1.
|
|
975
|
-
acryl_datahub-0.14.1.
|
|
976
|
-
acryl_datahub-0.14.1.
|
|
977
|
-
acryl_datahub-0.14.1.
|
|
974
|
+
acryl_datahub-0.14.1.13rc7.dist-info/METADATA,sha256=GLfCmMbHHo7KoYo_AKEO6D8Cty0VP2obvuJ5sSSclMo,171138
|
|
975
|
+
acryl_datahub-0.14.1.13rc7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
976
|
+
acryl_datahub-0.14.1.13rc7.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
|
|
977
|
+
acryl_datahub-0.14.1.13rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
978
|
+
acryl_datahub-0.14.1.13rc7.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -30,6 +30,9 @@ class CallableConsumerConfig:
|
|
|
30
30
|
|
|
31
31
|
call_back = self.get_call_back_attribute()
|
|
32
32
|
|
|
33
|
-
assert call_back
|
|
33
|
+
assert isinstance(call_back, str), (
|
|
34
|
+
"oauth_cb must be a string representing python function reference "
|
|
35
|
+
"in the format <python-module>:<function-name>."
|
|
36
|
+
)
|
|
34
37
|
# Set the callback
|
|
35
38
|
self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = import_path(call_back)
|
|
@@ -95,6 +95,10 @@ def cleanup(config: BigQueryV2Config) -> None:
|
|
|
95
95
|
"Optionally enabled via `classification.enabled`",
|
|
96
96
|
supported=True,
|
|
97
97
|
)
|
|
98
|
+
@capability(
|
|
99
|
+
SourceCapability.PARTITION_SUPPORT,
|
|
100
|
+
"Enabled by default, partition keys and clustering keys are supported.",
|
|
101
|
+
)
|
|
98
102
|
class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
99
103
|
def __init__(self, ctx: PipelineContext, config: BigQueryV2Config):
|
|
100
104
|
super().__init__(config, ctx)
|
|
@@ -8,7 +8,7 @@ import pydantic
|
|
|
8
8
|
|
|
9
9
|
from datahub.ingestion.api.report import Report
|
|
10
10
|
from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin
|
|
11
|
-
from datahub.ingestion.source.sql.
|
|
11
|
+
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
12
12
|
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
13
13
|
from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
|
|
14
14
|
from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
|
|
@@ -77,7 +77,7 @@ class BigQueryQueriesExtractorReport(Report):
|
|
|
77
77
|
|
|
78
78
|
@dataclass
|
|
79
79
|
class BigQueryV2Report(
|
|
80
|
-
|
|
80
|
+
SQLSourceReport,
|
|
81
81
|
IngestionStageReport,
|
|
82
82
|
BaseTimeWindowReport,
|
|
83
83
|
ClassificationReportMixin,
|
|
@@ -152,6 +152,21 @@ class BigqueryDataset:
|
|
|
152
152
|
snapshots: List[BigqueryTableSnapshot] = field(default_factory=list)
|
|
153
153
|
columns: List[BigqueryColumn] = field(default_factory=list)
|
|
154
154
|
|
|
155
|
+
# Some INFORMATION_SCHEMA views are not available for BigLake tables
|
|
156
|
+
# based on Amazon S3 and Blob Storage data.
|
|
157
|
+
# https://cloud.google.com/bigquery/docs/omni-introduction#limitations
|
|
158
|
+
# Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
|
|
159
|
+
def is_biglake_dataset(self) -> bool:
|
|
160
|
+
return self.location is not None and self.location.lower().startswith(
|
|
161
|
+
("aws-", "azure-")
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
def supports_table_constraints(self) -> bool:
|
|
165
|
+
return not self.is_biglake_dataset()
|
|
166
|
+
|
|
167
|
+
def supports_table_partitions(self) -> bool:
|
|
168
|
+
return not self.is_biglake_dataset()
|
|
169
|
+
|
|
155
170
|
|
|
156
171
|
@dataclass
|
|
157
172
|
class BigqueryProject:
|
|
@@ -541,18 +556,26 @@ class BigQuerySchemaApi:
|
|
|
541
556
|
table_name=constraint.table_name,
|
|
542
557
|
type=constraint.constraint_type,
|
|
543
558
|
field_path=constraint.column_name,
|
|
544
|
-
referenced_project_id=
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
559
|
+
referenced_project_id=(
|
|
560
|
+
constraint.referenced_catalog
|
|
561
|
+
if constraint.constraint_type == "FOREIGN KEY"
|
|
562
|
+
else None
|
|
563
|
+
),
|
|
564
|
+
referenced_dataset=(
|
|
565
|
+
constraint.referenced_schema
|
|
566
|
+
if constraint.constraint_type == "FOREIGN KEY"
|
|
567
|
+
else None
|
|
568
|
+
),
|
|
569
|
+
referenced_table_name=(
|
|
570
|
+
constraint.referenced_table
|
|
571
|
+
if constraint.constraint_type == "FOREIGN KEY"
|
|
572
|
+
else None
|
|
573
|
+
),
|
|
574
|
+
referenced_column_name=(
|
|
575
|
+
constraint.referenced_column
|
|
576
|
+
if constraint.constraint_type == "FOREIGN KEY"
|
|
577
|
+
else None
|
|
578
|
+
),
|
|
556
579
|
)
|
|
557
580
|
)
|
|
558
581
|
self.report.num_get_table_constraints_for_dataset_api_requests += 1
|
|
@@ -498,7 +498,10 @@ class BigQuerySchemaGenerator:
|
|
|
498
498
|
report=self.report,
|
|
499
499
|
rate_limiter=rate_limiter,
|
|
500
500
|
)
|
|
501
|
-
if
|
|
501
|
+
if (
|
|
502
|
+
self.config.include_table_constraints
|
|
503
|
+
and bigquery_dataset.supports_table_constraints()
|
|
504
|
+
):
|
|
502
505
|
constraints = self.schema_api.get_table_constraints_for_dataset(
|
|
503
506
|
project_id=project_id, dataset_name=dataset_name, report=self.report
|
|
504
507
|
)
|
|
@@ -1157,9 +1160,11 @@ class BigQuerySchemaGenerator:
|
|
|
1157
1160
|
# fields=[],
|
|
1158
1161
|
fields=self.gen_schema_fields(
|
|
1159
1162
|
columns,
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
+
(
|
|
1164
|
+
table.constraints
|
|
1165
|
+
if (isinstance(table, BigqueryTable) and table.constraints)
|
|
1166
|
+
else []
|
|
1167
|
+
),
|
|
1163
1168
|
),
|
|
1164
1169
|
foreignKeys=foreign_keys if foreign_keys else None,
|
|
1165
1170
|
)
|
|
@@ -1180,13 +1185,9 @@ class BigQuerySchemaGenerator:
|
|
|
1180
1185
|
) -> Iterable[BigqueryTable]:
|
|
1181
1186
|
# In bigquery there is no way to query all tables in a Project id
|
|
1182
1187
|
with PerfTimer() as timer:
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
# Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations
|
|
1187
|
-
with_partitions = self.config.have_table_data_read_permission and not (
|
|
1188
|
-
dataset.location
|
|
1189
|
-
and dataset.location.lower().startswith(("aws-", "azure-"))
|
|
1188
|
+
with_partitions = (
|
|
1189
|
+
self.config.have_table_data_read_permission
|
|
1190
|
+
and dataset.supports_table_partitions()
|
|
1190
1191
|
)
|
|
1191
1192
|
|
|
1192
1193
|
# Partitions view throw exception if we try to query partition info for too many tables
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
|
|
4
|
-
from datahub.ingestion.source.sql.
|
|
4
|
+
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
5
5
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
6
6
|
StaleEntityRemovalSourceReport,
|
|
7
7
|
)
|
|
@@ -10,7 +10,7 @@ from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
|
10
10
|
|
|
11
11
|
@dataclass
|
|
12
12
|
class DremioSourceReport(
|
|
13
|
-
|
|
13
|
+
SQLSourceReport, StaleEntityRemovalSourceReport, IngestionStageReport
|
|
14
14
|
):
|
|
15
15
|
num_containers_failed: int = 0
|
|
16
16
|
num_datasets_failed: int = 0
|
|
@@ -55,7 +55,7 @@ from datahub.ingestion.source.profiling.common import (
|
|
|
55
55
|
Cardinality,
|
|
56
56
|
convert_to_cardinality,
|
|
57
57
|
)
|
|
58
|
-
from datahub.ingestion.source.sql.
|
|
58
|
+
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
59
59
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import EditableSchemaMetadata
|
|
60
60
|
from datahub.metadata.schema_classes import (
|
|
61
61
|
DatasetFieldProfileClass,
|
|
@@ -125,12 +125,16 @@ class GEProfilingConfig(GEProfilingBaseConfig):
|
|
|
125
125
|
|
|
126
126
|
profile_table_size_limit: Optional[int] = Field(
|
|
127
127
|
default=5,
|
|
128
|
-
description="Profile tables only if their size is less
|
|
128
|
+
description="Profile tables only if their size is less than specified GBs. If set to `null`, "
|
|
129
|
+
"no limit on the size of tables to profile. Supported only in `snowflake` and `BigQuery`"
|
|
130
|
+
"Supported for `oracle` based on calculated size from gathered stats.",
|
|
129
131
|
)
|
|
130
132
|
|
|
131
133
|
profile_table_row_limit: Optional[int] = Field(
|
|
132
134
|
default=5000000,
|
|
133
|
-
description="Profile tables only if their row count is less
|
|
135
|
+
description="Profile tables only if their row count is less than specified count. If set to `null`, "
|
|
136
|
+
"no limit on the row count of tables to profile. Supported only in `snowflake` and `BigQuery`"
|
|
137
|
+
"Supported for `oracle` based on gathered stats.",
|
|
134
138
|
)
|
|
135
139
|
|
|
136
140
|
profile_table_row_count_estimate_only: bool = Field(
|
|
@@ -3,7 +3,7 @@ from datetime import datetime
|
|
|
3
3
|
from typing import Dict, Optional
|
|
4
4
|
|
|
5
5
|
from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin
|
|
6
|
-
from datahub.ingestion.source.sql.
|
|
6
|
+
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
7
7
|
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
8
8
|
from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
|
|
9
9
|
from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
|
|
@@ -14,7 +14,7 @@ from datahub.utilities.stats_collections import TopKDict
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
16
16
|
class RedshiftReport(
|
|
17
|
-
|
|
17
|
+
SQLSourceReport,
|
|
18
18
|
IngestionStageReport,
|
|
19
19
|
BaseTimeWindowReport,
|
|
20
20
|
ClassificationReportMixin,
|
|
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Dict, List, MutableSet, Optional
|
|
|
5
5
|
from datahub.ingestion.api.report import Report
|
|
6
6
|
from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin
|
|
7
7
|
from datahub.ingestion.source.snowflake.constants import SnowflakeEdition
|
|
8
|
-
from datahub.ingestion.source.sql.
|
|
8
|
+
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
9
9
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
10
10
|
StatefulIngestionReport,
|
|
11
11
|
)
|
|
@@ -59,7 +59,7 @@ class SnowflakeUsageReport:
|
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
@dataclass
|
|
62
|
-
class SnowflakeReport(
|
|
62
|
+
class SnowflakeReport(SQLSourceReport, BaseTimeWindowReport):
|
|
63
63
|
num_table_to_table_edges_scanned: int = 0
|
|
64
64
|
num_table_to_view_edges_scanned: int = 0
|
|
65
65
|
num_view_to_table_edges_scanned: int = 0
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import logging
|
|
2
3
|
import re
|
|
3
4
|
|
|
@@ -631,3 +632,52 @@ class OracleSource(SQLAlchemySource):
|
|
|
631
632
|
clear=False,
|
|
632
633
|
):
|
|
633
634
|
return super().get_workunits()
|
|
635
|
+
|
|
636
|
+
def generate_profile_candidates(
|
|
637
|
+
self,
|
|
638
|
+
inspector: Inspector,
|
|
639
|
+
threshold_time: Optional[datetime.datetime],
|
|
640
|
+
schema: str,
|
|
641
|
+
) -> Optional[List[str]]:
|
|
642
|
+
tables_table_name = (
|
|
643
|
+
"ALL_TABLES" if self.config.data_dictionary_mode == "ALL" else "DBA_TABLES"
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
# If stats are available , they are used even if they are stale.
|
|
647
|
+
# Assuming that the table would typically grow over time, this will ensure to filter
|
|
648
|
+
# large tables known at stats collection time from profiling candidates.
|
|
649
|
+
# If stats are not available (NULL), such tables are not filtered and are considered
|
|
650
|
+
# as profiling candidates.
|
|
651
|
+
cursor = inspector.bind.execute(
|
|
652
|
+
sql.text(
|
|
653
|
+
f"""SELECT
|
|
654
|
+
t.OWNER,
|
|
655
|
+
t.TABLE_NAME,
|
|
656
|
+
t.NUM_ROWS,
|
|
657
|
+
t.LAST_ANALYZED,
|
|
658
|
+
COALESCE(t.NUM_ROWS * t.AVG_ROW_LEN, 0) / (1024 * 1024 * 1024) AS SIZE_GB
|
|
659
|
+
FROM {tables_table_name} t
|
|
660
|
+
WHERE t.OWNER = :owner
|
|
661
|
+
AND (t.NUM_ROWS < :table_row_limit OR t.NUM_ROWS IS NULL)
|
|
662
|
+
AND COALESCE(t.NUM_ROWS * t.AVG_ROW_LEN, 0) / (1024 * 1024 * 1024) < :table_size_limit
|
|
663
|
+
"""
|
|
664
|
+
),
|
|
665
|
+
dict(
|
|
666
|
+
owner=inspector.dialect.denormalize_name(schema),
|
|
667
|
+
table_row_limit=self.config.profiling.profile_table_row_limit,
|
|
668
|
+
table_size_limit=self.config.profiling.profile_table_size_limit,
|
|
669
|
+
),
|
|
670
|
+
)
|
|
671
|
+
|
|
672
|
+
TABLE_NAME_COL_LOC = 1
|
|
673
|
+
return [
|
|
674
|
+
self.get_identifier(
|
|
675
|
+
schema=schema,
|
|
676
|
+
entity=inspector.dialect.normalize_name(row[TABLE_NAME_COL_LOC])
|
|
677
|
+
or _raise_err(
|
|
678
|
+
ValueError(f"Invalid table name: {row[TABLE_NAME_COL_LOC]}")
|
|
679
|
+
),
|
|
680
|
+
inspector=inspector,
|
|
681
|
+
)
|
|
682
|
+
for row in cursor
|
|
683
|
+
]
|
|
@@ -51,7 +51,6 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
51
51
|
from datahub.ingestion.glossary.classification_mixin import (
|
|
52
52
|
SAMPLE_SIZE_MULTIPLIER,
|
|
53
53
|
ClassificationHandler,
|
|
54
|
-
ClassificationReportMixin,
|
|
55
54
|
)
|
|
56
55
|
from datahub.ingestion.source.common.data_reader import DataReader
|
|
57
56
|
from datahub.ingestion.source.common.subtypes import (
|
|
@@ -59,6 +58,7 @@ from datahub.ingestion.source.common.subtypes import (
|
|
|
59
58
|
DatasetSubTypes,
|
|
60
59
|
)
|
|
61
60
|
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
|
|
61
|
+
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
62
62
|
from datahub.ingestion.source.sql.sql_utils import (
|
|
63
63
|
add_table_to_schema_container,
|
|
64
64
|
downgrade_schema_from_v2,
|
|
@@ -74,7 +74,6 @@ from datahub.ingestion.source.sql.sqlalchemy_data_reader import (
|
|
|
74
74
|
)
|
|
75
75
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
76
76
|
StaleEntityRemovalHandler,
|
|
77
|
-
StaleEntityRemovalSourceReport,
|
|
78
77
|
)
|
|
79
78
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
80
79
|
StatefulIngestionSourceBase,
|
|
@@ -118,9 +117,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
|
|
|
118
117
|
)
|
|
119
118
|
from datahub.telemetry import telemetry
|
|
120
119
|
from datahub.utilities.file_backed_collections import FileBackedDict
|
|
121
|
-
from datahub.utilities.lossy_collections import LossyList
|
|
122
120
|
from datahub.utilities.registries.domain_registry import DomainRegistry
|
|
123
|
-
from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport
|
|
124
121
|
from datahub.utilities.sqlalchemy_type_converter import (
|
|
125
122
|
get_native_data_type_for_sqlalchemy_type,
|
|
126
123
|
)
|
|
@@ -134,43 +131,6 @@ if TYPE_CHECKING:
|
|
|
134
131
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
135
132
|
|
|
136
133
|
|
|
137
|
-
@dataclass
|
|
138
|
-
class SQLSourceReport(StaleEntityRemovalSourceReport, ClassificationReportMixin):
|
|
139
|
-
tables_scanned: int = 0
|
|
140
|
-
views_scanned: int = 0
|
|
141
|
-
entities_profiled: int = 0
|
|
142
|
-
filtered: LossyList[str] = field(default_factory=LossyList)
|
|
143
|
-
|
|
144
|
-
query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None
|
|
145
|
-
|
|
146
|
-
num_view_definitions_parsed: int = 0
|
|
147
|
-
num_view_definitions_failed_parsing: int = 0
|
|
148
|
-
num_view_definitions_failed_column_parsing: int = 0
|
|
149
|
-
view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
|
|
150
|
-
|
|
151
|
-
def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
|
|
152
|
-
"""
|
|
153
|
-
Entity could be a view or a table
|
|
154
|
-
"""
|
|
155
|
-
if ent_type == "table":
|
|
156
|
-
self.tables_scanned += 1
|
|
157
|
-
elif ent_type == "view":
|
|
158
|
-
self.views_scanned += 1
|
|
159
|
-
else:
|
|
160
|
-
raise KeyError(f"Unknown entity {ent_type}.")
|
|
161
|
-
|
|
162
|
-
def report_entity_profiled(self, name: str) -> None:
|
|
163
|
-
self.entities_profiled += 1
|
|
164
|
-
|
|
165
|
-
def report_dropped(self, ent_name: str) -> None:
|
|
166
|
-
self.filtered.append(ent_name)
|
|
167
|
-
|
|
168
|
-
def report_from_query_combiner(
|
|
169
|
-
self, query_combiner_report: SQLAlchemyQueryCombinerReport
|
|
170
|
-
) -> None:
|
|
171
|
-
self.query_combiner = query_combiner_report
|
|
172
|
-
|
|
173
|
-
|
|
174
134
|
class SqlWorkUnit(MetadataWorkUnit):
|
|
175
135
|
pass
|
|
176
136
|
|
|
@@ -352,7 +312,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
352
312
|
|
|
353
313
|
def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str):
|
|
354
314
|
super().__init__(config, ctx)
|
|
355
|
-
self.config = config
|
|
315
|
+
self.config: SQLCommonConfig = config
|
|
356
316
|
self.platform = platform
|
|
357
317
|
self.report: SQLSourceReport = SQLSourceReport()
|
|
358
318
|
self.profile_metadata_info: ProfileMetadata = ProfileMetadata()
|
|
@@ -1282,17 +1242,22 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1282
1242
|
def is_dataset_eligible_for_profiling(
|
|
1283
1243
|
self,
|
|
1284
1244
|
dataset_name: str,
|
|
1285
|
-
|
|
1245
|
+
schema: str,
|
|
1286
1246
|
inspector: Inspector,
|
|
1287
1247
|
profile_candidates: Optional[List[str]],
|
|
1288
1248
|
) -> bool:
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
and
|
|
1292
|
-
)
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1249
|
+
if not (
|
|
1250
|
+
self.config.table_pattern.allowed(dataset_name)
|
|
1251
|
+
and self.config.profile_pattern.allowed(dataset_name)
|
|
1252
|
+
):
|
|
1253
|
+
self.report.profiling_skipped_table_profile_pattern[schema] += 1
|
|
1254
|
+
return False
|
|
1255
|
+
|
|
1256
|
+
if profile_candidates is not None and dataset_name not in profile_candidates:
|
|
1257
|
+
self.report.profiling_skipped_other[schema] += 1
|
|
1258
|
+
return False
|
|
1259
|
+
|
|
1260
|
+
return True
|
|
1296
1261
|
|
|
1297
1262
|
def loop_profiler_requests(
|
|
1298
1263
|
self,
|
|
@@ -1307,7 +1272,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1307
1272
|
if (
|
|
1308
1273
|
sql_config.profiling.profile_if_updated_since_days is not None
|
|
1309
1274
|
or sql_config.profiling.profile_table_size_limit is not None
|
|
1310
|
-
or sql_config.profiling.profile_table_row_limit is None
|
|
1275
|
+
or sql_config.profiling.profile_table_row_limit is not None
|
|
1311
1276
|
):
|
|
1312
1277
|
try:
|
|
1313
1278
|
threshold_time: Optional[datetime.datetime] = None
|
|
@@ -1328,8 +1293,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1328
1293
|
schema=schema, entity=table, inspector=inspector
|
|
1329
1294
|
)
|
|
1330
1295
|
if not self.is_dataset_eligible_for_profiling(
|
|
1331
|
-
dataset_name,
|
|
1296
|
+
dataset_name, schema, inspector, profile_candidates
|
|
1332
1297
|
):
|
|
1298
|
+
self.report.num_tables_not_eligible_profiling[schema] += 1
|
|
1333
1299
|
if self.config.profiling.report_dropped_profiles:
|
|
1334
1300
|
self.report.report_dropped(f"profile of {dataset_name}")
|
|
1335
1301
|
continue
|