acryl-datahub 1.1.0.3rc1__py3-none-any.whl → 1.1.0.4rc1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

@@ -1,7 +1,7 @@
- acryl_datahub-1.1.0.3rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.1.0.4rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=S6g02VXkiMX8tOvjPQvhiycUlmQG8wDgMRipQjsiIuw,323
+ datahub/_version.py,sha256=4VQBpvRWcVdnl6_pdqPZchmeXrU7IuFILEmLChbNYQU,323
  datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -67,11 +67,11 @@ datahub/api/graphql/assertion.py,sha256=o_q6SV7N1rJTVMNKSUBGJnZPk6TcVYoVShgDmPw6
  datahub/api/graphql/base.py,sha256=zk724_oYSJ0nK7X7Z80MijnA6ry9JqpxnBsJeYuONKA,1737
  datahub/api/graphql/operation.py,sha256=7E80HyE-5JLfLbFkQbgJeNwIaKngjBCrWES8eJO4OYc,5112
  datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/cli/check_cli.py,sha256=82mO4a1_tEa9ewNzWAL6zxYEdVfUPOj0aH688X8_h28,14392
+ datahub/cli/check_cli.py,sha256=CsOF57GEJAxnRS7a2mrzNkbQSTxcvtr6nBcB9s5MBXA,15197
  datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
  datahub/cli/config_utils.py,sha256=EeBGfhmf4AxYoTfnZ4GSiGIgpzJFkduNjN_FwmxZGhA,4889
  datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
- datahub/cli/delete_cli.py,sha256=ustvR_mQgddXU3DWP3J2bx0UySD1LANvbMwhuDdCd8Y,23623
+ datahub/cli/delete_cli.py,sha256=ySVAOSEtDs1dgX6HBq6A0re52JEF-dN0bltXSC_H780,26537
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
  datahub/cli/docker_cli.py,sha256=U2yvCB37sPZB5Uq73Y3AG-FK_BqeQaNLsF3DChuiyqI,36463
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
@@ -172,12 +172,12 @@ datahub/ingestion/fs/http_fs.py,sha256=NBIKp4vl7mW0YfVfkfpO3R6DBGqSC7f6EE_da0yz2
  datahub/ingestion/fs/local_fs.py,sha256=oWf-PZsl5sI-9eHWGeKlfKYagbQaSZ9fGfNbxcFji14,885
  datahub/ingestion/fs/s3_fs.py,sha256=B113EdaCelb80uF0n2rsLFettWB41RqFxa9X_XKRzZg,3190
  datahub/ingestion/glossary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/glossary/classification_mixin.py,sha256=s11mVgrCBs2fbHwKoKZzOxuQe-2QzapAEmqmMUfr9rI,13716
+ datahub/ingestion/glossary/classification_mixin.py,sha256=jAL7TPuC0t2_VR8nwr-zL6HhaT-i0sZYSFpBPmgyAqs,13894
  datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGot6z9Cir5Vuc,2981
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/graph/client.py,sha256=apsLaAWqvJN6nIEL-T-BFkkvBhAr64RS07YRl0PoVu4,70790
+ datahub/ingestion/graph/client.py,sha256=VnrUsC2jGN5W3nkurAxIR887J4O9mP_S0VjdheekNQA,72412
  datahub/ingestion/graph/config.py,sha256=rmkcqAL8fJoY9QyAeS0Xm8HvwHzV3pCjY-Om-50JJTI,1015
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -208,7 +208,7 @@ datahub/ingestion/source/elastic_search.py,sha256=2dwIcSbYMaq_RoSnxLGz4Q_20oJ8AG
  datahub/ingestion/source/feast.py,sha256=rAqT7huVgi4c7iRU9qSbohPbNRrxZVw4PIvnfxNsiUk,18798
  datahub/ingestion/source/file.py,sha256=h6CRH7hrKcFxu1SmZDjqJcJUSrc031u5oJUl2clnPO4,15976
  datahub/ingestion/source/ge_data_profiler.py,sha256=Y_sdKK4Ot6MOpSKNfkkCJhiL7hqcjpU0hcDqXpfcNA0,66162
- datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0GX0az6HYqNUZRnIu_fQ,10866
+ datahub/ingestion/source/ge_profiling_config.py,sha256=sG_0BwPDRG3I4PnhfWGHf9AbePLDWG0kKcKEtlXHTuk,11544
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
  datahub/ingestion/source/ldap.py,sha256=PKoA5pVjuIxFfW1TcbYNIWSm7-C7shK2FDn7Zo5mrVM,18705
  datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
@@ -224,7 +224,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
  datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
  datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
- datahub/ingestion/source/superset.py,sha256=acxKU8XkaCNvhcv0CwU27_dYTdV5iR45BPcc83SR_T0,48380
+ datahub/ingestion/source/superset.py,sha256=dSXbsPj4_BY9O6esrJRt5WYcHj7QWoBk7PTfIFxS_Zw,48387
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
@@ -265,7 +265,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAx
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=DHw5Z_rxj_fR09p7SO0UmDdvYEa_ViIRnLE9CFxPAAk,32525
  datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=29E_25jLpMIgWcYRC0ZcYd1fvaFtSi2T8S6hSwiTDTY,51090
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
- datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
+ datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
  datahub/ingestion/source/bigquery_v2/profiler.py,sha256=8-yAoq8sX0E6VIwr75YbM8wITRNhGfxgte9BCeGNkMM,10681
  datahub/ingestion/source/bigquery_v2/queries.py,sha256=c1BpeQP8p8y-FOhmiQkkY2IqGrEqrXtARDCYQ2xhXvo,20145
@@ -353,7 +353,7 @@ datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
  datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
  datahub/ingestion/source/identity/okta.py,sha256=jC21myJuMRTaPgj0OD9heaC-mz8ECjqpy2hSJwlUSwM,31943
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/kafka/kafka.py,sha256=HMoe1P0QE9JlcX6MNEALTgz7LsmG-HUXVuWnk3jkRo8,22900
+ datahub/ingestion/source/kafka/kafka.py,sha256=Gjlzxq0B8IZtNcJdDA8yvWbiTi_Km64KoU4TR2Bt5M0,23254
  datahub/ingestion/source/kafka/kafka_config.py,sha256=ijUB8PS5p-o3uLCHkAxAJAIM88s47rVaAUYXmi_lR4M,4406
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
  datahub/ingestion/source/kafka_connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -475,7 +475,7 @@ datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
- datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
+ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=WJfsP8w3HceUkM6GKONtWCTtYvTdR209cRW9g66xyYE,5671
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=2lmvAeZELTjAzg4Y5E0oY41r1IzVEvg6OHAvVJftSFk,14081
@@ -489,7 +489,7 @@ datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAop
  datahub/ingestion/source/sql/hive.py,sha256=Sh3Us1vjUcbgQ4NxLKcXEdGtck5fJHIwOdjbDhBGfCw,31575
  datahub/ingestion/source/sql/hive_metastore.py,sha256=qpX9eCRm-zq3DKC49MaZP9vzGot9QIDfaaeFgXGbOuM,36283
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
- datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
+ datahub/ingestion/source/sql/mysql.py,sha256=CKF55rlL5ykS3KUpx9bS58SqTDQuBXJMoGlnzJEazg8,3596
  datahub/ingestion/source/sql/oracle.py,sha256=ftnrk3iiEelyv9PBHPYbairuP1WgxZbi1gu6YdqY69E,29965
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
  datahub/ingestion/source/sql/presto.py,sha256=tATa0M2q0PjUC_E9W_jSUsmKTP7cVJayLgrFMzG_eao,4223
@@ -931,7 +931,7 @@ datahub/sdk/dataset.py,sha256=2-iD-HqjgFpCBmU3k8O5tkltmwFj4jaumADiX8sexJU,29465
  datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
  datahub/sdk/entity_client.py,sha256=xHLGLn6oJfxmHLXo5w4-isPFZGcf8yR5IwyC6pvl_I8,8903
  datahub/sdk/lineage_client.py,sha256=M4fN4p8YHpM7zoQ7RCCekV8oPgw8QsWxzMAA8mPoy20,33822
- datahub/sdk/main_client.py,sha256=nB9CsQxq7fgW7zXtDwSgu51V2OGT12O1zaYZv9oR2t0,5137
+ datahub/sdk/main_client.py,sha256=kJRmydyyaOjUdqOljl0w_Cx7BKkViZmnX-1vZBHY4nw,5266
  datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
  datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
  datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
@@ -967,7 +967,7 @@ datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=tqFZsE-7owUiU0q49nmkTt50CU4vn8ffUbNcTv9nRbc,71431
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
- datahub/sql_parsing/sqlglot_lineage.py,sha256=6tuVv64MPO4i2VsmO9pjvP5IBWLEGollT3Ayubj6MU4,58668
+ datahub/sql_parsing/sqlglot_lineage.py,sha256=jchSPPYkFtHpyTRTWR5K0YQM6LIgWR5MtyVNQ6zA2Ig,59915
  datahub/sql_parsing/sqlglot_utils.py,sha256=TI11oBu1wrGeUuUGBg7hGTr6lTvztahdqiqXNJYRfbQ,14823
  datahub/sql_parsing/tool_meta_extractor.py,sha256=EV_g7sOchTSUm2p6wluNJqND7-rDYokVTqqFCM7hQ6c,7599
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1075,8 +1075,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.1.0.3rc1.dist-info/METADATA,sha256=qmKShqlg2qDoeuPK1c0sKxBOlOOoD_3auEUXvZa4QyU,182347
- acryl_datahub-1.1.0.3rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub-1.1.0.3rc1.dist-info/entry_points.txt,sha256=-N2PGtn1uwKR7-VM9spziE_RNyOdKm_XNpOWL1lnaj4,9790
- acryl_datahub-1.1.0.3rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.1.0.3rc1.dist-info/RECORD,,
+ acryl_datahub-1.1.0.4rc1.dist-info/METADATA,sha256=N-cpSiw6Gk0kjPEpr_ernkkG9f6hs3Mj7m8g-e6GxCs,182347
+ acryl_datahub-1.1.0.4rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub-1.1.0.4rc1.dist-info/entry_points.txt,sha256=-N2PGtn1uwKR7-VM9spziE_RNyOdKm_XNpOWL1lnaj4,9790
+ acryl_datahub-1.1.0.4rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.1.0.4rc1.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.1.0.3rc1"
+ __version__ = "1.1.0.4rc1"
 
 
  def is_dev_mode() -> bool:
datahub/cli/check_cli.py CHANGED
@@ -478,3 +478,30 @@ def server_config() -> None:
      server_config = graph.get_server_config()
 
      click.echo(pprint.pformat(server_config))
+
+
+ @check.command()
+ @click.option(
+     "--urn", required=True, help="The urn or urn pattern (supports % for wildcard)"
+ )
+ @click.option("--aspect", default=None, help="Filter to a specific aspect name.")
+ @click.option(
+     "--start", type=int, default=None, help="Row number of sql store to restore from."
+ )
+ @click.option("--batch-size", type=int, default=None, help="How many rows to restore.")
+ def restore_indices(
+     urn: str,
+     aspect: Optional[str],
+     start: Optional[int],
+     batch_size: Optional[int],
+ ) -> None:
+     """Resync metadata changes into the search and graph indices."""
+     graph = get_default_graph(ClientMode.CLI)
+
+     result = graph.restore_indices(
+         urn_pattern=urn,
+         aspect=aspect,
+         start=start,
+         batch_size=batch_size,
+     )
+     click.echo(result)
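
For reference, a hedged sketch of exercising the new subcommand in-process via click's test runner. It assumes a DataHub connection is already configured for the CLI, and relies on click's default mapping of the function name restore_indices to the command restore-indices:

from click.testing import CliRunner

from datahub.cli.check_cli import check

# Drive `datahub check restore-indices` without spawning a subprocess.
runner = CliRunner()
result = runner.invoke(
    check, ["restore-indices", "--urn", "urn:li:dataset:%", "--batch-size", "10"]
)
print(result.output)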
datahub/cli/delete_cli.py CHANGED
@@ -1,5 +1,6 @@
  import logging
  import random
+ import sys
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from dataclasses import dataclass
  from datetime import datetime
@@ -317,6 +318,19 @@ def undo_by_filter(
      is_flag=True,
      help="Recursively delete all contained entities (only for containers and dataPlatformInstances)",
  )
+ @click.option(
+     "--streaming-batch",
+     required=False,
+     is_flag=True,
+     help="Use streaming batch deletion for recursive operations. Benefit of being resumable for large hierarchies where getting all URNs at once can take a long time.",
+ )
+ @click.option(
+     "--streaming-batch-size",
+     required=False,
+     default=12000,
+     type=int,
+     help="Batch size for streaming batch deletion for recursive operations.",
+ )
  @click.option(
      "--start-time",
      required=False,
@@ -368,6 +382,8 @@ def by_filter(
      entity_type: Optional[str],
      query: Optional[str],
      recursive: bool,
+     streaming_batch: bool,
+     streaming_batch_size: int,
      start_time: Optional[datetime],
      end_time: Optional[datetime],
      batch_size: int,
@@ -386,6 +402,7 @@ def by_filter(
          env=env,
          query=query,
          recursive=recursive,
+         streaming_batch=streaming_batch,
      )
      soft_delete_filter = _validate_user_soft_delete_flags(
          soft=soft, aspect=aspect, only_soft_deleted=only_soft_deleted
@@ -417,26 +434,27 @@ def by_filter(
      # Determine which urns to delete.
      delete_by_urn = bool(urn) and not recursive
      if urn:
-         urns = [urn]
-
          if recursive:
-             # Add children urns to the list.
-             if guess_entity_type(urn) == "dataPlatformInstance":
-                 urns.extend(
-                     graph.get_urns_by_filter(
-                         platform_instance=urn,
-                         status=soft_delete_filter,
-                         batch_size=batch_size,
-                     )
-                 )
-             else:
-                 urns.extend(
-                     graph.get_urns_by_filter(
-                         container=urn,
-                         status=soft_delete_filter,
-                         batch_size=batch_size,
-                     )
-                 )
+             _delete_urns_streaming_recursive(
+                 graph=graph,
+                 parent_urn=urn,
+                 aspect_name=aspect,
+                 soft=soft,
+                 dry_run=dry_run,
+                 start_time=start_time,
+                 end_time=end_time,
+                 workers=workers,
+                 soft_delete_filter=soft_delete_filter,
+                 batch_size=batch_size,
+                 force=force,
+                 streaming_batch_size=streaming_batch_size
+                 if streaming_batch
+                 else sys.maxsize,
+             )
+             return
+
+         else:
+             urns = [urn]
      elif urn_file:
          with open(urn_file, "r") as r:
              urns = []
@@ -557,6 +575,7 @@ def _validate_user_urn_and_filters(
      env: Optional[str],
      query: Optional[str],
      recursive: bool,
+     streaming_batch: bool,
  ) -> None:
      # Check urn / filters options.
      if urn:
@@ -592,6 +611,12 @@
              f"This will only delete {urn}. Use --recursive to delete all contained entities."
          )
 
+     # Check streaming flag.
+     if streaming_batch and not recursive:
+         raise click.UsageError(
+             "The --streaming-batch flag can only be used with --recursive."
+         )
+
 
  def _validate_user_soft_delete_flags(
      soft: bool, aspect: Optional[str], only_soft_deleted: bool
@@ -738,3 +763,76 @@ def _delete_one_urn(
          num_timeseries_records=ts_rows_affected,
          num_referenced_entities=referenced_entities_affected,
      )
+
+
+ def _delete_urns_streaming_recursive(
+     graph: DataHubGraph,
+     parent_urn: str,
+     aspect_name: Optional[str],
+     soft: bool,
+     dry_run: bool,
+     start_time: Optional[datetime],
+     end_time: Optional[datetime],
+     workers: int,
+     soft_delete_filter: RemovedStatusFilter,
+     batch_size: int,
+     force: bool,
+     streaming_batch_size: int,
+ ) -> None:
+     """Streaming recursive batch deletion that processes URNs in batches."""
+
+     entity_type = guess_entity_type(parent_urn)
+     click.echo(f"Starting recursive deletion of {entity_type} {parent_urn}")
+
+     if not force and not dry_run:
+         click.confirm(
+             f"This will recursively delete {parent_urn} and all its contained entities. Do you want to continue?",
+             abort=True,
+         )
+
+     urns = []
+
+     if entity_type == "dataPlatformInstance":
+         child_urns_iter = graph.get_urns_by_filter(
+             platform_instance=parent_urn,
+             status=soft_delete_filter,
+             batch_size=batch_size,
+             # Important to skip cache so we can resume from where we left off.
+             skip_cache=True,
+         )
+     else:
+         child_urns_iter = graph.get_urns_by_filter(
+             container=parent_urn,
+             status=soft_delete_filter,
+             batch_size=batch_size,
+             # Important to skip cache so we can resume from where we left off.
+             skip_cache=True,
+         )
+
+     for child_urn in child_urns_iter:
+         urns.append(child_urn)
+         if len(urns) >= streaming_batch_size:
+             _delete_urns_parallel(
+                 graph=graph,
+                 urns=urns,
+                 aspect_name=aspect_name,
+                 soft=soft,
+                 dry_run=dry_run,
+                 delete_by_urn=False,
+                 start_time=start_time,
+                 end_time=end_time,
+                 workers=workers,
+             )
+             urns = []
+     urns.append(parent_urn)
+     _delete_urns_parallel(
+         graph=graph,
+         urns=urns,
+         aspect_name=aspect_name,
+         soft=soft,
+         dry_run=dry_run,
+         delete_by_urn=False,
+         start_time=start_time,
+         end_time=end_time,
+         workers=workers,
+     )
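
The heart of the new helper is a flush-at-threshold loop over a lazy URN iterator, which keeps memory bounded and lets an interrupted run simply be re-issued. A standalone sketch of that pattern under illustrative names (delete_in_batches and delete_batch are not helpers from this release):

from typing import Callable, Iterable, List

def delete_in_batches(
    child_urns: Iterable[str],
    delete_batch: Callable[[List[str]], None],
    batch_size: int = 12000,  # mirrors the --streaming-batch-size default
) -> None:
    batch: List[str] = []
    for urn in child_urns:
        batch.append(urn)
        if len(batch) >= batch_size:
            delete_batch(batch)  # flush a full batch as soon as it fills up
            batch = []
    if batch:
        delete_batch(batch)  # flush whatever remains

The release's helper follows the same shape, except that it appends the parent URN to the final flush so the container itself is deleted last.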
datahub/ingestion/glossary/classification_mixin.py CHANGED
@@ -90,6 +90,11 @@ class ClassificationHandler:
 
      def get_classifiers(self) -> List[Classifier]:
          classifiers = []
+         if (
+             not isinstance(self.config, ClassificationSourceConfigMixin)
+             or self.config.classification is None
+         ):
+             return classifiers
 
          for classifier in self.config.classification.classifiers:
              classifier_class = classifier_registry.get(classifier.type)
datahub/ingestion/graph/client.py CHANGED
@@ -906,6 +906,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
          batch_size: int = 5000,
          extraFilters: Optional[List[RawSearchFilterRule]] = None,
          extra_or_filters: Optional[RawSearchFilter] = None,
+         skip_cache: bool = False,
      ) -> Iterable[str]:
          """Fetch all urns that match all of the given filters.
 
@@ -924,6 +925,7 @@
              Note that this requires browsePathV2 aspects (added in 0.10.4+).
          :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities.
          :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters.
+         :param skip_cache: Whether to bypass caching. Defaults to False.
 
          :return: An iterable of urns that match the filters.
          """
@@ -951,7 +953,8 @@
              $query: String!,
              $orFilters: [AndFilterInput!],
              $batchSize: Int!,
-             $scrollId: String) {
+             $scrollId: String,
+             $skipCache: Boolean!) {
 
              scrollAcrossEntities(input: {
                  query: $query,
@@ -962,6 +965,7 @@
                  searchFlags: {
                      skipHighlighting: true
                      skipAggregates: true
+                     skipCache: $skipCache
                  }
              }) {
                  nextScrollId
@@ -980,6 +984,7 @@
              "query": query,
              "orFilters": orFilters,
              "batchSize": batch_size,
+             "skipCache": skip_cache,
          }
 
          for entity in self._scroll_across_entities(graphql_query, variables):
@@ -1085,7 +1090,7 @@
              "query": query,
              "orFilters": or_filters_final,
              "batchSize": batch_size,
-             "skipCache": "true" if skip_cache else "false",
+             "skipCache": skip_cache,
              "fetchExtraFields": extra_source_fields,
          }
 
@@ -1429,6 +1434,41 @@
          related_aspects = response.get("relatedAspects", [])
          return reference_count, related_aspects
 
+     def restore_indices(
+         self,
+         urn_pattern: str,
+         aspect: Optional[str] = None,
+         start: Optional[int] = None,
+         batch_size: Optional[int] = None,
+     ) -> str:
+         """Restore the indices for a given urn or urn-like pattern.
+
+         Args:
+             urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs.
+             aspect: Optional aspect string to restore indices for a specific aspect.
+             start: Optional integer to decide which row number of sql store to restore from. Default: 0.
+             batch_size: Optional integer to decide how many rows to restore. Default: 10.
+
+         Returns:
+             A string containing the result of the restore indices operation. This format is subject to change.
+         """
+         if "%" in urn_pattern:
+             payload_obj: dict = {"urnLike": urn_pattern}
+         else:
+             payload_obj = {"urn": urn_pattern}
+         if aspect is not None:
+             payload_obj["aspect"] = aspect
+         if start is not None:
+             payload_obj["start"] = start
+         if batch_size is not None:
+             payload_obj["batchSize"] = batch_size
+         raw_result = self._post_generic(
+             f"{self._gms_server}/operations?action=restoreIndices", payload_obj
+         )
+         result = raw_result["value"]
+         logger.debug(f"Restore indices result: {result}")
+         return result
+
      @functools.lru_cache
      def _make_schema_resolver(
          self,
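
Taken together, skip_cache gives resumable scans and restore_indices gives an index-repair entry point. A minimal usage sketch, assuming a reachable GMS on localhost (the DatahubClientConfig import path and the example container URN are assumptions, not from this diff):

from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

# skip_cache=True makes repeated scans observe deletions immediately,
# which is what lets the streaming delete above resume cleanly.
for urn in graph.get_urns_by_filter(
    container="urn:li:container:example",  # hypothetical URN
    skip_cache=True,
):
    print(urn)

# Resync the search/graph indices for every dataset matching a % pattern.
print(graph.restore_indices(urn_pattern="urn:li:dataset:%", batch_size=50))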
datahub/ingestion/source/bigquery_v2/common.py CHANGED
@@ -63,7 +63,7 @@ class BigQueryIdentifierBuilder:
      )
 
      def gen_user_urn(self, user_email: str) -> str:
-         return make_user_urn(user_email.split("@")[0])
+         return make_user_urn(user_email)
 
      def make_data_platform_urn(self) -> str:
          return make_data_platform_urn(self.platform)
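
The gen_user_urn change stops truncating the email at the @, so BigQuery users now map to full-email corpuser URNs. A quick before/after illustration with a made-up address, using the same make_user_urn helper (which prefixes its argument with urn:li:corpuser:):

from datahub.emitter.mce_builder import make_user_urn

email = "jdoe@example.com"  # illustrative only
print(make_user_urn(email.split("@")[0]))  # old behavior: urn:li:corpuser:jdoe
print(make_user_urn(email))                # new behavior: urn:li:corpuser:jdoe@example.com

Note that corpuser URNs emitted under the old scheme will differ from those emitted under the new one.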
datahub/ingestion/source/ge_profiling_config.py CHANGED
@@ -125,6 +125,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
          description="Profile table only if it has been updated since these many number of days. "
          "If set to `null`, no constraint of last modified time for tables to profile. "
          "Supported only in `snowflake` and `BigQuery`.",
+         schema_extra={"supported_sources": ["snowflake", "bigquery"]},
      )
 
      profile_table_size_limit: Optional[int] = Field(
@@ -132,6 +133,9 @@
          description="Profile tables only if their size is less than specified GBs. If set to `null`, "
          "no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
          "`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
+         schema_extra={
+             "supported_sources": ["snowflake", "bigquery", "unity-catalog", "oracle"]
+         },
      )
 
      profile_table_row_limit: Optional[int] = Field(
@@ -139,12 +143,14 @@
          description="Profile tables only if their row count is less than specified count. "
          "If set to `null`, no limit on the row count of tables to profile. Supported only in "
          "`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
+         schema_extra={"supported_sources": ["snowflake", "bigquery", "oracle"]},
      )
 
      profile_table_row_count_estimate_only: bool = Field(
          default=False,
          description="Use an approximate query for row count. This will be much faster but slightly "
          "less accurate. Only supported for Postgres and MySQL. ",
+         schema_extra={"supported_sources": ["postgres", "mysql"]},
      )
 
      # The query combiner enables us to combine multiple queries into a single query,
@@ -161,27 +167,32 @@
          default=True,
          description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. "
          "If enabled, latest partition data is used for profiling.",
+         schema_extra={"supported_sources": ["athena", "bigquery"]},
      )
      partition_datetime: Optional[datetime.datetime] = Field(
          default=None,
          description="If specified, profile only the partition which matches this datetime. "
          "If not specified, profile the latest partition. Only Bigquery supports this.",
+         schema_extra={"supported_sources": ["bigquery"]},
      )
      use_sampling: bool = Field(
          default=True,
          description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. "
          "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ",
+         schema_extra={"supported_sources": ["bigquery", "snowflake"]},
      )
 
      sample_size: int = Field(
          default=10000,
          description="Number of rows to be sampled from table for column level profiling."
          "Applicable only if `use_sampling` is set to True.",
+         schema_extra={"supported_sources": ["bigquery", "snowflake"]},
      )
 
      profile_external_tables: bool = Field(
          default=False,
          description="Whether to profile external tables. Only Snowflake and Redshift supports this.",
+         schema_extra={"supported_sources": ["redshift", "snowflake"]},
      )
 
      tags_to_ignore_sampling: Optional[List[str]] = pydantic.Field(
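
The schema_extra additions attach machine-readable per-source support metadata alongside the human-readable descriptions. A hedged sketch of reading it back, assuming pydantic v1 semantics where unrecognized Field() keyword arguments are collected on the field's FieldInfo (verify against the pinned pydantic version):

from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig

# On pydantic v1, extra Field() kwargs are retained in FieldInfo.extra.
field_info = GEProfilingConfig.__fields__["profile_external_tables"].field_info
print(field_info.extra.get("schema_extra"))
# per the diff: {"supported_sources": ["redshift", "snowflake"]}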
datahub/ingestion/source/kafka/kafka.py CHANGED
@@ -189,6 +189,21 @@ class KafkaConnectionTest:
      SourceCapability.SCHEMA_METADATA,
      "Schemas associated with each topic are extracted from the schema registry. Avro and Protobuf (certified), JSON (incubating). Schema references are supported.",
  )
+ @capability(
+     SourceCapability.DATA_PROFILING,
+     "Not supported",
+     supported=False,
+ )
+ @capability(
+     SourceCapability.LINEAGE_COARSE,
+     "Not supported. If you use Kafka Connect, the kafka-connect source can generate lineage.",
+     supported=False,
+ )
+ @capability(
+     SourceCapability.LINEAGE_FINE,
+     "Not supported",
+     supported=False,
+ )
  class KafkaSource(StatefulIngestionSourceBase, TestableSource):
      """
      This plugin extracts the following:
datahub/ingestion/source/snowflake/snowflake_summary.py CHANGED
@@ -20,6 +20,7 @@ from datahub.ingestion.source.snowflake.snowflake_schema_gen import (
      SnowflakeSchemaGenerator,
  )
  from datahub.ingestion.source.snowflake.snowflake_utils import (
+     SnowflakeFilter,
      SnowflakeIdentifierBuilder,
  )
  from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
@@ -81,6 +82,10 @@ class SnowflakeSummarySource(Source):
              profiler=None,
              aggregator=None,
              snowsight_url_builder=None,
+             filters=SnowflakeFilter(
+                 filter_config=self.config,
+                 structured_reporter=self.report,
+             ),
          )
 
          # Databases.
datahub/ingestion/source/sql/mysql.py CHANGED
@@ -66,6 +66,14 @@ class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
  @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+     SourceCapability.LINEAGE_COARSE,
+     "Supported for views if `include_view_column_lineage` is enabled.",
+ )
+ @capability(
+     SourceCapability.LINEAGE_FINE,
+     "Supported for views if `include_view_column_lineage` is enabled.",
+ )
  class MySQLSource(TwoTierSQLAlchemySource):
      """
      This plugin extracts the following:
datahub/ingestion/source/superset.py CHANGED
@@ -1087,7 +1087,7 @@ class SupersetSource(StatefulIngestionSourceBase):
          datasource_urn = self.get_datasource_urn_from_id(
              dataset_response, self.platform
          )
-         dataset_url = f"{self.config.display_uri}{dataset_response.get('result', {}).get('url', '')}"
+         dataset_url = f"{self.config.display_uri}/explore/?datasource_type=table&datasource_id={dataset.id}"
          modified_actor = f"urn:li:corpuser:{self.owner_info.get((dataset_data.get('changed_by') or {}).get('id', -1), 'unknown')}"
          now = datetime.now().strftime("%I:%M%p on %B %d, %Y")
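
The dataset link is now built from the dataset id rather than the url field of the API response, and points at Superset's Explore view. With illustrative values:

display_uri = "https://superset.example.com"  # hypothetical
dataset_id = 42  # hypothetical
print(f"{display_uri}/explore/?datasource_type=table&datasource_id={dataset_id}")
# -> https://superset.example.com/explore/?datasource_type=table&datasource_id=42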
datahub/sdk/main_client.py CHANGED
@@ -9,15 +9,6 @@ from datahub.sdk.entity_client import EntityClient
  from datahub.sdk.lineage_client import LineageClient
  from datahub.sdk.search_client import SearchClient
 
- try:
-     from acryl_datahub_cloud.sdk import (  # type: ignore[import-not-found]
-         ResolverClient,
-     )
- except ImportError:
-     from datahub.sdk.resolver_client import (  # type: ignore[assignment] # If the client is not installed, use the one from the SDK
-         ResolverClient,
-     )
-
 
  class DataHubClient:
      """Main client for interacting with DataHub.
@@ -101,7 +92,15 @@ class DataHubClient:
          return EntityClient(self)
 
      @property
-     def resolve(self) -> ResolverClient:
+     def resolve(self):  # type: ignore[report-untyped-call] # Not available due to circular import issues
+         try:
+             from acryl_datahub_cloud.sdk import (  # type: ignore[import-not-found]
+                 ResolverClient,
+             )
+         except ImportError:
+             from datahub.sdk.resolver_client import (  # type: ignore[assignment] # If the client is not installed, use the one from the SDK
+                 ResolverClient,
+             )
          return ResolverClient(self)
 
      @property
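
The module-level try/except import is replaced by an import inside the resolve property, which sidesteps the circular-import problem the inline comment mentions: nothing is imported until the property is first accessed. A generic sketch of that deferred-optional-import pattern, with purely illustrative names:

class Client:
    @property
    def feature(self):
        # Importing at access time means module import order no longer
        # matters, and a missing optional dependency only surfaces here.
        try:
            import premium_impl as impl  # hypothetical optional add-on
        except ImportError:
            import fallback_impl as impl  # hypothetical bundled fallback
        return impl.FeatureClient(self)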