acryl-datahub 0.15.0.5rc9__py3-none-any.whl → 0.15.0.5rc10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (33) hide show
  1. {acryl_datahub-0.15.0.5rc9.dist-info → acryl_datahub-0.15.0.5rc10.dist-info}/METADATA +2491 -2491
  2. {acryl_datahub-0.15.0.5rc9.dist-info → acryl_datahub-0.15.0.5rc10.dist-info}/RECORD +33 -32
  3. datahub/_version.py +1 -1
  4. datahub/ingestion/graph/client.py +2 -1
  5. datahub/ingestion/graph/entity_versioning.py +201 -0
  6. datahub/ingestion/source/abs/report.py +2 -2
  7. datahub/ingestion/source/aws/sagemaker_processors/common.py +3 -2
  8. datahub/ingestion/source/bigquery_v2/bigquery_report.py +1 -1
  9. datahub/ingestion/source/delta_lake/report.py +2 -2
  10. datahub/ingestion/source/dynamodb/dynamodb.py +2 -1
  11. datahub/ingestion/source/elastic_search.py +2 -1
  12. datahub/ingestion/source/ge_profiling_config.py +11 -7
  13. datahub/ingestion/source/iceberg/iceberg_common.py +3 -2
  14. datahub/ingestion/source/identity/okta.py +2 -1
  15. datahub/ingestion/source/kafka/kafka.py +2 -1
  16. datahub/ingestion/source/kafka_connect/common.py +2 -1
  17. datahub/ingestion/source/ldap.py +2 -1
  18. datahub/ingestion/source/looker/lookml_config.py +9 -5
  19. datahub/ingestion/source/mongodb.py +2 -1
  20. datahub/ingestion/source/nifi.py +2 -1
  21. datahub/ingestion/source/powerbi/config.py +2 -2
  22. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -1
  23. datahub/ingestion/source/redash.py +5 -5
  24. datahub/ingestion/source/salesforce.py +4 -1
  25. datahub/ingestion/source/snowflake/snowflake_report.py +2 -1
  26. datahub/ingestion/source/tableau/tableau.py +2 -1
  27. datahub/ingestion/source/unity/ge_profiler.py +55 -4
  28. datahub/ingestion/source/unity/report.py +1 -0
  29. datahub/ingestion/source_report/pulsar.py +5 -4
  30. {acryl_datahub-0.15.0.5rc9.dist-info → acryl_datahub-0.15.0.5rc10.dist-info}/LICENSE +0 -0
  31. {acryl_datahub-0.15.0.5rc9.dist-info → acryl_datahub-0.15.0.5rc10.dist-info}/WHEEL +0 -0
  32. {acryl_datahub-0.15.0.5rc9.dist-info → acryl_datahub-0.15.0.5rc10.dist-info}/entry_points.txt +0 -0
  33. {acryl_datahub-0.15.0.5rc9.dist-info → acryl_datahub-0.15.0.5rc10.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=KhEsNpGTq01OO4NFbvH7t1xq-yYSpEDm5CLJZkdZi6o,324
3
+ datahub/_version.py,sha256=VqyyvoJV4bnpYg2UMs1kyEsTgRq0wVADin5Gxtvya04,325
4
4
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
5
5
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -168,9 +168,10 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
168
168
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
169
169
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
170
170
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
- datahub/ingestion/graph/client.py,sha256=e3JEBWnEaZcVBRjL1WA83anxJ1icR8bzO_eDetO1Rlw,65036
171
+ datahub/ingestion/graph/client.py,sha256=TR8bppz-_bXZbsqZsGG9sUFNyuFo5lch6GL1Oz3yTq4,65131
172
172
  datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
173
173
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
174
+ datahub/ingestion/graph/entity_versioning.py,sha256=PG_GKJrtSu9n1oewDJfgYDVhqVMll8NXE_i0slmcTm0,6871
174
175
  datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
175
176
  datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
177
  datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=697UOvhZb5gneESRXtIIYOSU74gE2P_BTw0TBhQ9I7w,9917
@@ -192,24 +193,24 @@ datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
192
193
  datahub/ingestion/source/confluent_schema_registry.py,sha256=WednrFENtANY7bWvrmMKoxEfFK9lnrMDLB0C-hXdJDQ,18808
193
194
  datahub/ingestion/source/csv_enricher.py,sha256=hlG2njZIytLi14SmxhoscWuXxSn0M-7uaajChUNdT_E,29553
194
195
  datahub/ingestion/source/demo_data.py,sha256=PbtCHlZx3wrKlOPPgkWhDQuPm7ZfIx2neXJUzbUi9YY,1305
195
- datahub/ingestion/source/elastic_search.py,sha256=kr7ABye8deKQcQ0G858sQPZAH8oWjjRequ7-q7xJFhc,22644
196
+ datahub/ingestion/source/elastic_search.py,sha256=FndfzzgzPVZ2FZZ6NSLDQPB3L-i5jHE5ic-ne3vyV2U,22712
196
197
  datahub/ingestion/source/feast.py,sha256=uZpeUkJsiNlvZcUkARiEuZT_3n6sbGc0yFzwqhtnefA,18103
197
198
  datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
198
199
  datahub/ingestion/source/ge_data_profiler.py,sha256=l8ow9mnUUpvH4p8-ZJaKc6OdMHbb-45MOmehKuwWaSo,64932
199
- datahub/ingestion/source/ge_profiling_config.py,sha256=iY6DBEZBoisnKSNXVaY_pbmkI5z70-IsHbHqUN6kqfo,10807
200
+ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0GX0az6HYqNUZRnIu_fQ,10866
200
201
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
201
- datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
202
+ datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
202
203
  datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
203
204
  datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
204
205
  datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
205
- datahub/ingestion/source/mongodb.py,sha256=Hucd3rfxwRcc_rNOJbpSPmSZdKqN6Fi9L7KcUZ80YKM,21104
206
- datahub/ingestion/source/nifi.py,sha256=BszXfFonfHB63Zt85lHDh4W_V-gIJKtxS6q3cdPDc4U,56021
206
+ datahub/ingestion/source/mongodb.py,sha256=0P3PHVvMSXFkFimGvQzOQZF7APjsFOyzQAVQjVlVbuk,21172
207
+ datahub/ingestion/source/nifi.py,sha256=FgIbZSCu-mcdnbIpqwvmADnUIxptogUq1sSEkrkwtrc,56089
207
208
  datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
208
209
  datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
209
210
  datahub/ingestion/source/preset.py,sha256=fByqamRLnXxsfCGdLPzWN_5LJR_s2_G2f_zwSKUc8EA,3981
210
211
  datahub/ingestion/source/pulsar.py,sha256=7rTOEqYmeOuRZl5DG8d5OFkb4l9H6-1bETZfa-4DfmI,20163
211
- datahub/ingestion/source/redash.py,sha256=GH0MGV_huvKio9hMQ-jKdYIxYcHN6WnivrhWCw3I03E,29880
212
- datahub/ingestion/source/salesforce.py,sha256=O0pgRwIcHdmKUQzOpSGeaoxVUF4vvLjuHaAM4V9QhXE,31809
212
+ datahub/ingestion/source/redash.py,sha256=U0AfnYpZlAPN0peiu7pOpB8MQZ4JOO0yKEYChucMrlY,29915
213
+ datahub/ingestion/source/salesforce.py,sha256=RVFmZFq8Vo1-9YtBpl7jZiCKdRrUd_OLa3nYK3iPxS0,31983
213
214
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
214
215
  datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
215
216
  datahub/ingestion/source/superset.py,sha256=-_90rfZtKG5vf5OSFS8lhqI-nGGtKPRwYYNAS_m1xmY,24592
@@ -217,7 +218,7 @@ datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
217
218
  datahub/ingestion/source/abs/config.py,sha256=Doecl1mA6JshJTNar7oTVR7wnWl4gMu64MBHp3hIVJc,6737
218
219
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
219
220
  datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
220
- datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
221
+ datahub/ingestion/source/abs/report.py,sha256=CkRjsNn0Pab-ZPllxz3IUJI_r3x0T6urJePa_hJKi5U,586
221
222
  datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
222
223
  datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
223
224
  datahub/ingestion/source/apply/datahub_apply.py,sha256=NGhbszi6ObfJoh3YPGjcVWHJsbNboVwKUgnrCSt8HJU,7629
@@ -228,7 +229,7 @@ datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD8
228
229
  datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
229
230
  datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
230
231
  datahub/ingestion/source/aws/sagemaker_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
231
- datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=NvYfI8LHgDvhEZE7qp6qF1NSZ0_SQKhg3ivtdjsdpFg,2172
232
+ datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=x4ijMxKjZ-oJdqGyTwBp2J50uxYT1ejlVlGs8-vDXMU,2234
232
233
  datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py,sha256=3eYubXeOPARLNnNNlVawr1FxvPrtdD35QFkWldHn9w8,10384
233
234
  datahub/ingestion/source/aws/sagemaker_processors/job_classes.py,sha256=CfJkzjZU2uvZvw7qvmxfNgeWI1EvgHFY-7bn5Ih71no,9154
234
235
  datahub/ingestion/source/aws/sagemaker_processors/jobs.py,sha256=aHgQ4QMufdWAA62TNBoEPT3YSQKXg39IJ2-6MZXs8sw,32915
@@ -247,7 +248,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8
247
248
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
248
249
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
249
250
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
250
- datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=qH8k8wyMlUVzUTVhSd3FgOMGCK1D5NYuC0KF8tez_Ys,7957
251
+ datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
251
252
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
252
253
  datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c1hlsgat7l27fQN8GwvHkdme7rQ4LqIQKFwwA8z7kqw,50824
253
254
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
@@ -286,7 +287,7 @@ datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqK
286
287
  datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
287
288
  datahub/ingestion/source/delta_lake/config.py,sha256=bVBwGjCPiXyjbCLQsamt4hAsKJMtMuxupKjwZEwtU78,3374
288
289
  datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWMeRaneSpQBlWmlCKAa1wGUl1sfk,1525
289
- datahub/ingestion/source/delta_lake/report.py,sha256=uqWWivPltlZ7dwpOOluTvHOKKsSusqihn67clCAwxoM,467
290
+ datahub/ingestion/source/delta_lake/report.py,sha256=c36maxN5yP4M69XIKx_wVRFIiNEzEkZ6I8ahvkUET9c,511
290
291
  datahub/ingestion/source/delta_lake/source.py,sha256=5VyE_ZYrop4JCTVhoXLjRXb1MRfWbIj0lMMmvNxsb80,13362
291
292
  datahub/ingestion/source/dremio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
292
293
  datahub/ingestion/source/dremio/dremio_api.py,sha256=ZsMdSAb41zB2v3pqpvW72-w7-Vg9b7TsrfgO6nL_w8k,33466
@@ -300,7 +301,7 @@ datahub/ingestion/source/dremio/dremio_source.py,sha256=XMx3EP0ciIaQjMffNljp8w-G
300
301
  datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
301
302
  datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
302
303
  datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
303
- datahub/ingestion/source/dynamodb/dynamodb.py,sha256=wcEQSfQak45yPNZN7pCUEQFmjyWCpqRk1WjJJz9E2Go,22395
304
+ datahub/ingestion/source/dynamodb/dynamodb.py,sha256=o2wM1cVmkAhur4uAbpBl-PxiRG3nO7sdA3sATQjJrMo,22463
304
305
  datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
305
306
  datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
306
307
  datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
@@ -321,16 +322,16 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
321
322
  datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
322
323
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
323
324
  datahub/ingestion/source/iceberg/iceberg.py,sha256=2j-MKCa0o6m1btlgYssYTEAjcD6ZpOZJc14IYnY6TCA,27487
324
- datahub/ingestion/source/iceberg/iceberg_common.py,sha256=LEZaJleL5KJt1u_pLRUkeCqPEsthzH7tG8FgBwd9MC8,10218
325
+ datahub/ingestion/source/iceberg/iceberg_common.py,sha256=2zBuhUKyZ9jNHcPI4KsupHO77pv0sY1tqfLt7NP2dIo,10280
325
326
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
326
327
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
327
328
  datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
328
- datahub/ingestion/source/identity/okta.py,sha256=LMbW5N1j9kMjcvGnmcff8LpIDPwmscmPPOTZD88KZZg,30758
329
+ datahub/ingestion/source/identity/okta.py,sha256=BZXzhT2Nusl75trvkkoovYHTIe3DK1cdvgwYRL-jDpg,30826
329
330
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
- datahub/ingestion/source/kafka/kafka.py,sha256=ZK2NQi5GRobruwn58LR6JMKsnZl269YZzgYAMyI1Y3s,26504
331
+ datahub/ingestion/source/kafka/kafka.py,sha256=TX_9MFaecM1ZmwhX3krKsItEmNZX9c2i9024SmVo0io,26572
331
332
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
332
333
  datahub/ingestion/source/kafka_connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
- datahub/ingestion/source/kafka_connect/common.py,sha256=Ekb1K_J1eTgiH7LSP1AbEIf7NQh_2Vyu1lYX_Ggcqk4,7049
334
+ datahub/ingestion/source/kafka_connect/common.py,sha256=sfAiD48bDFfoXdqYiZuwP5BomJPP5DgaskgAyKkM3GY,7117
334
335
  datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256=-ZmPayEYqYJ8rgWIGCkJPQd2z6C8FoZA1XXO3N55KbM,14036
335
336
  datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=sbLntDi0c52i8uUJmJ59sAcJeNErSogIJsr2-Zar-3Q,12902
336
337
  datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=-rFNXKD8_EFoXuU1CiKF3wHnsBtKCJrcYDwdTno98Xk,21265
@@ -350,7 +351,7 @@ datahub/ingestion/source/looker/looker_template_language.py,sha256=W-SMICKBfIuiv
350
351
  datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
351
352
  datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
352
353
  datahub/ingestion/source/looker/lookml_concept_context.py,sha256=eDaze9S7cgO5eFP7-0azUMEJyR3EfMjmfj5pMPjpm8c,18066
353
- datahub/ingestion/source/looker/lookml_config.py,sha256=RuZkH3DDmII21gEsUvPsJi5gxWngbYkqBP06H8_n_Hs,11353
354
+ datahub/ingestion/source/looker/lookml_config.py,sha256=lulLcjAS1d8ihQseBe4HYn6ALKmJX1vl0H5mxiBzZ74,11395
354
355
  datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
355
356
  datahub/ingestion/source/looker/lookml_source.py,sha256=PJBUJgZfZyvmasDf_LJC39SggLCA6vSfAbf1PdzviZU,43889
356
357
  datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
@@ -362,7 +363,7 @@ datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwr
362
363
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
363
364
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=76Z-2Td4_3PH2wWL1XJrpV2Egre5YVh6bMXeDS5ZonE,12405
364
365
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
365
- datahub/ingestion/source/powerbi/config.py,sha256=CzG-kdcGqB0nYnQ8W40Anb1gsbMZ5TcF_dL_I02xfrE,22764
366
+ datahub/ingestion/source/powerbi/config.py,sha256=DadG3Y3R-emmEL7vW2vutL3TXXVe-_t6DA_S2kWUvLA,22784
366
367
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
367
368
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
368
369
  datahub/ingestion/source/powerbi/powerbi.py,sha256=xCNMgL-KuPGpIFv_PP1woyiddY_PpbX1HEl3aDk7F1c,54535
@@ -382,7 +383,7 @@ datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcP
382
383
  datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
383
384
  datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
384
385
  datahub/ingestion/source/powerbi_report_server/constants.py,sha256=i_hXcvPHjwk3VpTT5ef7s8dN9F6pJzPyRUiG5UzCCYI,3544
385
- datahub/ingestion/source/powerbi_report_server/report_server.py,sha256=Xsvu_FeteYNyLW_U0pER94-zQLLGUzU5tUEkhsLTQ2Y,20176
386
+ datahub/ingestion/source/powerbi_report_server/report_server.py,sha256=MVtCTNcrRwvcvr3J5eg3ZDUI__qD-kSU2hBYbX125d8,20244
386
387
  datahub/ingestion/source/powerbi_report_server/report_server_domain.py,sha256=bBYr9fz4zPEFeZZOkldzKm4SBMQdisdp-MMtaYI0n3s,11783
387
388
  datahub/ingestion/source/profiling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
388
389
  datahub/ingestion/source/profiling/common.py,sha256=4sZ58AeBV64KRfKAgjkg-UyNjAc3YERahQMmW4algAw,1426
@@ -442,7 +443,7 @@ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8
442
443
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
443
444
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=x6__7kmlIKXdnvENyN9AloE9h-vOlrjcWL95A2DGW5g,26968
444
445
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=-vjc2-sGKN0odt-IWEbx6Lhz4UlRwctlEHUnOr3_Mkg,38821
445
- datahub/ingestion/source/snowflake/snowflake_report.py,sha256=0K1g2ET-eHLRzIIsmR3iEuwyFKNYb0aETv7hAu2i92E,6386
446
+ datahub/ingestion/source/snowflake/snowflake_report.py,sha256=xSRNSG_iZcLTf9inNtlCQTDSNiDYm-jGpvAAGrRMTWI,6454
446
447
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=qG9MXutXcVyp5p59j2CPsj1zRPTdeActEqVpwlsgMKk,22217
447
448
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=c2PTnsMDD21qw_71T96xi9ylMpAXnTEyA1SK4qq528w,46105
448
449
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
@@ -497,7 +498,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
497
498
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
498
499
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
499
500
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
500
- datahub/ingestion/source/tableau/tableau.py,sha256=JMJL3v9P4NeU37HHk5JnDBKj__a26djaVUyM3ysEm38,153355
501
+ datahub/ingestion/source/tableau/tableau.py,sha256=iesiz5xbbWwQchdZraoyGt0Csiu7eUc-OxtD0K3uK5E,153423
501
502
  datahub/ingestion/source/tableau/tableau_common.py,sha256=iVyRI1cZcOEU_VPnR9CWVzv-OnbhDPJZApbggDUBaXk,26926
502
503
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
503
504
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
@@ -506,12 +507,12 @@ datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
506
507
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
507
508
  datahub/ingestion/source/unity/config.py,sha256=IE20ybsTd082ilDrnfPXl1GmjRxbWipTTt_E_3JE7FI,14770
508
509
  datahub/ingestion/source/unity/connection_test.py,sha256=B143Wb28fS0V4GhygU9hzKqiArWBjsQO54IUCPf23dc,2586
509
- datahub/ingestion/source/unity/ge_profiler.py,sha256=DFQKOqryMWFg-NqwfFGPklNH2hHSmZGKs8ij8QmXd7w,6402
510
+ datahub/ingestion/source/unity/ge_profiler.py,sha256=rCwcXK-n_5tGQb_f-3BTO5LWOGH57flzEmrtCKFT_T8,8348
510
511
  datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
511
512
  datahub/ingestion/source/unity/proxy.py,sha256=qYgjw0COscvUk8TvgWwZKgYvkYyA3j4yc826IwfhIZg,18428
512
513
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
513
514
  datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
514
- datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
515
+ datahub/ingestion/source/unity/report.py,sha256=XFT9oQfvEB4RkTvWGgFOoQuLPUN_AIoPXZ79xeDhGHQ,2831
515
516
  datahub/ingestion/source/unity/source.py,sha256=hdHthF3c9bdGwiyhu324WB7oElTv7N6bA_70hja4Zbk,41929
516
517
  datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
517
518
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -524,7 +525,7 @@ datahub/ingestion/source_config/operation_config.py,sha256=hxF2RM0jk0HUPXYiliMni
524
525
  datahub/ingestion/source_config/pulsar.py,sha256=sklDkh62CrWV-i7Ifh6R3T3smYVso6gyRJG8HVc6RdA,5533
525
526
  datahub/ingestion/source_report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
526
527
  datahub/ingestion/source_report/ingestion_stage.py,sha256=SU_FKFZhShZATLcFr735i_hWpdqNGdAWoZxh22p3P1k,1787
527
- datahub/ingestion/source_report/pulsar.py,sha256=iKhzy644AjoFTV-gxyqBoXKMLwSMPxJFxU-3WDQRww0,1037
528
+ datahub/ingestion/source_report/pulsar.py,sha256=f6CMNw8TyPp3tuSGsLLPEhSvoQLXwxtaaM6GmNvsANU,1119
528
529
  datahub/ingestion/source_report/time_window.py,sha256=9yI5l2S1DcF7ClvUHLeN8m62I5vlhV9k-aQqSZh2l7w,229
529
530
  datahub/ingestion/transformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
530
531
  datahub/ingestion/transformer/add_dataset_browse_path.py,sha256=7ngPAAAsdV8nsqFHZyHfO5j1vkCcf5zNqOkG2Cpx1Tw,3420
@@ -1000,9 +1001,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1000
1001
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1001
1002
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1002
1003
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1003
- acryl_datahub-0.15.0.5rc9.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1004
- acryl_datahub-0.15.0.5rc9.dist-info/METADATA,sha256=VuKgWOBCJTCnPCsvt0eB4LoZYj-ig56pUIgrs86xB7w,175375
1005
- acryl_datahub-0.15.0.5rc9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1006
- acryl_datahub-0.15.0.5rc9.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1007
- acryl_datahub-0.15.0.5rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1008
- acryl_datahub-0.15.0.5rc9.dist-info/RECORD,,
1004
+ acryl_datahub-0.15.0.5rc10.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1005
+ acryl_datahub-0.15.0.5rc10.dist-info/METADATA,sha256=9INBTHoM-5OTwef7bW9Y31njlOljSaconFYwwbNHeSc,175378
1006
+ acryl_datahub-0.15.0.5rc10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1007
+ acryl_datahub-0.15.0.5rc10.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1008
+ acryl_datahub-0.15.0.5rc10.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1009
+ acryl_datahub-0.15.0.5rc10.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "0.15.0.5rc9"
3
+ __version__ = "0.15.0.5rc10"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -40,6 +40,7 @@ from datahub.ingestion.graph.connections import (
40
40
  connections_gql,
41
41
  get_id_from_connection_urn,
42
42
  )
43
+ from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
43
44
  from datahub.ingestion.graph.filters import (
44
45
  RemovedStatusFilter,
45
46
  SearchFilterRule,
@@ -125,7 +126,7 @@ def _graphql_entity_type(entity_type: str) -> str:
125
126
  return entity_type
126
127
 
127
128
 
128
- class DataHubGraph(DatahubRestEmitter):
129
+ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
129
130
  def __init__(self, config: DatahubClientConfig) -> None:
130
131
  self.config = config
131
132
  super().__init__(
@@ -0,0 +1,201 @@
1
+ import uuid
2
+ from typing import Dict, Optional, Protocol, Type
3
+
4
+ from datahub.emitter.mce_builder import Aspect
5
+ from datahub.metadata.schema_classes import (
6
+ VersionPropertiesClass,
7
+ VersionSetPropertiesClass,
8
+ )
9
+ from datahub.metadata.urns import VersionSetUrn
10
+ from datahub.utilities.urns.urn import guess_entity_type
11
+
12
+
13
+ class DataHubGraphProtocol(Protocol):
14
+ def execute_graphql(
15
+ self,
16
+ query: str,
17
+ variables: Optional[Dict],
18
+ operation_name: Optional[str] = None,
19
+ format_exception: bool = True,
20
+ ) -> Dict: ...
21
+
22
+ def get_aspect(
23
+ self,
24
+ entity_urn: str,
25
+ aspect_type: Type[Aspect],
26
+ version: int = 0,
27
+ ) -> Optional[Aspect]: ...
28
+
29
+
30
+ class EntityVersioningAPI(DataHubGraphProtocol):
31
+ LINK_VERSION_MUTATION = """
32
+ mutation($input: LinkVersionInput!) {
33
+ linkAssetVersion(input: $input) {
34
+ urn
35
+ }
36
+ }
37
+ """
38
+
39
+ UNLINK_VERSION_MUTATION = """
40
+ mutation($input: UnlinkVersionInput!) {
41
+ unlinkAssetVersion(input: $input) {
42
+ urn
43
+ }
44
+ }
45
+ """
46
+
47
+ def link_asset_to_version_set(
48
+ self,
49
+ asset_urn: str,
50
+ version_set_urn: Optional[str],
51
+ label: str,
52
+ *,
53
+ comment: Optional[str] = None,
54
+ ) -> Optional[str]:
55
+ """Sets an entity as the latest version of a version set.
56
+ Can also be used to create a new version set, with `asset_urn` as the first version.
57
+
58
+ Args:
59
+ asset_urn: URN of the entity.
60
+ version_set_urn: URN of the version set, or None to generate a new version set urn
61
+ label: Label of the version.
62
+ comment: Comment about the version.
63
+
64
+ Returns:
65
+ URN of the version set to which `asset_urn` was linked,
66
+ or None if the `asset_urn` was already linked to `version_set_urn`.
67
+ """
68
+
69
+ entity_type = guess_entity_type(asset_urn)
70
+ if version_set_urn is None:
71
+ version_set_urn = VersionSetUrn(str(uuid.uuid4()), entity_type).urn()
72
+ elif guess_entity_type(version_set_urn) != "versionSet":
73
+ raise ValueError(f"Expected version set URN, got {version_set_urn}")
74
+
75
+ entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
76
+ if entity_version and entity_version.versionSet:
77
+ if entity_version.versionSet == version_set_urn:
78
+ return None
79
+ else:
80
+ raise ValueError(
81
+ f"Asset {asset_urn} is already a version of {entity_version.versionSet}"
82
+ )
83
+
84
+ variables = {
85
+ "input": {
86
+ "versionSet": version_set_urn,
87
+ "linkedEntity": asset_urn,
88
+ "version": label,
89
+ "comment": comment,
90
+ }
91
+ }
92
+ response = self.execute_graphql(self.LINK_VERSION_MUTATION, variables)
93
+ try:
94
+ return response["linkAssetVersion"]["urn"]
95
+ except KeyError:
96
+ raise ValueError(f"Unexpected response: {response}")
97
+
98
+ def link_asset_to_versioned_asset(
99
+ self,
100
+ new_asset_urn: str,
101
+ old_asset_urn: str,
102
+ label: str,
103
+ *,
104
+ comment: Optional[str] = None,
105
+ ) -> Optional[str]:
106
+ """Sets an entity as the latest version of an existing versioned entity.
107
+
108
+ Args:
109
+ new_asset_urn: URN of the new latest entity.
110
+ old_asset_urn: URN of an existing versioned entity to link onto.
111
+ label: Label of the version.
112
+ comment: Comment about the version.
113
+
114
+ Returns:
115
+ URN of the version set to which `new_asset_urn` was linked,
116
+ or None if the `new_asset_urn` was already linked to `old_asset_urn`.
117
+ """
118
+
119
+ new_entity_type = guess_entity_type(new_asset_urn)
120
+ old_entity_type = guess_entity_type(old_asset_urn)
121
+ if new_entity_type != old_entity_type:
122
+ raise ValueError(
123
+ f"Expected URNs of the same type, got {new_entity_type} and {old_entity_type}"
124
+ )
125
+
126
+ old_entity_version = self.get_aspect(old_asset_urn, VersionPropertiesClass)
127
+ if not old_entity_version:
128
+ raise ValueError(f"Asset {old_asset_urn} is not versioned")
129
+
130
+ new_entity_version = self.get_aspect(new_asset_urn, VersionPropertiesClass)
131
+ if new_entity_version:
132
+ if new_entity_version.versionSet == old_entity_version.versionSet:
133
+ return None
134
+ else:
135
+ raise ValueError(
136
+ f"Asset {new_asset_urn} is already a version of {new_entity_version.versionSet}"
137
+ )
138
+
139
+ return self.link_asset_to_version_set(
140
+ new_asset_urn, old_entity_version.versionSet, label, comment=comment
141
+ )
142
+
143
+ def unlink_asset_from_version_set(self, asset_urn: str) -> Optional[str]:
144
+ """Unlinks an entity from its version set.
145
+
146
+ Args:
147
+ asset_urn: URN of the entity to unlink from its version set.
148
+
149
+ Returns:
150
+ If successful, the URN of the version set from which `asset_urn` was unlinked,
151
+ or None if `asset_urn` was not linked to any version set.
152
+ """
153
+
154
+ entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
155
+ if not entity_version:
156
+ return None
157
+
158
+ variables = {
159
+ "input": {
160
+ "versionSet": entity_version.versionSet,
161
+ "unlinkedEntity": asset_urn,
162
+ }
163
+ }
164
+ response = self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables)
165
+ try:
166
+ return response["unlinkAssetVersion"]["urn"]
167
+ except KeyError:
168
+ raise ValueError(f"Unexpected response: {response}")
169
+
170
+ def unlink_latest_asset_from_version_set(
171
+ self, version_set_urn: str
172
+ ) -> Optional[str]:
173
+ """Unlinks the latest version of a version set.
174
+
175
+ Args:
176
+ version_set_urn: URN of the version set.
177
+
178
+ Returns:
179
+ If successful, the URN of the entity that was unlinked from `version_set_urn`,
180
+ or None if no entity was unlinked.
181
+ """
182
+
183
+ version_set_properties = self.get_aspect(
184
+ version_set_urn, VersionSetPropertiesClass
185
+ )
186
+ if not version_set_properties:
187
+ raise ValueError(
188
+ f"Version set {version_set_urn} does not exist or has no versions"
189
+ )
190
+
191
+ variables = {
192
+ "input": {
193
+ "versionSet": version_set_urn,
194
+ "unlinkedEntity": version_set_properties.latest,
195
+ }
196
+ }
197
+ response = self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables)
198
+ try:
199
+ return response["unlinkAssetVersion"]["urn"]
200
+ except KeyError:
201
+ raise ValueError(f"Unexpected response: {response}")
@@ -1,16 +1,16 @@
1
1
  import dataclasses
2
2
  from dataclasses import field as dataclass_field
3
- from typing import List
4
3
 
5
4
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
6
5
  StaleEntityRemovalSourceReport,
7
6
  )
7
+ from datahub.utilities.lossy_collections import LossyList
8
8
 
9
9
 
10
10
  @dataclasses.dataclass
11
11
  class DataLakeSourceReport(StaleEntityRemovalSourceReport):
12
12
  files_scanned = 0
13
- filtered: List[str] = dataclass_field(default_factory=list)
13
+ filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
14
14
 
15
15
  def report_file_scanned(self) -> None:
16
16
  self.files_scanned += 1
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass, field
2
- from typing import Dict, List, Optional, Union
2
+ from typing import Dict, Optional, Union
3
3
 
4
4
  from pydantic.fields import Field
5
5
 
@@ -9,6 +9,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
9
9
  StatefulIngestionConfigBase,
10
10
  StatefulStaleMetadataRemovalConfig,
11
11
  )
12
+ from datahub.utilities.lossy_collections import LossyList
12
13
 
13
14
 
14
15
  class SagemakerSourceConfig(
@@ -42,7 +43,7 @@ class SagemakerSourceReport(StaleEntityRemovalSourceReport):
42
43
  jobs_scanned = 0
43
44
  jobs_processed = 0
44
45
  datasets_scanned = 0
45
- filtered: List[str] = field(default_factory=list)
46
+ filtered: LossyList[str] = field(default_factory=LossyList)
46
47
  model_endpoint_lineage = 0
47
48
  model_group_lineage = 0
48
49
 
@@ -141,7 +141,7 @@ class BigQueryV2Report(
141
141
  profiling_skipped_invalid_partition_type: Dict[str, str] = field(
142
142
  default_factory=TopKDict
143
143
  )
144
- profiling_skipped_partition_profiling_disabled: List[str] = field(
144
+ profiling_skipped_partition_profiling_disabled: LossyList[str] = field(
145
145
  default_factory=LossyList
146
146
  )
147
147
  allow_pattern: Optional[str] = None
@@ -1,14 +1,14 @@
1
1
  import dataclasses
2
2
  from dataclasses import field as dataclass_field
3
- from typing import List
4
3
 
5
4
  from datahub.ingestion.api.source import SourceReport
5
+ from datahub.utilities.lossy_collections import LossyList
6
6
 
7
7
 
8
8
  @dataclasses.dataclass
9
9
  class DeltaLakeSourceReport(SourceReport):
10
10
  files_scanned = 0
11
- filtered: List[str] = dataclass_field(default_factory=list)
11
+ filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
12
12
 
13
13
  def report_file_scanned(self) -> None:
14
14
  self.files_scanned += 1
@@ -68,6 +68,7 @@ from datahub.metadata.schema_classes import (
68
68
  StringTypeClass,
69
69
  UnionTypeClass,
70
70
  )
71
+ from datahub.utilities.lossy_collections import LossyList
71
72
  from datahub.utilities.registries.domain_registry import DomainRegistry
72
73
 
73
74
  MAX_ITEMS_TO_RETRIEVE = 100
@@ -120,7 +121,7 @@ class DynamoDBConfig(
120
121
 
121
122
  @dataclass
122
123
  class DynamoDBSourceReport(StaleEntityRemovalSourceReport, ClassificationReportMixin):
123
- filtered: List[str] = field(default_factory=list)
124
+ filtered: LossyList[str] = field(default_factory=LossyList)
124
125
 
125
126
  def report_dropped(self, name: str) -> None:
126
127
  self.filtered.append(name)
@@ -62,6 +62,7 @@ from datahub.metadata.schema_classes import (
62
62
  SubTypesClass,
63
63
  )
64
64
  from datahub.utilities.config_clean import remove_protocol
65
+ from datahub.utilities.lossy_collections import LossyList
65
66
  from datahub.utilities.urns.dataset_urn import DatasetUrn
66
67
 
67
68
  logger = logging.getLogger(__name__)
@@ -189,7 +190,7 @@ class ElasticToSchemaFieldConverter:
189
190
  @dataclass
190
191
  class ElasticsearchSourceReport(SourceReport):
191
192
  index_scanned: int = 0
192
- filtered: List[str] = field(default_factory=list)
193
+ filtered: LossyList[str] = field(default_factory=LossyList)
193
194
 
194
195
  def report_index_scanned(self, index: str) -> None:
195
196
  self.index_scanned += 1
@@ -115,26 +115,30 @@ class GEProfilingConfig(GEProfilingBaseConfig):
115
115
  )
116
116
  max_number_of_fields_to_profile: Optional[pydantic.PositiveInt] = Field(
117
117
  default=None,
118
- description="A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. The cost of profiling goes up significantly as the number of columns to profile goes up.",
118
+ description="A positive integer that specifies the maximum number of columns to profile for "
119
+ "any table. `None` implies all columns. The cost of profiling goes up significantly as the "
120
+ "number of columns to profile goes up.",
119
121
  )
120
122
 
121
123
  profile_if_updated_since_days: Optional[pydantic.PositiveFloat] = Field(
122
124
  default=None,
123
- description="Profile table only if it has been updated since these many number of days. If set to `null`, no constraint of last modified time for tables to profile. Supported only in `snowflake` and `BigQuery`.",
125
+ description="Profile table only if it has been updated since these many number of days. "
126
+ "If set to `null`, no constraint of last modified time for tables to profile. "
127
+ "Supported only in `snowflake` and `BigQuery`.",
124
128
  )
125
129
 
126
130
  profile_table_size_limit: Optional[int] = Field(
127
131
  default=5,
128
132
  description="Profile tables only if their size is less than specified GBs. If set to `null`, "
129
- "no limit on the size of tables to profile. Supported only in `snowflake` and `BigQuery`"
130
- "Supported for `oracle` based on calculated size from gathered stats.",
133
+ "no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
134
+ "`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
131
135
  )
132
136
 
133
137
  profile_table_row_limit: Optional[int] = Field(
134
138
  default=5000000,
135
- description="Profile tables only if their row count is less than specified count. If set to `null`, "
136
- "no limit on the row count of tables to profile. Supported only in `snowflake` and `BigQuery`"
137
- "Supported for `oracle` based on gathered stats.",
139
+ description="Profile tables only if their row count is less than specified count. "
140
+ "If set to `null`, no limit on the row count of tables to profile. Supported only in "
141
+ "`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
138
142
  )
139
143
 
140
144
  profile_table_row_count_estimate_only: bool = Field(
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from dataclasses import dataclass, field
3
- from typing import Any, Dict, List, Optional
3
+ from typing import Any, Dict, Optional
4
4
 
5
5
  from humanfriendly import format_timespan
6
6
  from pydantic import Field, validator
@@ -20,6 +20,7 @@ from datahub.ingestion.source_config.operation_config import (
20
20
  OperationConfig,
21
21
  is_profiling_enabled,
22
22
  )
23
+ from datahub.utilities.lossy_collections import LossyList
23
24
  from datahub.utilities.stats_collections import TopKDict, int_top_k_dict
24
25
 
25
26
  logger = logging.getLogger(__name__)
@@ -198,7 +199,7 @@ class TimingClass:
198
199
  class IcebergSourceReport(StaleEntityRemovalSourceReport):
199
200
  tables_scanned: int = 0
200
201
  entities_profiled: int = 0
201
- filtered: List[str] = field(default_factory=list)
202
+ filtered: LossyList[str] = field(default_factory=LossyList)
202
203
  load_table_timings: TimingClass = field(default_factory=TimingClass)
203
204
  processing_table_timings: TimingClass = field(default_factory=TimingClass)
204
205
  profiling_table_timings: TimingClass = field(default_factory=TimingClass)
@@ -50,6 +50,7 @@ from datahub.metadata.schema_classes import (
50
50
  OriginTypeClass,
51
51
  StatusClass,
52
52
  )
53
+ from datahub.utilities.lossy_collections import LossyList
53
54
 
54
55
  logger = logging.getLogger(__name__)
55
56
  nest_asyncio.apply()
@@ -173,7 +174,7 @@ class OktaConfig(StatefulIngestionConfigBase, ConfigModel):
173
174
 
174
175
  @dataclass
175
176
  class OktaSourceReport(StaleEntityRemovalSourceReport):
176
- filtered: List[str] = field(default_factory=list)
177
+ filtered: LossyList[str] = field(default_factory=LossyList)
177
178
 
178
179
  def report_filtered(self, name: str) -> None:
179
180
  self.filtered.append(name)
@@ -73,6 +73,7 @@ from datahub.metadata.schema_classes import (
73
73
  OwnershipSourceTypeClass,
74
74
  SubTypesClass,
75
75
  )
76
+ from datahub.utilities.lossy_collections import LossyList
76
77
  from datahub.utilities.mapping import Constants, OperationProcessor
77
78
  from datahub.utilities.registries.domain_registry import DomainRegistry
78
79
  from datahub.utilities.str_enum import StrEnum
@@ -190,7 +191,7 @@ def get_kafka_admin_client(
190
191
  @dataclass
191
192
  class KafkaSourceReport(StaleEntityRemovalSourceReport):
192
193
  topics_scanned: int = 0
193
- filtered: List[str] = field(default_factory=list)
194
+ filtered: LossyList[str] = field(default_factory=LossyList)
194
195
 
195
196
  def report_topic_scanned(self, topic: str) -> None:
196
197
  self.topics_scanned += 1