acryl-datahub 1.0.0.1rc5__py3-none-any.whl → 1.0.0.1rc7__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (32)
  1. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/METADATA +2451 -2451
  2. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/RECORD +32 -32
  3. datahub/_version.py +1 -1
  4. datahub/cli/specific/dataset_cli.py +26 -10
  5. datahub/emitter/mcp_builder.py +8 -0
  6. datahub/emitter/rest_emitter.py +13 -5
  7. datahub/errors.py +4 -0
  8. datahub/ingestion/api/source.py +2 -1
  9. datahub/ingestion/api/source_helpers.py +9 -1
  10. datahub/ingestion/graph/client.py +20 -9
  11. datahub/ingestion/graph/filters.py +41 -16
  12. datahub/ingestion/sink/datahub_rest.py +2 -2
  13. datahub/ingestion/source/cassandra/cassandra.py +1 -10
  14. datahub/ingestion/source/common/subtypes.py +1 -0
  15. datahub/ingestion/source/iceberg/iceberg.py +159 -102
  16. datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
  17. datahub/ingestion/source/powerbi/config.py +31 -4
  18. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  19. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
  20. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  21. datahub/ingestion/source/powerbi/powerbi.py +12 -1
  22. datahub/ingestion/source/sigma/config.py +3 -4
  23. datahub/ingestion/source/sigma/sigma.py +10 -6
  24. datahub/ingestion/source/sql/oracle.py +51 -4
  25. datahub/ingestion/source/usage/usage_common.py +0 -65
  26. datahub/sdk/search_client.py +81 -8
  27. datahub/sdk/search_filters.py +73 -11
  28. datahub/utilities/threaded_iterator_executor.py +16 -3
  29. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/WHEEL +0 -0
  30. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/entry_points.txt +0 -0
  31. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/licenses/LICENSE +0 -0
  32. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/RECORD CHANGED
@@ -1,9 +1,9 @@
- acryl_datahub-1.0.0.1rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.0.0.1rc7.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=WcqnUOhppm3OjJxRxReX0PiJpy4Wv0kAAnaBQ0m4FCw,323
+ datahub/_version.py,sha256=AZj-rwp4edRcZvS9Mq4fxTeV64QHFW-6zysNAtjc2qg,323
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
- datahub/errors.py,sha256=bwtiNzFdVFze0IVKDEXQutkwk5j7cZkfXCUYCZIDSYg,565
+ datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -86,7 +86,7 @@ datahub/cli/specific/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
  datahub/cli/specific/assertions_cli.py,sha256=q0ODpyWS3LVR8UbF3NM6KRisabodJ0UUwyPty9a8AIk,5375
  datahub/cli/specific/datacontract_cli.py,sha256=IkBovwuPT5jNB8X-8AQJRO4C9cFSNm1at8v4YctLFgQ,2531
  datahub/cli/specific/dataproduct_cli.py,sha256=wSksU4xjGvAZJiiI7rSyjSItTu72oBPiXZ0-UL81zn0,15091
- datahub/cli/specific/dataset_cli.py,sha256=mR5YIDos4McjqlaqufNNVPx6YSUsbOm21eyNKqAFAJA,7839
+ datahub/cli/specific/dataset_cli.py,sha256=SYxhGLzv8ZClTiE3rZE99blxh15ZMyQCQqpJWx9SRKc,8570
  datahub/cli/specific/file_loader.py,sha256=YMyv_evdKyHSft5Tm_kOcqJ4ALpRmMm54ZJAyl7Nxqs,773
  datahub/cli/specific/forms_cli.py,sha256=OLVeG8NtK1eDBuUKCT5Ald35np8__f8mLzbZM_zUfWU,1484
  datahub/cli/specific/group_cli.py,sha256=xPUYk48VbVXLMj-z9VNW0RZzXOe4rQsc2jLwSOGCoec,1967
@@ -122,11 +122,11 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
  datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
- datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
+ datahub/emitter/mcp_builder.py,sha256=JyAC8obvkf6ZpINJ8I2p-Ofr52-tuoQBDbxp-bhjyrM,11871
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
  datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
  datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
- datahub/emitter/rest_emitter.py,sha256=NhA-4LnLxAHVz1cLiYqFnfX2YZTsCLdOBaFUtqqJpPs,30197
+ datahub/emitter/rest_emitter.py,sha256=PzZkt0JlCnexJgqzYQxSQgMcHj-qDg8lIsmhUn4GPfU,30631
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -145,8 +145,8 @@ datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX
  datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
  datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
- datahub/ingestion/api/source.py,sha256=Kz8xo0IY_5O3p5WE1i5dTmSK9IU20nqo4x6fvWcMAYw,19303
- datahub/ingestion/api/source_helpers.py,sha256=poP6EvkLsaiPM5fhjS5bcf-JMHMdPNMLv-eXCIVMUzM,19971
+ datahub/ingestion/api/source.py,sha256=HrQahSEBeapMDnW8S6wSEyNLLE9RCs2R6eUrVaibuuc,19349
+ datahub/ingestion/api/source_helpers.py,sha256=OhgBLdpUIuqF_gl4uV8Y2csp-z97zzXeFj2I5aUypCI,20158
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -171,11 +171,11 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/graph/client.py,sha256=rdX2DXqTXyLyS1_qiUzc3zzIE8CFheP2pYi1I68r6Dc,65567
+ datahub/ingestion/graph/client.py,sha256=Qtjf5YrQeQzcTb0qxr6-y4MSEKSJm8f0hO6BoeRA_yI,65916
  datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
- datahub/ingestion/graph/filters.py,sha256=VFZKmef7ay1sQ5zRDDC1M_i6T96VzIgs-FzMs5eibiQ,7347
+ datahub/ingestion/graph/filters.py,sha256=hZ8YOQRxC0_mbAx_SLkgqyYXr0Fw3O4U2wo2UMuDHJY,8653
  datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=iEulcZMLBQuUfe9MAYyobMekvMcNm4dqVcS_C_2KfrI,9736
  datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
@@ -190,7 +190,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
- datahub/ingestion/sink/datahub_rest.py,sha256=4hvMDUxHMJXGgk3Iy7fcYGKixjvVd9DHD03X-F3kOg0,12976
+ datahub/ingestion/sink/datahub_rest.py,sha256=0te9kxDXAJU8A7wfEhDb4R9VDBYYDpy-YMPJZWEEKCM,12982
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -263,7 +263,7 @@ datahub/ingestion/source/bigquery_v2/queries.py,sha256=c1BpeQP8p8y-FOhmiQkkY2IqG
  datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAPDsvN2JRd2dmM1t1J1mRylfKiPen4,19530
  datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
  datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/cassandra/cassandra.py,sha256=eIB2qiE6UaXCDmogWHhEafoPrBu3nlHsgc_1-2udOcc,14834
+ datahub/ingestion/source/cassandra/cassandra.py,sha256=lKvPP0Uahi9xw_yh9cArPPtwvAauXolaEk-6f-jhpz4,14558
  datahub/ingestion/source/cassandra/cassandra_api.py,sha256=UVGQTsk6O57Q6wrWo54bQPLtStTWhw_Fq6fgW3Bjgk8,12515
  datahub/ingestion/source/cassandra/cassandra_config.py,sha256=vIMUOzazWTGi03B51vI0-YMxaMJHUGmCxJJgd8pKhC8,3791
  datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5PaKuu9fNEKxEbhIrPI-T9gaVoM87NQ,11063
@@ -272,7 +272,7 @@ datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
  datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
  datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNClLlmOfANS7U6rChhdthRX9s9iUv9k,2411
- datahub/ingestion/source/common/subtypes.py,sha256=nSGKiCD491Bz9MRWzJK3sb10z8vLWP2wwSuSq-L8ELk,3068
+ datahub/ingestion/source/common/subtypes.py,sha256=UZca0ZQUQdoXr5Z-3AIUT9gIlPt-XwbMNjj7WEEiR_4,3107
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
@@ -333,9 +333,9 @@ datahub/ingestion/source/hex/hex.py,sha256=DPpsi5e-sdUgbS0Okyvx1mvc00Adu47zA65oF
  datahub/ingestion/source/hex/mapper.py,sha256=6dsGvvhPAOAbAG1ayxLwipgJGt1q7YanWYfMX3rZeiM,12603
  datahub/ingestion/source/hex/model.py,sha256=hmMfOLEGZcKjwy2DW29OPf_9_Q_TesgnUTCen2br_fA,1471
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/iceberg/iceberg.py,sha256=eVeDXn0YZnnG4qkSbTCNi-Pcod9wEh-LnUkjPP66RtM,28753
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=i9o0ia2vQUGqoagN7GgsoaUlhjj9xKBNP-3ia2cMgHY,30762
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
- datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
+ datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
  datahub/ingestion/source/identity/okta.py,sha256=jC21myJuMRTaPgj0OD9heaC-mz8ECjqpy2hSJwlUSwM,31943
@@ -375,16 +375,16 @@ datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH5
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/config.py,sha256=1WFK-JxcgBEIZ2XTwuH1PvNXYcwqEJR-IYTUTv3Z4o8,22820
+ datahub/ingestion/source/powerbi/config.py,sha256=5rG62dspGF9jIo8l6HLpB6ECv5n-t1un2ZyGiisD784,24219
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
- datahub/ingestion/source/powerbi/powerbi.py,sha256=VlXgaImfUYjdXkJ1cCrJawkzl-pE1R-XCVr27eVbZ-E,55512
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=a5itVuGmg-0xAQK5a-cXB5UxpR3rLJx0o2x_lz-8ox8,55955
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
+ datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
  datahub/ingestion/source/powerbi/m_query/parser.py,sha256=5KqhUwj9H9yL9ZMPP9oSeVGiZjvXjw6Iu_HrGr95E5M,5876
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=CHGlpZ4ahNksZ6bUk3rrTF4__rLGhUgWZTa_Ivt_zaI,32565
- datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5zteabdSHQuHlMug23f4RodUgmIk,16959
+ datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=aOhAb8U4OEZnO4ufnb-Cm3KMpdy-JF6r9YMK3RNZs5A,35906
+ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=ISH8Xjx51q2S81fn2v5RhCCU-kRAW3juxM0rMFs4TDo,17413
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=NIKNNHAE4kTJefTM1WR-StJi9NuingaRYn_mS_kV6A8,6180
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
  datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -438,9 +438,9 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
  datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
  datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
  datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/sigma/config.py,sha256=zGh0ZU2Ty5NHfNXAVwFxVkK4NlsNSxtAyfCgMJJvzdc,3795
+ datahub/ingestion/source/sigma/config.py,sha256=yfdKQYvI5hKVl8gNAKIcJe-VW3klvdDqYbUP76gJQDI,3812
  datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
- datahub/ingestion/source/sigma/sigma.py,sha256=dgaIiiOGTu2trL_OVMOOEANA2UlB-M7LQ4TcDVBqiJA,24086
+ datahub/ingestion/source/sigma/sigma.py,sha256=ApVtToI1cnpscvdEMcC-3EuTYnczW9CH-s912z-pDpk,24262
  datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
@@ -475,7 +475,7 @@ datahub/ingestion/source/sql/hive.py,sha256=n0XCGkNkVAe-TEyXbxlefvohbmtALbWaC1a0
  datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
- datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
+ datahub/ingestion/source/sql/oracle.py,sha256=PhQZGiZbf_rRtNMlNV-MXSoN3geDBb9zXsGJRVvaKbo,29831
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
  datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
  datahub/ingestion/source/sql/sql_common.py,sha256=jsweel_-vesNtcPonnfS11OUrlcZnS3wGt5r0dYTPnM,48637
@@ -533,7 +533,7 @@ datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e36wmYzs8xtpD632z6pLiw,9974
  datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
- datahub/ingestion/source/usage/usage_common.py,sha256=YGszLjmESiUXnpcPfnyQHtoM57HyWsIiLOQd5_sxECg,12238
+ datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
  datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/vertexai/vertexai.py,sha256=TXTa-Cm1C7xkbTNXNmKr_hi2FDJ9VG-ahrxk9yuxaTg,43635
  datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
@@ -906,8 +906,8 @@ datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
  datahub/sdk/entity_client.py,sha256=Sxe6H6Vr_tqLJu5KW7MJfLWJ6mgh4mbsx7u7MOBpM64,5052
  datahub/sdk/main_client.py,sha256=h2MKRhR-BO0zGCMhF7z2bTncX4hagKrAYwR3wTNTtzA,3666
  datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
- datahub/sdk/search_client.py,sha256=h9O_rsphkTdpd5hMPay3xSXfJM761cf4PjNCBwCnFzU,1309
- datahub/sdk/search_filters.py,sha256=WaJKFUKT9P70NBkh36f44rZrJ7zRJpRwu8mN-rEx5y0,11364
+ datahub/sdk/search_client.py,sha256=BJR5t7Ff2oDNOGLcSCp9YHzrGKbgOQr7T8XQKGEpucw,3437
+ datahub/sdk/search_filters.py,sha256=BcMhvG5hGYAATtLPLz4WLRjKApX2oLYrrcGn-CG__ek,12901
  datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
  datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
@@ -996,7 +996,7 @@ datahub/utilities/sqllineage_patch.py,sha256=0Buh50bmEqJFg1HFRCknCnePo1cecI4JmGx
  datahub/utilities/stats_collections.py,sha256=CxaTcrF7J6am7iX5jPhFKne535UcyDk_oreVwR013fU,1625
  datahub/utilities/str_enum.py,sha256=EsqCLPbrqyQ2YU_wt7QP-a6P5fnpIshXJ3AI8gLBlVA,474
  datahub/utilities/tee_io.py,sha256=jBrsUfTPTk9IICntfGOG0HR-Fjp8BQMde-FPQ4r3kuI,601
- datahub/utilities/threaded_iterator_executor.py,sha256=WC4tvJ4TQRkH0VO_FD91GbedcKUqx0lc4tHDNOiF6ps,1770
+ datahub/utilities/threaded_iterator_executor.py,sha256=6BpCE0os3d-uMYxHBilPQC-JvEBkU6JQY4bGs06JKYI,2004
  datahub/utilities/threading_timeout.py,sha256=hOzDI55E3onXblHNwGsePJUWMXo5zqaWCnoYdL2-KPM,1316
  datahub/utilities/time.py,sha256=Q7S_Zyom8C2zcl2xFbjNw6K8nZsCub5XGAB4OEmIS34,1847
  datahub/utilities/topological_sort.py,sha256=kcK5zPSR393fgItr-KSLV3bDqfJfBRS8E5kkCpPBgUY,1358
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.0.0.1rc5.dist-info/METADATA,sha256=tMnH_4TdNIZMpke-1KadBIdM0nx_sJtliYXrZJXUkbs,176849
- acryl_datahub-1.0.0.1rc5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- acryl_datahub-1.0.0.1rc5.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
- acryl_datahub-1.0.0.1rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.0.0.1rc5.dist-info/RECORD,,
+ acryl_datahub-1.0.0.1rc7.dist-info/METADATA,sha256=QkeMAnAXXez9FFTnJVpxhOJMNDadF0gfVQ3uz3Fh4i8,176849
+ acryl_datahub-1.0.0.1rc7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ acryl_datahub-1.0.0.1rc7.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+ acryl_datahub-1.0.0.1rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.0.0.1rc7.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.0.0.1rc5"
+ __version__ = "1.0.0.1rc7"


  def is_dev_mode() -> bool:
datahub/cli/specific/dataset_cli.py CHANGED
@@ -29,13 +29,16 @@ def dataset() -> None:
  name="upsert",
  )
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
+ @click.option(
+ "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
+ )
  @upgrade.check_upgrade
  @telemetry.with_telemetry()
- def upsert(file: Path) -> None:
+ def upsert(file: Path, dry_run: bool) -> None:
  """Upsert attributes to a Dataset in DataHub."""
  # Call the sync command with to_datahub=True to perform the upsert operation
  ctx = click.get_current_context()
- ctx.invoke(sync, file=str(file), to_datahub=True)
+ ctx.invoke(sync, file=str(file), dry_run=dry_run, to_datahub=True)


  @dataset.command(
@@ -167,11 +170,16 @@ def file(lintcheck: bool, lintfix: bool, file: str) -> None:
  )
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
  @click.option("--to-datahub/--from-datahub", required=True, is_flag=True)
+ @click.option(
+ "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
+ )
  @upgrade.check_upgrade
  @telemetry.with_telemetry()
- def sync(file: str, to_datahub: bool) -> None:
+ def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
  """Sync a Dataset file to/from DataHub"""

+ dry_run_prefix = "[dry-run]: " if dry_run else "" # prefix to use in messages
+
  failures: List[str] = []
  with get_default_graph() as graph:
  datasets = Dataset.from_yaml(file)
@@ -189,7 +197,7 @@ def sync(file: str, to_datahub: bool) -> None:
  click.secho(
  "\n\t- ".join(
  [
- f"Skipping Dataset {dataset.urn} due to missing entity references: "
+ f"{dry_run_prefix}Skipping Dataset {dataset.urn} due to missing entity references: "
  ]
  + missing_entity_references
  ),
@@ -199,13 +207,18 @@ def sync(file: str, to_datahub: bool) -> None:
  continue
  try:
  for mcp in dataset.generate_mcp():
- graph.emit(mcp)
- click.secho(f"Update succeeded for urn {dataset.urn}.", fg="green")
+ if not dry_run:
+ graph.emit(mcp)
+ click.secho(
+ f"{dry_run_prefix}Update succeeded for urn {dataset.urn}.",
+ fg="green",
+ )
  except Exception as e:
  click.secho(
- f"Update failed for id {id}. due to {e}",
+ f"{dry_run_prefix}Update failed for id {id}. due to {e}",
  fg="red",
  )
+ failures.append(dataset.urn)
  else:
  # Sync from DataHub
  if graph.exists(dataset.urn):
@@ -215,13 +228,16 @@ def sync(file: str, to_datahub: bool) -> None:
  existing_dataset: Dataset = Dataset.from_datahub(
  graph=graph, urn=dataset.urn, config=dataset_get_config
  )
- existing_dataset.to_yaml(Path(file))
+ if not dry_run:
+ existing_dataset.to_yaml(Path(file))
+ else:
+ click.secho(f"{dry_run_prefix}Will update file {file}")
  else:
- click.secho(f"Dataset {dataset.urn} does not exist")
+ click.secho(f"{dry_run_prefix}Dataset {dataset.urn} does not exist")
  failures.append(dataset.urn)
  if failures:
  click.secho(
- f"\nFailed to sync the following Datasets: {', '.join(failures)}",
+ f"\n{dry_run_prefix}Failed to sync the following Datasets: {', '.join(failures)}",
  fg="red",
  )
  raise click.Abort()
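The new -n/--dry-run flag on upsert and sync validates a dataset YAML file without emitting MCPs or rewriting the file, prefixing its messages with "[dry-run]: ". A minimal sketch of exercising it through click's test runner; the YAML path is a placeholder, and a DataHub instance reachable via the usual ~/.datahubenv configuration is assumed:

from click.testing import CliRunner

from datahub.cli.specific.dataset_cli import dataset

runner = CliRunner()
# Equivalent to: datahub dataset sync -f datasets.yaml --to-datahub --dry-run
result = runner.invoke(
    dataset, ["sync", "-f", "datasets.yaml", "--to-datahub", "--dry-run"]
)
print(result.output)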
datahub/emitter/mcp_builder.py CHANGED
@@ -117,6 +117,14 @@ class ContainerKey(DatahubKey):
  PlatformKey = ContainerKey


+ class NamespaceKey(ContainerKey):
+ """
+ For Iceberg namespaces (databases/schemas)
+ """
+
+ namespace: str
+
+
  class DatabaseKey(ContainerKey):
  database: str

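A rough sketch of how the new NamespaceKey might be used to build a container key for an Iceberg namespace; the platform/instance/env fields are assumed to be inherited from ContainerKey, and the values here are purely illustrative:

from datahub.emitter.mcp_builder import NamespaceKey

# Container key for an Iceberg namespace (database/schema); as_urn() is assumed
# to come from the existing ContainerKey machinery.
namespace_key = NamespaceKey(
    platform="iceberg",
    instance="my_catalog",  # illustrative platform instance
    env="PROD",
    namespace="analytics",
)
print(namespace_key.as_urn())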
datahub/emitter/rest_emitter.py CHANGED
@@ -41,7 +41,7 @@ from datahub.configuration.common import (
  TraceTimeoutError,
  TraceValidationError,
  )
- from datahub.emitter.aspect import JSON_CONTENT_TYPE
+ from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
  from datahub.emitter.generic_emitter import Emitter
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.emitter.request_helper import make_curl_command
@@ -109,9 +109,9 @@ class RestSinkEndpoint(ConfigEnum):
  OPENAPI = auto()


- DEFAULT_REST_SINK_ENDPOINT = pydantic.parse_obj_as(
+ DEFAULT_REST_EMITTER_ENDPOINT = pydantic.parse_obj_as(
  RestSinkEndpoint,
- os.getenv("DATAHUB_REST_SINK_DEFAULT_ENDPOINT", RestSinkEndpoint.RESTLI),
+ os.getenv("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT", RestSinkEndpoint.RESTLI),
  )


@@ -229,7 +229,9 @@ class DataHubRestEmitter(Closeable, Emitter):
  ca_certificate_path: Optional[str] = None,
  client_certificate_path: Optional[str] = None,
  disable_ssl_verification: bool = False,
- openapi_ingestion: bool = False,
+ openapi_ingestion: bool = (
+ DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI
+ ),
  default_trace_mode: bool = False,
  ):
  if not gms_server:
@@ -359,8 +361,14 @@ class DataHubRestEmitter(Closeable, Emitter):
  )["aspect"]["json"]
  else:
  obj = mcp.aspect.to_obj()
- if obj.get("value") and obj.get("contentType") == JSON_CONTENT_TYPE:
+ content_type = obj.get("contentType")
+ if obj.get("value") and content_type == JSON_CONTENT_TYPE:
+ # Undo double serialization.
  obj = json.loads(obj["value"])
+ elif content_type == JSON_PATCH_CONTENT_TYPE:
+ raise NotImplementedError(
+ "Patches are not supported for OpenAPI ingestion. Set the endpoint to RESTLI."
+ )
  aspect_value = pre_json_transform(obj)
  return (
  url,
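With the rename, the emitter-level default is driven by the DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT environment variable, and openapi_ingestion now defaults to whatever that resolves to. A hedged sketch of forcing OpenAPI ingestion for a single emitter; the server URL and dataset URN are placeholders:

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DataHubRestEmitter
from datahub.metadata.schema_classes import StatusClass

emitter = DataHubRestEmitter("http://localhost:8080", openapi_ingestion=True)
mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,example.table,PROD)",
    aspect=StatusClass(removed=False),
)
# Patch-style aspects (JSON_PATCH_CONTENT_TYPE) would raise NotImplementedError here.
emitter.emit(mcp)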
datahub/errors.py CHANGED
@@ -31,6 +31,10 @@ class MultipleSubtypesWarning(Warning):
  pass


+ class SearchFilterWarning(Warning):
+ pass
+
+
  class ExperimentalWarning(Warning):
  pass

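Code that still passes the legacy extra_or_filters shape will now trigger this warning category, which can be handled with the standard warnings machinery; for example:

import warnings

from datahub.errors import SearchFilterWarning

# Escalate the backwards-compatibility warning from generate_filter into an error
# (e.g. during tests) so callers migrate to the new RawSearchFilter shape.
warnings.simplefilter("error", SearchFilterWarning)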
datahub/ingestion/api/source.py CHANGED
@@ -51,6 +51,7 @@ from datahub.ingestion.api.source_helpers import (
  from datahub.ingestion.api.workunit import MetadataWorkUnit
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
  from datahub.metadata.schema_classes import UpstreamLineageClass
+ from datahub.sdk.entity import Entity
  from datahub.utilities.lossy_collections import LossyDict, LossyList
  from datahub.utilities.type_annotations import get_class_from_annotation

@@ -480,7 +481,7 @@ class Source(Closeable, metaclass=ABCMeta):

  def get_workunits_internal(
  self,
- ) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper]]:
+ ) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper, Entity]]:
  raise NotImplementedError(
  "get_workunits_internal must be implemented if get_workunits is not overriden."
  )
datahub/ingestion/api/source_helpers.py CHANGED
@@ -35,6 +35,7 @@ from datahub.metadata.schema_classes import (
  TimeWindowSizeClass,
  )
  from datahub.metadata.urns import DatasetUrn, GlossaryTermUrn, TagUrn, Urn
+ from datahub.sdk.entity import Entity
  from datahub.specific.dataset import DatasetPatchBuilder
  from datahub.telemetry import telemetry
  from datahub.utilities.urns.error import InvalidUrnError
@@ -49,7 +50,12 @@ logger = logging.getLogger(__name__)

  def auto_workunit(
  stream: Iterable[
- Union[MetadataChangeEventClass, MetadataChangeProposalWrapper, MetadataWorkUnit]
+ Union[
+ MetadataChangeEventClass,
+ MetadataChangeProposalWrapper,
+ MetadataWorkUnit,
+ Entity,
+ ]
  ],
  ) -> Iterable[MetadataWorkUnit]:
  """Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
@@ -62,6 +68,8 @@
  )
  elif isinstance(item, MetadataChangeProposalWrapper):
  yield item.as_workunit()
+ elif isinstance(item, Entity):
+ yield from item.as_workunits()
  else:
  yield item

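Together with the widened Source.get_workunits_internal signature above, auto_workunit can now consume SDK entities directly and expand them into work units. A rough sketch, assuming the high-level SDK Dataset class; its constructor arguments here are illustrative:

from datahub.ingestion.api.source_helpers import auto_workunit
from datahub.sdk import Dataset

dataset = Dataset(platform="hive", name="example.table", description="demo table")
# Each underlying aspect of the entity is assumed to become one MetadataWorkUnit.
workunits = list(auto_workunit([dataset]))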
datahub/ingestion/graph/client.py CHANGED
@@ -33,7 +33,7 @@ from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
  from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.emitter.rest_emitter import (
- DEFAULT_REST_SINK_ENDPOINT,
+ DEFAULT_REST_EMITTER_ENDPOINT,
  DEFAULT_REST_TRACE_MODE,
  DatahubRestEmitter,
  RestSinkEndpoint,
@@ -49,6 +49,7 @@ from datahub.ingestion.graph.connections import (
  )
  from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
  from datahub.ingestion.graph.filters import (
+ RawSearchFilter,
  RawSearchFilterRule,
  RemovedStatusFilter,
  generate_filter,
@@ -75,10 +76,11 @@ from datahub.metadata.schema_classes import (
  SystemMetadataClass,
  TelemetryClientIdClass,
  )
+ from datahub.metadata.urns import CorpUserUrn, Urn
  from datahub.telemetry.telemetry import telemetry_instance
  from datahub.utilities.perf_timer import PerfTimer
  from datahub.utilities.str_enum import StrEnum
- from datahub.utilities.urns.urn import Urn, guess_entity_type
+ from datahub.utilities.urns.urn import guess_entity_type

  if TYPE_CHECKING:
  from datahub.ingestion.sink.datahub_rest import (
@@ -116,7 +118,7 @@ def entity_type_to_graphql(entity_type: str) -> str:
  """Convert the entity types into GraphQL "EntityType" enum values."""

  # Hard-coded special cases.
- if entity_type == "corpuser":
+ if entity_type == CorpUserUrn.ENTITY_TYPE:
  return "CORP_USER"

  # Convert camelCase to UPPER_UNDERSCORE.
@@ -133,6 +135,14 @@ def entity_type_to_graphql(entity_type: str) -> str:
  return entity_type


+ def flexible_entity_type_to_graphql(entity_type: str) -> str:
+ if entity_type.upper() == entity_type:
+ # Assume that we were passed a graphql EntityType enum value,
+ # so no conversion is needed.
+ return entity_type
+ return entity_type_to_graphql(entity_type)
+
+
  class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  def __init__(self, config: DatahubClientConfig) -> None:
  self.config = config
@@ -147,7 +157,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  ca_certificate_path=self.config.ca_certificate_path,
  client_certificate_path=self.config.client_certificate_path,
  disable_ssl_verification=self.config.disable_ssl_verification,
- openapi_ingestion=DEFAULT_REST_SINK_ENDPOINT == RestSinkEndpoint.OPENAPI,
+ openapi_ingestion=DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI,
  default_trace_mode=DEFAULT_REST_TRACE_MODE == RestTraceMode.ENABLED,
  )

@@ -805,7 +815,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):

  :return: An iterable of (urn, schema info) tuple that match the filters.
  """
- types = [entity_type_to_graphql("dataset")]
+ types = self._get_types(["dataset"])

  # Add the query default of * if no query is specified.
  query = query or "*"
@@ -873,10 +883,10 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  env: Optional[str] = None,
  query: Optional[str] = None,
  container: Optional[str] = None,
- status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
+ status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
  batch_size: int = 10000,
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
- extra_or_filters: Optional[List[Dict[str, List[RawSearchFilterRule]]]] = None,
+ extra_or_filters: Optional[RawSearchFilter] = None,
  ) -> Iterable[str]:
  """Fetch all urns that match all of the given filters.

@@ -968,7 +978,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
  batch_size: int = 10000,
  extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
- extra_or_filters: Optional[List[Dict[str, List[RawSearchFilterRule]]]] = None,
+ extra_or_filters: Optional[RawSearchFilter] = None,
  extra_source_fields: Optional[List[str]] = None,
  skip_cache: bool = False,
  ) -> Iterable[dict]:
@@ -1121,7 +1131,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  )

  types = [
- entity_type_to_graphql(entity_type) for entity_type in entity_types
+ flexible_entity_type_to_graphql(entity_type)
+ for entity_type in entity_types
  ]
  return types

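The new module-level helper accepts either DataHub entity type names or strings that are already GraphQL EntityType enum values; a small sketch of the behaviour implied by the diff:

from datahub.ingestion.graph.client import flexible_entity_type_to_graphql

assert flexible_entity_type_to_graphql("dataset") == "DATASET"
assert flexible_entity_type_to_graphql("corpuser") == "CORP_USER"
# Already upper-cased values are treated as GraphQL enum values and pass through.
assert flexible_entity_type_to_graphql("CORP_USER") == "CORP_USER"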
datahub/ingestion/graph/filters.py CHANGED
@@ -1,6 +1,7 @@
  import dataclasses
  import enum
- from typing import Any, Dict, List, Literal, Optional
+ import warnings
+ from typing import Dict, List, Literal, Optional, Union

  from typing_extensions import TypeAlias

@@ -8,9 +9,14 @@ from datahub.emitter.mce_builder import (
  make_data_platform_urn,
  make_dataplatform_instance_urn,
  )
+ from datahub.errors import SearchFilterWarning
  from datahub.utilities.urns.urn import guess_entity_type

- RawSearchFilterRule = Dict[str, Any]
+ RawSearchFilterRule: TypeAlias = Dict[str, Union[str, bool, List[str]]]
+
+ # This is a list of OR filters, each of which is a list of AND filters.
+ # This can be put directly into the orFilters parameter in GraphQL.
+ RawSearchFilter: TypeAlias = List[Dict[Literal["and"], List[RawSearchFilterRule]]]

  # Mirrors our GraphQL enum: https://datahubproject.io/docs/graphql/enums#filteroperator
  FilterOperator: TypeAlias = Literal[
@@ -39,12 +45,14 @@ class SearchFilterRule:
  negated: bool = False

  def to_raw(self) -> RawSearchFilterRule:
- return {
+ rule: RawSearchFilterRule = {
  "field": self.field,
  "condition": self.condition,
  "values": self.values,
- "negated": self.negated,
  }
+ if self.negated:
+ rule["negated"] = True
+ return rule

  def negate(self) -> "SearchFilterRule":
  return SearchFilterRule(
@@ -73,10 +81,10 @@ def generate_filter(
  platform_instance: Optional[str],
  env: Optional[str],
  container: Optional[str],
- status: RemovedStatusFilter,
+ status: Optional[RemovedStatusFilter],
  extra_filters: Optional[List[RawSearchFilterRule]],
- extra_or_filters: Optional[List[RawSearchFilterRule]] = None,
- ) -> List[Dict[str, List[RawSearchFilterRule]]]:
+ extra_or_filters: Optional[RawSearchFilter] = None,
+ ) -> RawSearchFilter:
  """
  Generate a search filter based on the provided parameters.
  :param platform: The platform to filter by.
@@ -105,15 +113,16 @@
  and_filters.append(_get_container_filter(container).to_raw())

  # Status filter.
- status_filter = _get_status_filter(status)
- if status_filter:
- and_filters.append(status_filter.to_raw())
+ if status:
+ status_filter = _get_status_filter(status)
+ if status_filter:
+ and_filters.append(status_filter.to_raw())

  # Extra filters.
  if extra_filters:
  and_filters += extra_filters

- or_filters: List[Dict[str, List[RawSearchFilterRule]]] = [{"and": and_filters}]
+ or_filters: RawSearchFilter = [{"and": and_filters}]

  # Env filter
  if env:
@@ -127,11 +136,27 @@

  # Extra OR filters are distributed across the top level and lists.
  if extra_or_filters:
- or_filters = [
- {"and": and_filter["and"] + [extra_or_filter]}
- for extra_or_filter in extra_or_filters
- for and_filter in or_filters
- ]
+ new_or_filters: RawSearchFilter = []
+ for and_filter in or_filters:
+ for extra_or_filter in extra_or_filters:
+ if isinstance(extra_or_filter, dict) and "and" in extra_or_filter:
+ new_or_filters.append(
+ {"and": and_filter["and"] + extra_or_filter["and"]}
+ )
+ else:
+ # Hack for backwards compatibility.
+ # We have some code that erroneously passed a List[RawSearchFilterRule]
+ # instead of a List[Dict["and", List[RawSearchFilterRule]]].
+ warnings.warn(
+ "Passing a List[RawSearchFilterRule] to extra_or_filters is deprecated. "
+ "Please pass a List[Dict[str, List[RawSearchFilterRule]]] instead.",
+ SearchFilterWarning,
+ stacklevel=3,
+ )
+ new_or_filters.append(
+ {"and": and_filter["and"] + [extra_or_filter]} # type: ignore
+ )
+ or_filters = new_or_filters

  return or_filters

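A short sketch of the RawSearchFilter shape that generate_filter now expects for extra_or_filters; the field names and values below are illustrative:

from datahub.ingestion.graph.filters import (
    RawSearchFilter,
    RemovedStatusFilter,
    generate_filter,
)

# Top-level entries are OR'ed together; the rules inside each "and" list are AND'ed.
extra_or_filters: RawSearchFilter = [
    {"and": [{"field": "origin", "condition": "EQUAL", "values": ["PROD"]}]},
    {"and": [{"field": "origin", "condition": "EQUAL", "values": ["DEV"]}]},
]
or_filters = generate_filter(
    platform="snowflake",
    platform_instance=None,
    env=None,
    container=None,
    status=RemovedStatusFilter.NOT_SOFT_DELETED,
    extra_filters=None,
    extra_or_filters=extra_or_filters,
)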
datahub/ingestion/sink/datahub_rest.py CHANGED
@@ -20,7 +20,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.emitter.mcp_builder import mcps_from_mce
  from datahub.emitter.rest_emitter import (
  BATCH_INGEST_MAX_PAYLOAD_LENGTH,
- DEFAULT_REST_SINK_ENDPOINT,
+ DEFAULT_REST_EMITTER_ENDPOINT,
  DEFAULT_REST_TRACE_MODE,
  DataHubRestEmitter,
  RestSinkEndpoint,
@@ -70,7 +70,7 @@ _DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as(

  class DatahubRestSinkConfig(DatahubClientConfig):
  mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
- endpoint: RestSinkEndpoint = DEFAULT_REST_SINK_ENDPOINT
+ endpoint: RestSinkEndpoint = DEFAULT_REST_EMITTER_ENDPOINT
  default_trace_mode: RestTraceMode = DEFAULT_REST_TRACE_MODE

  # These only apply in async modes.
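Since the sink now defaults to the renamed emitter constant, the effective endpoint can be inspected or overridden per sink; a minimal sketch assuming a local GMS at the placeholder URL:

from datahub.ingestion.sink.datahub_rest import DatahubRestSinkConfig, RestSinkEndpoint

# Resolves to RESTLI unless DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT=OPENAPI is set.
config = DatahubRestSinkConfig(server="http://localhost:8080")
print(config.endpoint)

# Opt a single sink into the OpenAPI endpoint explicitly.
openapi_config = DatahubRestSinkConfig(
    server="http://localhost:8080",
    endpoint=RestSinkEndpoint.OPENAPI,
)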