acryl-datahub 1.0.0.1rc5__py3-none-any.whl → 1.0.0.1rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/METADATA +2451 -2451
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/RECORD +32 -32
- datahub/_version.py +1 -1
- datahub/cli/specific/dataset_cli.py +26 -10
- datahub/emitter/mcp_builder.py +8 -0
- datahub/emitter/rest_emitter.py +13 -5
- datahub/errors.py +4 -0
- datahub/ingestion/api/source.py +2 -1
- datahub/ingestion/api/source_helpers.py +9 -1
- datahub/ingestion/graph/client.py +20 -9
- datahub/ingestion/graph/filters.py +41 -16
- datahub/ingestion/sink/datahub_rest.py +2 -2
- datahub/ingestion/source/cassandra/cassandra.py +1 -10
- datahub/ingestion/source/common/subtypes.py +1 -0
- datahub/ingestion/source/iceberg/iceberg.py +159 -102
- datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
- datahub/ingestion/source/powerbi/config.py +31 -4
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +12 -1
- datahub/ingestion/source/sigma/config.py +3 -4
- datahub/ingestion/source/sigma/sigma.py +10 -6
- datahub/ingestion/source/sql/oracle.py +51 -4
- datahub/ingestion/source/usage/usage_common.py +0 -65
- datahub/sdk/search_client.py +81 -8
- datahub/sdk/search_filters.py +73 -11
- datahub/utilities/threaded_iterator_executor.py +16 -3
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
acryl_datahub-1.0.0.
|
|
1
|
+
acryl_datahub-1.0.0.1rc7.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=AZj-rwp4edRcZvS9Mq4fxTeV64QHFW-6zysNAtjc2qg,323
|
|
5
5
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
6
|
-
datahub/errors.py,sha256=
|
|
6
|
+
datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
|
|
@@ -86,7 +86,7 @@ datahub/cli/specific/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
|
86
86
|
datahub/cli/specific/assertions_cli.py,sha256=q0ODpyWS3LVR8UbF3NM6KRisabodJ0UUwyPty9a8AIk,5375
|
|
87
87
|
datahub/cli/specific/datacontract_cli.py,sha256=IkBovwuPT5jNB8X-8AQJRO4C9cFSNm1at8v4YctLFgQ,2531
|
|
88
88
|
datahub/cli/specific/dataproduct_cli.py,sha256=wSksU4xjGvAZJiiI7rSyjSItTu72oBPiXZ0-UL81zn0,15091
|
|
89
|
-
datahub/cli/specific/dataset_cli.py,sha256=
|
|
89
|
+
datahub/cli/specific/dataset_cli.py,sha256=SYxhGLzv8ZClTiE3rZE99blxh15ZMyQCQqpJWx9SRKc,8570
|
|
90
90
|
datahub/cli/specific/file_loader.py,sha256=YMyv_evdKyHSft5Tm_kOcqJ4ALpRmMm54ZJAyl7Nxqs,773
|
|
91
91
|
datahub/cli/specific/forms_cli.py,sha256=OLVeG8NtK1eDBuUKCT5Ald35np8__f8mLzbZM_zUfWU,1484
|
|
92
92
|
datahub/cli/specific/group_cli.py,sha256=xPUYk48VbVXLMj-z9VNW0RZzXOe4rQsc2jLwSOGCoec,1967
|
|
@@ -122,11 +122,11 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
|
|
|
122
122
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
123
123
|
datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
|
|
124
124
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
125
|
-
datahub/emitter/mcp_builder.py,sha256=
|
|
125
|
+
datahub/emitter/mcp_builder.py,sha256=JyAC8obvkf6ZpINJ8I2p-Ofr52-tuoQBDbxp-bhjyrM,11871
|
|
126
126
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
127
127
|
datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
|
|
128
128
|
datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
|
|
129
|
-
datahub/emitter/rest_emitter.py,sha256=
|
|
129
|
+
datahub/emitter/rest_emitter.py,sha256=PzZkt0JlCnexJgqzYQxSQgMcHj-qDg8lIsmhUn4GPfU,30631
|
|
130
130
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
131
131
|
datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
|
|
132
132
|
datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
|
|
@@ -145,8 +145,8 @@ datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX
|
|
|
145
145
|
datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
|
|
146
146
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
147
147
|
datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
|
|
148
|
-
datahub/ingestion/api/source.py,sha256=
|
|
149
|
-
datahub/ingestion/api/source_helpers.py,sha256=
|
|
148
|
+
datahub/ingestion/api/source.py,sha256=HrQahSEBeapMDnW8S6wSEyNLLE9RCs2R6eUrVaibuuc,19349
|
|
149
|
+
datahub/ingestion/api/source_helpers.py,sha256=OhgBLdpUIuqF_gl4uV8Y2csp-z97zzXeFj2I5aUypCI,20158
|
|
150
150
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
151
151
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
152
152
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -171,11 +171,11 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
|
|
|
171
171
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
172
172
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
|
|
173
173
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
174
|
-
datahub/ingestion/graph/client.py,sha256=
|
|
174
|
+
datahub/ingestion/graph/client.py,sha256=Qtjf5YrQeQzcTb0qxr6-y4MSEKSJm8f0hO6BoeRA_yI,65916
|
|
175
175
|
datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
|
|
176
176
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
177
177
|
datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
|
|
178
|
-
datahub/ingestion/graph/filters.py,sha256=
|
|
178
|
+
datahub/ingestion/graph/filters.py,sha256=hZ8YOQRxC0_mbAx_SLkgqyYXr0Fw3O4U2wo2UMuDHJY,8653
|
|
179
179
|
datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
180
|
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=iEulcZMLBQuUfe9MAYyobMekvMcNm4dqVcS_C_2KfrI,9736
|
|
181
181
|
datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
|
|
@@ -190,7 +190,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
|
|
|
190
190
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
191
191
|
datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
|
|
192
192
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
193
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
193
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=0te9kxDXAJU8A7wfEhDb4R9VDBYYDpy-YMPJZWEEKCM,12982
|
|
194
194
|
datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
|
|
195
195
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
196
196
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -263,7 +263,7 @@ datahub/ingestion/source/bigquery_v2/queries.py,sha256=c1BpeQP8p8y-FOhmiQkkY2IqG
|
|
|
263
263
|
datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAPDsvN2JRd2dmM1t1J1mRylfKiPen4,19530
|
|
264
264
|
datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
|
|
265
265
|
datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
266
|
-
datahub/ingestion/source/cassandra/cassandra.py,sha256=
|
|
266
|
+
datahub/ingestion/source/cassandra/cassandra.py,sha256=lKvPP0Uahi9xw_yh9cArPPtwvAauXolaEk-6f-jhpz4,14558
|
|
267
267
|
datahub/ingestion/source/cassandra/cassandra_api.py,sha256=UVGQTsk6O57Q6wrWo54bQPLtStTWhw_Fq6fgW3Bjgk8,12515
|
|
268
268
|
datahub/ingestion/source/cassandra/cassandra_config.py,sha256=vIMUOzazWTGi03B51vI0-YMxaMJHUGmCxJJgd8pKhC8,3791
|
|
269
269
|
datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5PaKuu9fNEKxEbhIrPI-T9gaVoM87NQ,11063
|
|
@@ -272,7 +272,7 @@ datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
272
272
|
datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
|
|
273
273
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
274
274
|
datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNClLlmOfANS7U6rChhdthRX9s9iUv9k,2411
|
|
275
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
275
|
+
datahub/ingestion/source/common/subtypes.py,sha256=UZca0ZQUQdoXr5Z-3AIUT9gIlPt-XwbMNjj7WEEiR_4,3107
|
|
276
276
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
277
277
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
278
278
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
@@ -333,9 +333,9 @@ datahub/ingestion/source/hex/hex.py,sha256=DPpsi5e-sdUgbS0Okyvx1mvc00Adu47zA65oF
|
|
|
333
333
|
datahub/ingestion/source/hex/mapper.py,sha256=6dsGvvhPAOAbAG1ayxLwipgJGt1q7YanWYfMX3rZeiM,12603
|
|
334
334
|
datahub/ingestion/source/hex/model.py,sha256=hmMfOLEGZcKjwy2DW29OPf_9_Q_TesgnUTCen2br_fA,1471
|
|
335
335
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
|
-
datahub/ingestion/source/iceberg/iceberg.py,sha256=
|
|
336
|
+
datahub/ingestion/source/iceberg/iceberg.py,sha256=i9o0ia2vQUGqoagN7GgsoaUlhjj9xKBNP-3ia2cMgHY,30762
|
|
337
337
|
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
|
|
338
|
-
datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=
|
|
338
|
+
datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
|
|
339
339
|
datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
340
340
|
datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
|
|
341
341
|
datahub/ingestion/source/identity/okta.py,sha256=jC21myJuMRTaPgj0OD9heaC-mz8ECjqpy2hSJwlUSwM,31943
|
|
@@ -375,16 +375,16 @@ datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH5
|
|
|
375
375
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
376
376
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
|
|
377
377
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
378
|
-
datahub/ingestion/source/powerbi/config.py,sha256=
|
|
378
|
+
datahub/ingestion/source/powerbi/config.py,sha256=5rG62dspGF9jIo8l6HLpB6ECv5n-t1un2ZyGiisD784,24219
|
|
379
379
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
380
380
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
|
|
381
|
-
datahub/ingestion/source/powerbi/powerbi.py,sha256=
|
|
381
|
+
datahub/ingestion/source/powerbi/powerbi.py,sha256=a5itVuGmg-0xAQK5a-cXB5UxpR3rLJx0o2x_lz-8ox8,55955
|
|
382
382
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
383
|
-
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=
|
|
383
|
+
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
|
|
384
384
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
385
385
|
datahub/ingestion/source/powerbi/m_query/parser.py,sha256=5KqhUwj9H9yL9ZMPP9oSeVGiZjvXjw6Iu_HrGr95E5M,5876
|
|
386
|
-
datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=
|
|
387
|
-
datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=
|
|
386
|
+
datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=aOhAb8U4OEZnO4ufnb-Cm3KMpdy-JF6r9YMK3RNZs5A,35906
|
|
387
|
+
datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=ISH8Xjx51q2S81fn2v5RhCCU-kRAW3juxM0rMFs4TDo,17413
|
|
388
388
|
datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=NIKNNHAE4kTJefTM1WR-StJi9NuingaRYn_mS_kV6A8,6180
|
|
389
389
|
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
390
390
|
datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -438,9 +438,9 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
|
|
|
438
438
|
datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
|
|
439
439
|
datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
|
|
440
440
|
datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
441
|
-
datahub/ingestion/source/sigma/config.py,sha256=
|
|
441
|
+
datahub/ingestion/source/sigma/config.py,sha256=yfdKQYvI5hKVl8gNAKIcJe-VW3klvdDqYbUP76gJQDI,3812
|
|
442
442
|
datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
|
|
443
|
-
datahub/ingestion/source/sigma/sigma.py,sha256=
|
|
443
|
+
datahub/ingestion/source/sigma/sigma.py,sha256=ApVtToI1cnpscvdEMcC-3EuTYnczW9CH-s912z-pDpk,24262
|
|
444
444
|
datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
|
|
445
445
|
datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
446
446
|
datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
|
|
@@ -475,7 +475,7 @@ datahub/ingestion/source/sql/hive.py,sha256=n0XCGkNkVAe-TEyXbxlefvohbmtALbWaC1a0
|
|
|
475
475
|
datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
|
|
476
476
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
477
477
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
478
|
-
datahub/ingestion/source/sql/oracle.py,sha256=
|
|
478
|
+
datahub/ingestion/source/sql/oracle.py,sha256=PhQZGiZbf_rRtNMlNV-MXSoN3geDBb9zXsGJRVvaKbo,29831
|
|
479
479
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
480
480
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
481
481
|
datahub/ingestion/source/sql/sql_common.py,sha256=jsweel_-vesNtcPonnfS11OUrlcZnS3wGt5r0dYTPnM,48637
|
|
@@ -533,7 +533,7 @@ datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_
|
|
|
533
533
|
datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
534
534
|
datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e36wmYzs8xtpD632z6pLiw,9974
|
|
535
535
|
datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
|
|
536
|
-
datahub/ingestion/source/usage/usage_common.py,sha256=
|
|
536
|
+
datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
|
|
537
537
|
datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
538
538
|
datahub/ingestion/source/vertexai/vertexai.py,sha256=TXTa-Cm1C7xkbTNXNmKr_hi2FDJ9VG-ahrxk9yuxaTg,43635
|
|
539
539
|
datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
|
|
@@ -906,8 +906,8 @@ datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
|
|
|
906
906
|
datahub/sdk/entity_client.py,sha256=Sxe6H6Vr_tqLJu5KW7MJfLWJ6mgh4mbsx7u7MOBpM64,5052
|
|
907
907
|
datahub/sdk/main_client.py,sha256=h2MKRhR-BO0zGCMhF7z2bTncX4hagKrAYwR3wTNTtzA,3666
|
|
908
908
|
datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
|
|
909
|
-
datahub/sdk/search_client.py,sha256=
|
|
910
|
-
datahub/sdk/search_filters.py,sha256=
|
|
909
|
+
datahub/sdk/search_client.py,sha256=BJR5t7Ff2oDNOGLcSCp9YHzrGKbgOQr7T8XQKGEpucw,3437
|
|
910
|
+
datahub/sdk/search_filters.py,sha256=BcMhvG5hGYAATtLPLz4WLRjKApX2oLYrrcGn-CG__ek,12901
|
|
911
911
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
912
912
|
datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
|
|
913
913
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
@@ -996,7 +996,7 @@ datahub/utilities/sqllineage_patch.py,sha256=0Buh50bmEqJFg1HFRCknCnePo1cecI4JmGx
|
|
|
996
996
|
datahub/utilities/stats_collections.py,sha256=CxaTcrF7J6am7iX5jPhFKne535UcyDk_oreVwR013fU,1625
|
|
997
997
|
datahub/utilities/str_enum.py,sha256=EsqCLPbrqyQ2YU_wt7QP-a6P5fnpIshXJ3AI8gLBlVA,474
|
|
998
998
|
datahub/utilities/tee_io.py,sha256=jBrsUfTPTk9IICntfGOG0HR-Fjp8BQMde-FPQ4r3kuI,601
|
|
999
|
-
datahub/utilities/threaded_iterator_executor.py,sha256=
|
|
999
|
+
datahub/utilities/threaded_iterator_executor.py,sha256=6BpCE0os3d-uMYxHBilPQC-JvEBkU6JQY4bGs06JKYI,2004
|
|
1000
1000
|
datahub/utilities/threading_timeout.py,sha256=hOzDI55E3onXblHNwGsePJUWMXo5zqaWCnoYdL2-KPM,1316
|
|
1001
1001
|
datahub/utilities/time.py,sha256=Q7S_Zyom8C2zcl2xFbjNw6K8nZsCub5XGAB4OEmIS34,1847
|
|
1002
1002
|
datahub/utilities/topological_sort.py,sha256=kcK5zPSR393fgItr-KSLV3bDqfJfBRS8E5kkCpPBgUY,1358
|
|
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1043
1043
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1044
1044
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1045
1045
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1046
|
-
acryl_datahub-1.0.0.
|
|
1047
|
-
acryl_datahub-1.0.0.
|
|
1048
|
-
acryl_datahub-1.0.0.
|
|
1049
|
-
acryl_datahub-1.0.0.
|
|
1050
|
-
acryl_datahub-1.0.0.
|
|
1046
|
+
acryl_datahub-1.0.0.1rc7.dist-info/METADATA,sha256=QkeMAnAXXez9FFTnJVpxhOJMNDadF0gfVQ3uz3Fh4i8,176849
|
|
1047
|
+
acryl_datahub-1.0.0.1rc7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
1048
|
+
acryl_datahub-1.0.0.1rc7.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1049
|
+
acryl_datahub-1.0.0.1rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1050
|
+
acryl_datahub-1.0.0.1rc7.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -29,13 +29,16 @@ def dataset() -> None:
|
|
|
29
29
|
name="upsert",
|
|
30
30
|
)
|
|
31
31
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
32
|
+
@click.option(
|
|
33
|
+
"-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
|
|
34
|
+
)
|
|
32
35
|
@upgrade.check_upgrade
|
|
33
36
|
@telemetry.with_telemetry()
|
|
34
|
-
def upsert(file: Path) -> None:
|
|
37
|
+
def upsert(file: Path, dry_run: bool) -> None:
|
|
35
38
|
"""Upsert attributes to a Dataset in DataHub."""
|
|
36
39
|
# Call the sync command with to_datahub=True to perform the upsert operation
|
|
37
40
|
ctx = click.get_current_context()
|
|
38
|
-
ctx.invoke(sync, file=str(file), to_datahub=True)
|
|
41
|
+
ctx.invoke(sync, file=str(file), dry_run=dry_run, to_datahub=True)
|
|
39
42
|
|
|
40
43
|
|
|
41
44
|
@dataset.command(
|
|
@@ -167,11 +170,16 @@ def file(lintcheck: bool, lintfix: bool, file: str) -> None:
|
|
|
167
170
|
)
|
|
168
171
|
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
|
|
169
172
|
@click.option("--to-datahub/--from-datahub", required=True, is_flag=True)
|
|
173
|
+
@click.option(
|
|
174
|
+
"-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
|
|
175
|
+
)
|
|
170
176
|
@upgrade.check_upgrade
|
|
171
177
|
@telemetry.with_telemetry()
|
|
172
|
-
def sync(file: str, to_datahub: bool) -> None:
|
|
178
|
+
def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
|
|
173
179
|
"""Sync a Dataset file to/from DataHub"""
|
|
174
180
|
|
|
181
|
+
dry_run_prefix = "[dry-run]: " if dry_run else "" # prefix to use in messages
|
|
182
|
+
|
|
175
183
|
failures: List[str] = []
|
|
176
184
|
with get_default_graph() as graph:
|
|
177
185
|
datasets = Dataset.from_yaml(file)
|
|
@@ -189,7 +197,7 @@ def sync(file: str, to_datahub: bool) -> None:
|
|
|
189
197
|
click.secho(
|
|
190
198
|
"\n\t- ".join(
|
|
191
199
|
[
|
|
192
|
-
f"Skipping Dataset {dataset.urn} due to missing entity references: "
|
|
200
|
+
f"{dry_run_prefix}Skipping Dataset {dataset.urn} due to missing entity references: "
|
|
193
201
|
]
|
|
194
202
|
+ missing_entity_references
|
|
195
203
|
),
|
|
@@ -199,13 +207,18 @@ def sync(file: str, to_datahub: bool) -> None:
|
|
|
199
207
|
continue
|
|
200
208
|
try:
|
|
201
209
|
for mcp in dataset.generate_mcp():
|
|
202
|
-
|
|
203
|
-
|
|
210
|
+
if not dry_run:
|
|
211
|
+
graph.emit(mcp)
|
|
212
|
+
click.secho(
|
|
213
|
+
f"{dry_run_prefix}Update succeeded for urn {dataset.urn}.",
|
|
214
|
+
fg="green",
|
|
215
|
+
)
|
|
204
216
|
except Exception as e:
|
|
205
217
|
click.secho(
|
|
206
|
-
f"Update failed for id {id}. due to {e}",
|
|
218
|
+
f"{dry_run_prefix}Update failed for id {id}. due to {e}",
|
|
207
219
|
fg="red",
|
|
208
220
|
)
|
|
221
|
+
failures.append(dataset.urn)
|
|
209
222
|
else:
|
|
210
223
|
# Sync from DataHub
|
|
211
224
|
if graph.exists(dataset.urn):
|
|
@@ -215,13 +228,16 @@ def sync(file: str, to_datahub: bool) -> None:
|
|
|
215
228
|
existing_dataset: Dataset = Dataset.from_datahub(
|
|
216
229
|
graph=graph, urn=dataset.urn, config=dataset_get_config
|
|
217
230
|
)
|
|
218
|
-
|
|
231
|
+
if not dry_run:
|
|
232
|
+
existing_dataset.to_yaml(Path(file))
|
|
233
|
+
else:
|
|
234
|
+
click.secho(f"{dry_run_prefix}Will update file {file}")
|
|
219
235
|
else:
|
|
220
|
-
click.secho(f"Dataset {dataset.urn} does not exist")
|
|
236
|
+
click.secho(f"{dry_run_prefix}Dataset {dataset.urn} does not exist")
|
|
221
237
|
failures.append(dataset.urn)
|
|
222
238
|
if failures:
|
|
223
239
|
click.secho(
|
|
224
|
-
f"\
|
|
240
|
+
f"\n{dry_run_prefix}Failed to sync the following Datasets: {', '.join(failures)}",
|
|
225
241
|
fg="red",
|
|
226
242
|
)
|
|
227
243
|
raise click.Abort()
|
datahub/emitter/mcp_builder.py
CHANGED
|
@@ -117,6 +117,14 @@ class ContainerKey(DatahubKey):
|
|
|
117
117
|
PlatformKey = ContainerKey
|
|
118
118
|
|
|
119
119
|
|
|
120
|
+
class NamespaceKey(ContainerKey):
|
|
121
|
+
"""
|
|
122
|
+
For Iceberg namespaces (databases/schemas)
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
namespace: str
|
|
126
|
+
|
|
127
|
+
|
|
120
128
|
class DatabaseKey(ContainerKey):
|
|
121
129
|
database: str
|
|
122
130
|
|
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -41,7 +41,7 @@ from datahub.configuration.common import (
|
|
|
41
41
|
TraceTimeoutError,
|
|
42
42
|
TraceValidationError,
|
|
43
43
|
)
|
|
44
|
-
from datahub.emitter.aspect import JSON_CONTENT_TYPE
|
|
44
|
+
from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
|
|
45
45
|
from datahub.emitter.generic_emitter import Emitter
|
|
46
46
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
47
47
|
from datahub.emitter.request_helper import make_curl_command
|
|
@@ -109,9 +109,9 @@ class RestSinkEndpoint(ConfigEnum):
|
|
|
109
109
|
OPENAPI = auto()
|
|
110
110
|
|
|
111
111
|
|
|
112
|
-
|
|
112
|
+
DEFAULT_REST_EMITTER_ENDPOINT = pydantic.parse_obj_as(
|
|
113
113
|
RestSinkEndpoint,
|
|
114
|
-
os.getenv("
|
|
114
|
+
os.getenv("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT", RestSinkEndpoint.RESTLI),
|
|
115
115
|
)
|
|
116
116
|
|
|
117
117
|
|
|
@@ -229,7 +229,9 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
229
229
|
ca_certificate_path: Optional[str] = None,
|
|
230
230
|
client_certificate_path: Optional[str] = None,
|
|
231
231
|
disable_ssl_verification: bool = False,
|
|
232
|
-
openapi_ingestion: bool =
|
|
232
|
+
openapi_ingestion: bool = (
|
|
233
|
+
DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI
|
|
234
|
+
),
|
|
233
235
|
default_trace_mode: bool = False,
|
|
234
236
|
):
|
|
235
237
|
if not gms_server:
|
|
@@ -359,8 +361,14 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
359
361
|
)["aspect"]["json"]
|
|
360
362
|
else:
|
|
361
363
|
obj = mcp.aspect.to_obj()
|
|
362
|
-
|
|
364
|
+
content_type = obj.get("contentType")
|
|
365
|
+
if obj.get("value") and content_type == JSON_CONTENT_TYPE:
|
|
366
|
+
# Undo double serialization.
|
|
363
367
|
obj = json.loads(obj["value"])
|
|
368
|
+
elif content_type == JSON_PATCH_CONTENT_TYPE:
|
|
369
|
+
raise NotImplementedError(
|
|
370
|
+
"Patches are not supported for OpenAPI ingestion. Set the endpoint to RESTLI."
|
|
371
|
+
)
|
|
364
372
|
aspect_value = pre_json_transform(obj)
|
|
365
373
|
return (
|
|
366
374
|
url,
|
datahub/errors.py
CHANGED
datahub/ingestion/api/source.py
CHANGED
|
@@ -51,6 +51,7 @@ from datahub.ingestion.api.source_helpers import (
|
|
|
51
51
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
52
52
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
53
53
|
from datahub.metadata.schema_classes import UpstreamLineageClass
|
|
54
|
+
from datahub.sdk.entity import Entity
|
|
54
55
|
from datahub.utilities.lossy_collections import LossyDict, LossyList
|
|
55
56
|
from datahub.utilities.type_annotations import get_class_from_annotation
|
|
56
57
|
|
|
@@ -480,7 +481,7 @@ class Source(Closeable, metaclass=ABCMeta):
|
|
|
480
481
|
|
|
481
482
|
def get_workunits_internal(
|
|
482
483
|
self,
|
|
483
|
-
) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper]]:
|
|
484
|
+
) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper, Entity]]:
|
|
484
485
|
raise NotImplementedError(
|
|
485
486
|
"get_workunits_internal must be implemented if get_workunits is not overriden."
|
|
486
487
|
)
|
|
@@ -35,6 +35,7 @@ from datahub.metadata.schema_classes import (
|
|
|
35
35
|
TimeWindowSizeClass,
|
|
36
36
|
)
|
|
37
37
|
from datahub.metadata.urns import DatasetUrn, GlossaryTermUrn, TagUrn, Urn
|
|
38
|
+
from datahub.sdk.entity import Entity
|
|
38
39
|
from datahub.specific.dataset import DatasetPatchBuilder
|
|
39
40
|
from datahub.telemetry import telemetry
|
|
40
41
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
@@ -49,7 +50,12 @@ logger = logging.getLogger(__name__)
|
|
|
49
50
|
|
|
50
51
|
def auto_workunit(
|
|
51
52
|
stream: Iterable[
|
|
52
|
-
Union[
|
|
53
|
+
Union[
|
|
54
|
+
MetadataChangeEventClass,
|
|
55
|
+
MetadataChangeProposalWrapper,
|
|
56
|
+
MetadataWorkUnit,
|
|
57
|
+
Entity,
|
|
58
|
+
]
|
|
53
59
|
],
|
|
54
60
|
) -> Iterable[MetadataWorkUnit]:
|
|
55
61
|
"""Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
|
|
@@ -62,6 +68,8 @@ def auto_workunit(
|
|
|
62
68
|
)
|
|
63
69
|
elif isinstance(item, MetadataChangeProposalWrapper):
|
|
64
70
|
yield item.as_workunit()
|
|
71
|
+
elif isinstance(item, Entity):
|
|
72
|
+
yield from item.as_workunits()
|
|
65
73
|
else:
|
|
66
74
|
yield item
|
|
67
75
|
|
|
@@ -33,7 +33,7 @@ from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
|
|
|
33
33
|
from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
|
|
34
34
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
35
35
|
from datahub.emitter.rest_emitter import (
|
|
36
|
-
|
|
36
|
+
DEFAULT_REST_EMITTER_ENDPOINT,
|
|
37
37
|
DEFAULT_REST_TRACE_MODE,
|
|
38
38
|
DatahubRestEmitter,
|
|
39
39
|
RestSinkEndpoint,
|
|
@@ -49,6 +49,7 @@ from datahub.ingestion.graph.connections import (
|
|
|
49
49
|
)
|
|
50
50
|
from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
|
|
51
51
|
from datahub.ingestion.graph.filters import (
|
|
52
|
+
RawSearchFilter,
|
|
52
53
|
RawSearchFilterRule,
|
|
53
54
|
RemovedStatusFilter,
|
|
54
55
|
generate_filter,
|
|
@@ -75,10 +76,11 @@ from datahub.metadata.schema_classes import (
|
|
|
75
76
|
SystemMetadataClass,
|
|
76
77
|
TelemetryClientIdClass,
|
|
77
78
|
)
|
|
79
|
+
from datahub.metadata.urns import CorpUserUrn, Urn
|
|
78
80
|
from datahub.telemetry.telemetry import telemetry_instance
|
|
79
81
|
from datahub.utilities.perf_timer import PerfTimer
|
|
80
82
|
from datahub.utilities.str_enum import StrEnum
|
|
81
|
-
from datahub.utilities.urns.urn import
|
|
83
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
82
84
|
|
|
83
85
|
if TYPE_CHECKING:
|
|
84
86
|
from datahub.ingestion.sink.datahub_rest import (
|
|
@@ -116,7 +118,7 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
116
118
|
"""Convert the entity types into GraphQL "EntityType" enum values."""
|
|
117
119
|
|
|
118
120
|
# Hard-coded special cases.
|
|
119
|
-
if entity_type ==
|
|
121
|
+
if entity_type == CorpUserUrn.ENTITY_TYPE:
|
|
120
122
|
return "CORP_USER"
|
|
121
123
|
|
|
122
124
|
# Convert camelCase to UPPER_UNDERSCORE.
|
|
@@ -133,6 +135,14 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
133
135
|
return entity_type
|
|
134
136
|
|
|
135
137
|
|
|
138
|
+
def flexible_entity_type_to_graphql(entity_type: str) -> str:
|
|
139
|
+
if entity_type.upper() == entity_type:
|
|
140
|
+
# Assume that we were passed a graphql EntityType enum value,
|
|
141
|
+
# so no conversion is needed.
|
|
142
|
+
return entity_type
|
|
143
|
+
return entity_type_to_graphql(entity_type)
|
|
144
|
+
|
|
145
|
+
|
|
136
146
|
class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
137
147
|
def __init__(self, config: DatahubClientConfig) -> None:
|
|
138
148
|
self.config = config
|
|
@@ -147,7 +157,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
147
157
|
ca_certificate_path=self.config.ca_certificate_path,
|
|
148
158
|
client_certificate_path=self.config.client_certificate_path,
|
|
149
159
|
disable_ssl_verification=self.config.disable_ssl_verification,
|
|
150
|
-
openapi_ingestion=
|
|
160
|
+
openapi_ingestion=DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI,
|
|
151
161
|
default_trace_mode=DEFAULT_REST_TRACE_MODE == RestTraceMode.ENABLED,
|
|
152
162
|
)
|
|
153
163
|
|
|
@@ -805,7 +815,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
805
815
|
|
|
806
816
|
:return: An iterable of (urn, schema info) tuple that match the filters.
|
|
807
817
|
"""
|
|
808
|
-
types = [
|
|
818
|
+
types = self._get_types(["dataset"])
|
|
809
819
|
|
|
810
820
|
# Add the query default of * if no query is specified.
|
|
811
821
|
query = query or "*"
|
|
@@ -873,10 +883,10 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
873
883
|
env: Optional[str] = None,
|
|
874
884
|
query: Optional[str] = None,
|
|
875
885
|
container: Optional[str] = None,
|
|
876
|
-
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
886
|
+
status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
877
887
|
batch_size: int = 10000,
|
|
878
888
|
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
879
|
-
extra_or_filters: Optional[
|
|
889
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
880
890
|
) -> Iterable[str]:
|
|
881
891
|
"""Fetch all urns that match all of the given filters.
|
|
882
892
|
|
|
@@ -968,7 +978,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
968
978
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
969
979
|
batch_size: int = 10000,
|
|
970
980
|
extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
|
|
971
|
-
extra_or_filters: Optional[
|
|
981
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
972
982
|
extra_source_fields: Optional[List[str]] = None,
|
|
973
983
|
skip_cache: bool = False,
|
|
974
984
|
) -> Iterable[dict]:
|
|
@@ -1121,7 +1131,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1121
1131
|
)
|
|
1122
1132
|
|
|
1123
1133
|
types = [
|
|
1124
|
-
|
|
1134
|
+
flexible_entity_type_to_graphql(entity_type)
|
|
1135
|
+
for entity_type in entity_types
|
|
1125
1136
|
]
|
|
1126
1137
|
return types
|
|
1127
1138
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import enum
|
|
3
|
-
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import Dict, List, Literal, Optional, Union
|
|
4
5
|
|
|
5
6
|
from typing_extensions import TypeAlias
|
|
6
7
|
|
|
@@ -8,9 +9,14 @@ from datahub.emitter.mce_builder import (
|
|
|
8
9
|
make_data_platform_urn,
|
|
9
10
|
make_dataplatform_instance_urn,
|
|
10
11
|
)
|
|
12
|
+
from datahub.errors import SearchFilterWarning
|
|
11
13
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
12
14
|
|
|
13
|
-
RawSearchFilterRule = Dict[str,
|
|
15
|
+
RawSearchFilterRule: TypeAlias = Dict[str, Union[str, bool, List[str]]]
|
|
16
|
+
|
|
17
|
+
# This is a list of OR filters, each of which is a list of AND filters.
|
|
18
|
+
# This can be put directly into the orFilters parameter in GraphQL.
|
|
19
|
+
RawSearchFilter: TypeAlias = List[Dict[Literal["and"], List[RawSearchFilterRule]]]
|
|
14
20
|
|
|
15
21
|
# Mirrors our GraphQL enum: https://datahubproject.io/docs/graphql/enums#filteroperator
|
|
16
22
|
FilterOperator: TypeAlias = Literal[
|
|
@@ -39,12 +45,14 @@ class SearchFilterRule:
|
|
|
39
45
|
negated: bool = False
|
|
40
46
|
|
|
41
47
|
def to_raw(self) -> RawSearchFilterRule:
|
|
42
|
-
|
|
48
|
+
rule: RawSearchFilterRule = {
|
|
43
49
|
"field": self.field,
|
|
44
50
|
"condition": self.condition,
|
|
45
51
|
"values": self.values,
|
|
46
|
-
"negated": self.negated,
|
|
47
52
|
}
|
|
53
|
+
if self.negated:
|
|
54
|
+
rule["negated"] = True
|
|
55
|
+
return rule
|
|
48
56
|
|
|
49
57
|
def negate(self) -> "SearchFilterRule":
|
|
50
58
|
return SearchFilterRule(
|
|
@@ -73,10 +81,10 @@ def generate_filter(
|
|
|
73
81
|
platform_instance: Optional[str],
|
|
74
82
|
env: Optional[str],
|
|
75
83
|
container: Optional[str],
|
|
76
|
-
status: RemovedStatusFilter,
|
|
84
|
+
status: Optional[RemovedStatusFilter],
|
|
77
85
|
extra_filters: Optional[List[RawSearchFilterRule]],
|
|
78
|
-
extra_or_filters: Optional[
|
|
79
|
-
) ->
|
|
86
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
87
|
+
) -> RawSearchFilter:
|
|
80
88
|
"""
|
|
81
89
|
Generate a search filter based on the provided parameters.
|
|
82
90
|
:param platform: The platform to filter by.
|
|
@@ -105,15 +113,16 @@ def generate_filter(
|
|
|
105
113
|
and_filters.append(_get_container_filter(container).to_raw())
|
|
106
114
|
|
|
107
115
|
# Status filter.
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
116
|
+
if status:
|
|
117
|
+
status_filter = _get_status_filter(status)
|
|
118
|
+
if status_filter:
|
|
119
|
+
and_filters.append(status_filter.to_raw())
|
|
111
120
|
|
|
112
121
|
# Extra filters.
|
|
113
122
|
if extra_filters:
|
|
114
123
|
and_filters += extra_filters
|
|
115
124
|
|
|
116
|
-
or_filters:
|
|
125
|
+
or_filters: RawSearchFilter = [{"and": and_filters}]
|
|
117
126
|
|
|
118
127
|
# Env filter
|
|
119
128
|
if env:
|
|
@@ -127,11 +136,27 @@ def generate_filter(
|
|
|
127
136
|
|
|
128
137
|
# Extra OR filters are distributed across the top level and lists.
|
|
129
138
|
if extra_or_filters:
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
for extra_or_filter in extra_or_filters
|
|
133
|
-
|
|
134
|
-
|
|
139
|
+
new_or_filters: RawSearchFilter = []
|
|
140
|
+
for and_filter in or_filters:
|
|
141
|
+
for extra_or_filter in extra_or_filters:
|
|
142
|
+
if isinstance(extra_or_filter, dict) and "and" in extra_or_filter:
|
|
143
|
+
new_or_filters.append(
|
|
144
|
+
{"and": and_filter["and"] + extra_or_filter["and"]}
|
|
145
|
+
)
|
|
146
|
+
else:
|
|
147
|
+
# Hack for backwards compatibility.
|
|
148
|
+
# We have some code that erroneously passed a List[RawSearchFilterRule]
|
|
149
|
+
# instead of a List[Dict["and", List[RawSearchFilterRule]]].
|
|
150
|
+
warnings.warn(
|
|
151
|
+
"Passing a List[RawSearchFilterRule] to extra_or_filters is deprecated. "
|
|
152
|
+
"Please pass a List[Dict[str, List[RawSearchFilterRule]]] instead.",
|
|
153
|
+
SearchFilterWarning,
|
|
154
|
+
stacklevel=3,
|
|
155
|
+
)
|
|
156
|
+
new_or_filters.append(
|
|
157
|
+
{"and": and_filter["and"] + [extra_or_filter]} # type: ignore
|
|
158
|
+
)
|
|
159
|
+
or_filters = new_or_filters
|
|
135
160
|
|
|
136
161
|
return or_filters
|
|
137
162
|
|
|
@@ -20,7 +20,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
|
20
20
|
from datahub.emitter.mcp_builder import mcps_from_mce
|
|
21
21
|
from datahub.emitter.rest_emitter import (
|
|
22
22
|
BATCH_INGEST_MAX_PAYLOAD_LENGTH,
|
|
23
|
-
|
|
23
|
+
DEFAULT_REST_EMITTER_ENDPOINT,
|
|
24
24
|
DEFAULT_REST_TRACE_MODE,
|
|
25
25
|
DataHubRestEmitter,
|
|
26
26
|
RestSinkEndpoint,
|
|
@@ -70,7 +70,7 @@ _DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as(
|
|
|
70
70
|
|
|
71
71
|
class DatahubRestSinkConfig(DatahubClientConfig):
|
|
72
72
|
mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
|
|
73
|
-
endpoint: RestSinkEndpoint =
|
|
73
|
+
endpoint: RestSinkEndpoint = DEFAULT_REST_EMITTER_ENDPOINT
|
|
74
74
|
default_trace_mode: RestTraceMode = DEFAULT_REST_TRACE_MODE
|
|
75
75
|
|
|
76
76
|
# These only apply in async modes.
|